tesseract  5.0.0-alpha-619-ge9db
tabfind.cpp
Go to the documentation of this file.
1 // File: tabfind.cpp
3 // Description: Subclass of BBGrid to find vertically aligned blobs.
4 // Author: Ray Smith
5 //
6 // (C) Copyright 2008, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
18 
19 #ifdef HAVE_CONFIG_H
20 #include "config_auto.h"
21 #endif
22 
23 #include "tabfind.h"
24 #include "alignedblob.h"
25 #include "colpartitiongrid.h"
26 #include "detlinefit.h"
27 #include "host.h" // for NearlyEqual
28 #include "linefind.h"
29 
30 #include <algorithm>
31 
32 namespace tesseract {
33 
// Search radius for initial gaps, expressed as a multiple of the box size.
constexpr int kTabRadiusFactor = 5;
// Lower and upper multiples of height used for the vertical search when
// extrapolating.
constexpr int kMinVerticalSearch = 3;
constexpr int kMaxVerticalSearch = 12;
// NOTE(review): presumably the corresponding upper limit for ragged tabs;
// confirm against its users.
constexpr int kMaxRaggedSearch = 25;
// A column width needs at least this many lines to be interesting.
constexpr int kMinLinesInColumn = 10;
// A column needs at least this width to be interesting.
constexpr int kMinColumnWidth = 200;
// A column needs at least this fraction of the total column lines to be
// interesting.
constexpr double kMinFractionalLinesInColumn = 0.125;
// Alignment tolerance for aligned tabs, as a fraction of height.
constexpr double kAlignedFraction = 0.03125;
// Largest gutter width (in absolute inches) that is worth considering.
constexpr double kMaxGutterWidthAbsolute = 2.00;
// Min gutter width of TT_MAYBE_RAGGED blobs, as a multiple of gridsize.
constexpr int kRaggedGutterMultiple = 5;
// Tall objects with at least this aspect ratio count as separator line
// fragments, and are ignored when searching the gutter for obstructions.
constexpr double kLineFragmentAspectRatio = 10.0;
// Fewest points to accept after evaluation.
constexpr int kMinEvaluatedTabs = 3;
// Diacritic blobs may be rotated by up to 30 degrees.
// This value must stay slightly larger than kCosSmallAngle in blobbox.cpp
// so that the assert there never fails.
constexpr double kCosMaxSkewAngle = 0.866025;
61 
62 static BOOL_VAR(textord_tabfind_show_initialtabs, false, "Show tab candidates");
63 static BOOL_VAR(textord_tabfind_show_finaltabs, false, "Show tab vectors");
64 
65 TabFind::TabFind(int gridsize, const ICOORD& bleft, const ICOORD& tright,
66  TabVector_LIST* vlines, int vertical_x, int vertical_y,
67  int resolution)
68  : AlignedBlob(gridsize, bleft, tright),
69  resolution_(resolution),
70  image_origin_(0, tright.y() - 1),
71  v_it_(&vectors_) {
72  width_cb_ = nullptr;
73  v_it_.add_list_after(vlines);
74  SetVerticalSkewAndParallelize(vertical_x, vertical_y);
75  using namespace std::placeholders; // for _1
76  width_cb_ = std::bind(&TabFind::CommonWidth, this, _1);
77 }
78 
80 }
81 
83 
84 // Insert a list of blobs into the given grid (not necessarily this).
85 // If take_ownership is true, then the blobs are removed from the source list.
86 // See InsertBlob for the other arguments.
87 // It would seem to make more sense to swap this and grid, but this way
88 // around allows grid to not be derived from TabFind, eg a ColPartitionGrid,
89 // while the grid that provides the tab stops(this) has to be derived from
90 // TabFind.
91 void TabFind::InsertBlobsToGrid(bool h_spread, bool v_spread,
92  BLOBNBOX_LIST* blobs,
93  BBGrid<BLOBNBOX, BLOBNBOX_CLIST,
94  BLOBNBOX_C_IT>* grid) {
95  BLOBNBOX_IT blob_it(blobs);
96  int b_count = 0;
97  int reject_count = 0;
98  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
99  BLOBNBOX* blob = blob_it.data();
100 // if (InsertBlob(true, true, blob, grid)) {
101  if (InsertBlob(h_spread, v_spread, blob, grid)) {
102  ++b_count;
103  } else {
104  ++reject_count;
105  }
106  }
107  if (textord_debug_tabfind) {
108  tprintf("Inserted %d blobs into grid, %d rejected.\n",
109  b_count, reject_count);
110  }
111 }
112 
113 // Insert a single blob into the given grid (not necessarily this).
114 // If h_spread, then all cells covered horizontally by the box are
115 // used, otherwise, just the bottom-left. Similarly for v_spread.
116 // A side effect is that the left and right rule edges of the blob are
117 // set according to the tab vectors in this (not grid).
118 bool TabFind::InsertBlob(bool h_spread, bool v_spread, BLOBNBOX* blob,
119  BBGrid<BLOBNBOX, BLOBNBOX_CLIST,
120  BLOBNBOX_C_IT>* grid) {
121  TBOX box = blob->bounding_box();
122  blob->set_left_rule(LeftEdgeForBox(box, false, false));
123  blob->set_right_rule(RightEdgeForBox(box, false, false));
124  blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false));
125  blob->set_right_crossing_rule(RightEdgeForBox(box, true, false));
126  if (blob->joined_to_prev())
127  return false;
128  grid->InsertBBox(h_spread, v_spread, blob);
129  return true;
130 }
131 
132 // Calls SetBlobRuleEdges for all the blobs in the given block.
// The lists processed are blobs, small_blobs, noise_blobs and large_blobs,
// in that order.
// NOTE(review): listing line 133, which should carry this function's
// signature, is absent from this extract - restore it from the full source.
134  SetBlobRuleEdges(&block->blobs);
135  SetBlobRuleEdges(&block->small_blobs);
136  SetBlobRuleEdges(&block->noise_blobs);
137  SetBlobRuleEdges(&block->large_blobs);
138 }
139 
140 // Sets the left and right rule and crossing_rules for the blobs in the given
141 // list by fiding the next outermost tabvectors for each blob.
142 void TabFind::SetBlobRuleEdges(BLOBNBOX_LIST* blobs) {
143  BLOBNBOX_IT blob_it(blobs);
144  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
145  BLOBNBOX* blob = blob_it.data();
146  TBOX box = blob->bounding_box();
147  blob->set_left_rule(LeftEdgeForBox(box, false, false));
148  blob->set_right_rule(RightEdgeForBox(box, false, false));
149  blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false));
150  blob->set_right_crossing_rule(RightEdgeForBox(box, true, false));
151  }
152 }
153 
154 // Returns the gutter width of the given TabVector between the given y limits.
155 // Also returns x-shift to be added to the vector to clear any intersecting
156 // blobs. The shift is deducted from the returned gutter.
157 // If ignore_unmergeables is true, then blobs of UnMergeableType are
158 // ignored as if they don't exist. (Used for text on image.)
159 // max_gutter_width is used as the maximum width worth searching for in case
160 // there is nothing near the TabVector.
161 int TabFind::GutterWidth(int bottom_y, int top_y, const TabVector& v,
162  bool ignore_unmergeables, int max_gutter_width,
163  int* required_shift) {
164  bool right_to_left = v.IsLeftTab();
165  int bottom_x = v.XAtY(bottom_y);
166  int top_x = v.XAtY(top_y);
167  int start_x = right_to_left ? std::max(top_x, bottom_x) : std::min(top_x, bottom_x);
168  BlobGridSearch sidesearch(this);
169  sidesearch.StartSideSearch(start_x, bottom_y, top_y);
170  int min_gap = max_gutter_width;
171  *required_shift = 0;
172  BLOBNBOX* blob = nullptr;
173  while ((blob = sidesearch.NextSideSearch(right_to_left)) != nullptr) {
174  const TBOX& box = blob->bounding_box();
175  if (box.bottom() >= top_y || box.top() <= bottom_y)
176  continue; // Doesn't overlap enough.
177  if (box.height() >= gridsize() * 2 &&
178  box.height() > box.width() * kLineFragmentAspectRatio) {
179  // Skip likely separator line residue.
180  continue;
181  }
182  if (ignore_unmergeables && BLOBNBOX::UnMergeableType(blob->region_type()))
183  continue; // Skip non-text if required.
184  int mid_y = (box.bottom() + box.top()) / 2;
185  // We use the x at the mid-y so that the required_shift guarantees
186  // to clear all the blobs on the tab-stop. If we use the min/max
187  // of x at top/bottom of the blob, then exactness would be required,
188  // which is not a good thing.
189  int tab_x = v.XAtY(mid_y);
190  int gap;
191  if (right_to_left) {
192  gap = tab_x - box.right();
193  if (gap < 0 && box.left() - tab_x < *required_shift)
194  *required_shift = box.left() - tab_x;
195  } else {
196  gap = box.left() - tab_x;
197  if (gap < 0 && box.right() - tab_x > *required_shift)
198  *required_shift = box.right() - tab_x;
199  }
200  if (gap > 0 && gap < min_gap)
201  min_gap = gap;
202  }
203  // Result may be negative, in which case, this is a really bad tabstop.
204  return min_gap - abs(*required_shift);
205 }
206 
207 // Find the gutter width and distance to inner neighbour for the given blob.
208 void TabFind::GutterWidthAndNeighbourGap(int tab_x, int mean_height,
209  int max_gutter, bool left,
210  BLOBNBOX* bbox, int* gutter_width,
211  int* neighbour_gap) {
212  const TBOX& box = bbox->bounding_box();
213  // The gutter and internal sides of the box.
214  int gutter_x = left ? box.left() : box.right();
215  int internal_x = left ? box.right() : box.left();
216  // On ragged edges, the gutter side of the box is away from the tabstop.
217  int tab_gap = left ? gutter_x - tab_x : tab_x - gutter_x;
218  *gutter_width = max_gutter;
219  // If the box is away from the tabstop, we need to increase
220  // the allowed gutter width.
221  if (tab_gap > 0)
222  *gutter_width += tab_gap;
223  bool debug = WithinTestRegion(2, box.left(), box.bottom());
224  if (debug)
225  tprintf("Looking in gutter\n");
226  // Find the nearest blob on the outside of the column.
227  BLOBNBOX* gutter_bbox = AdjacentBlob(bbox, left,
228  bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0,
229  *gutter_width, box.top(), box.bottom());
230  if (gutter_bbox != nullptr) {
231  const TBOX& gutter_box = gutter_bbox->bounding_box();
232  *gutter_width = left ? tab_x - gutter_box.right()
233  : gutter_box.left() - tab_x;
234  }
235  if (*gutter_width >= max_gutter) {
236  // If there is no box because a tab was in the way, get the tab coord.
237  TBOX gutter_box(box);
238  if (left) {
239  gutter_box.set_left(tab_x - max_gutter - 1);
240  gutter_box.set_right(tab_x - max_gutter);
241  int tab_gutter = RightEdgeForBox(gutter_box, true, false);
242  if (tab_gutter < tab_x - 1)
243  *gutter_width = tab_x - tab_gutter;
244  } else {
245  gutter_box.set_left(tab_x + max_gutter);
246  gutter_box.set_right(tab_x + max_gutter + 1);
247  int tab_gutter = LeftEdgeForBox(gutter_box, true, false);
248  if (tab_gutter > tab_x + 1)
249  *gutter_width = tab_gutter - tab_x;
250  }
251  }
252  if (*gutter_width > max_gutter)
253  *gutter_width = max_gutter;
254  // Now look for a neighbour on the inside.
255  if (debug)
256  tprintf("Looking for neighbour\n");
257  BLOBNBOX* neighbour = AdjacentBlob(bbox, !left,
258  bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0,
259  *gutter_width, box.top(), box.bottom());
260  int neighbour_edge = left ? RightEdgeForBox(box, true, false)
261  : LeftEdgeForBox(box, true, false);
262  if (neighbour != nullptr) {
263  const TBOX& n_box = neighbour->bounding_box();
264  if (debug) {
265  tprintf("Found neighbour:");
266  n_box.print();
267  }
268  if (left && n_box.left() < neighbour_edge)
269  neighbour_edge = n_box.left();
270  else if (!left && n_box.right() > neighbour_edge)
271  neighbour_edge = n_box.right();
272  }
273  *neighbour_gap = left ? neighbour_edge - internal_x
274  : internal_x - neighbour_edge;
275 }
276 
277 // Return the x-coord that corresponds to the right edge for the given
278 // box. If there is a rule line to the right that vertically overlaps it,
279 // then return the x-coord of the rule line, otherwise return the right
280 // edge of the page. For details see RightTabForBox below.
281 int TabFind::RightEdgeForBox(const TBOX& box, bool crossing, bool extended) {
282  TabVector* v = RightTabForBox(box, crossing, extended);
283  return v == nullptr ? tright_.x() : v->XAtY((box.top() + box.bottom()) / 2);
284 }
285 // As RightEdgeForBox, but finds the left Edge instead.
286 int TabFind::LeftEdgeForBox(const TBOX& box, bool crossing, bool extended) {
287  TabVector* v = LeftTabForBox(box, crossing, extended);
288  return v == nullptr ? bleft_.x() : v->XAtY((box.top() + box.bottom()) / 2);
289 }
290 
291 // This comment documents how this function works.
292 // For its purpose and arguments, see the comment in tabfind.h.
293 // TabVectors are stored sorted by perpendicular distance of middle from
294 // the global mean vertical vector. Since the individual vectors can have
295 // differing directions, their XAtY for a given y is not necessarily in the
296 // right order. Therefore the search has to be run with a margin.
297 // The middle of a vector that passes through (x,y) cannot be higher than
298 // halfway from y to the top, or lower than halfway from y to the bottom
299 // of the coordinate range; therefore, the search margin is the range of
300 // sort keys between these halfway points. Any vector with a sort key greater
301 // than the upper margin must be to the right of x at y, and likewise any
302 // vector with a sort key less than the lower margin must pass to the left
303 // of x at y.
304 TabVector* TabFind::RightTabForBox(const TBOX& box, bool crossing,
305  bool extended) {
306  if (v_it_.empty())
307  return nullptr;
308  int top_y = box.top();
309  int bottom_y = box.bottom();
310  int mid_y = (top_y + bottom_y) / 2;
311  int right = crossing ? (box.left() + box.right()) / 2 : box.right();
312  int min_key, max_key;
313  SetupTabSearch(right, mid_y, &min_key, &max_key);
314  // Position the iterator at the first TabVector with sort_key >= min_key.
315  while (!v_it_.at_first() && v_it_.data()->sort_key() >= min_key)
316  v_it_.backward();
317  while (!v_it_.at_last() && v_it_.data()->sort_key() < min_key)
318  v_it_.forward();
319  // Find the leftmost tab vector that overlaps and has XAtY(mid_y) >= right.
320  TabVector* best_v = nullptr;
321  int best_x = -1;
322  int key_limit = -1;
323  do {
324  TabVector* v = v_it_.data();
325  int x = v->XAtY(mid_y);
326  if (x >= right &&
327  (v->VOverlap(top_y, bottom_y) > 0 ||
328  (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
329  if (best_v == nullptr || x < best_x) {
330  best_v = v;
331  best_x = x;
332  // We can guarantee that no better vector can be found if the
333  // sort key exceeds that of the best by max_key - min_key.
334  key_limit = v->sort_key() + max_key - min_key;
335  }
336  }
337  // Break when the search is done to avoid wrapping the iterator and
338  // thereby potentially slowing the next search.
339  if (v_it_.at_last() ||
340  (best_v != nullptr && v->sort_key() > key_limit))
341  break; // Prevent restarting list for next call.
342  v_it_.forward();
343  } while (!v_it_.at_first());
344  return best_v;
345 }
346 
347 // As RightTabForBox, but finds the left TabVector instead.
348 TabVector* TabFind::LeftTabForBox(const TBOX& box, bool crossing,
349  bool extended) {
350  if (v_it_.empty())
351  return nullptr;
352  int top_y = box.top();
353  int bottom_y = box.bottom();
354  int mid_y = (top_y + bottom_y) / 2;
355  int left = crossing ? (box.left() + box.right()) / 2 : box.left();
356  int min_key, max_key;
357  SetupTabSearch(left, mid_y, &min_key, &max_key);
358  // Position the iterator at the last TabVector with sort_key <= max_key.
359  while (!v_it_.at_last() && v_it_.data()->sort_key() <= max_key)
360  v_it_.forward();
361  while (!v_it_.at_first() && v_it_.data()->sort_key() > max_key) {
362  v_it_.backward();
363  }
364  // Find the rightmost tab vector that overlaps and has XAtY(mid_y) <= left.
365  TabVector* best_v = nullptr;
366  int best_x = -1;
367  int key_limit = -1;
368  do {
369  TabVector* v = v_it_.data();
370  int x = v->XAtY(mid_y);
371  if (x <= left &&
372  (v->VOverlap(top_y, bottom_y) > 0 ||
373  (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
374  if (best_v == nullptr || x > best_x) {
375  best_v = v;
376  best_x = x;
377  // We can guarantee that no better vector can be found if the
378  // sort key is less than that of the best by max_key - min_key.
379  key_limit = v->sort_key() - (max_key - min_key);
380  }
381  }
382  // Break when the search is done to avoid wrapping the iterator and
383  // thereby potentially slowing the next search.
384  if (v_it_.at_first() ||
385  (best_v != nullptr && v->sort_key() < key_limit))
386  break; // Prevent restarting list for next call.
387  v_it_.backward();
388  } while (!v_it_.at_last());
389  return best_v;
390 }
391 
392 // Return true if the given width is close to one of the common
393 // widths in column_widths_.
394 bool TabFind::CommonWidth(int width) {
395  width /= kColumnWidthFactor;
396  ICOORDELT_IT it(&column_widths_);
397  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
398  ICOORDELT* w = it.data();
399  if (w->x() - 1 <= width && width <= w->y() + 1)
400  return true;
401  }
402  return false;
403 }
404 
405 // Return true if the sizes are more than a
406 // factor of 2 different.
407 bool TabFind::DifferentSizes(int size1, int size2) {
408  return size1 > size2 * 2 || size2 > size1 * 2;
409 }
410 
411 // Return true if the sizes are more than a
412 // factor of 5 different.
413 bool TabFind::VeryDifferentSizes(int size1, int size2) {
414  return size1 > size2 * 5 || size2 > size1 * 5;
415 }
416 
418 
419 // Top-level function to find TabVectors in an input page block.
420 // Returns false if the detected skew angle is impossible.
421 // Applies the detected skew angle to deskew the tabs, blobs and part_grid.
// image_blobs, min_gutter_width, tabfind_aligned_gap_fraction and block are
// passed on to FindInitialTabVectors; hlines, image_blobs, block, deskew and
// reskew are passed on to Deskew. part_grid is used for ComputeColumnWidths
// and is itself deskewed with *deskew once Deskew has succeeded.
422 bool TabFind::FindTabVectors(TabVector_LIST* hlines,
423  BLOBNBOX_LIST* image_blobs, TO_BLOCK* block,
424  int min_gutter_width,
425  double tabfind_aligned_gap_fraction,
426  ColPartitionGrid* part_grid,
427  FCOORD* deskew, FCOORD* reskew) {
428  ScrollView* tab_win = FindInitialTabVectors(image_blobs, min_gutter_width,
429  tabfind_aligned_gap_fraction,
430  block);
431  ComputeColumnWidths(tab_win, part_grid);
// NOTE(review): listing line 432 is absent from this extract, so a statement
// may be missing between ComputeColumnWidths and SortVectors - check the
// full source.
433  SortVectors();
434  CleanupTabs();
435  if (!Deskew(hlines, image_blobs, block, deskew, reskew))
436  return false; // Skew angle is too large.
437  part_grid->Deskew(*deskew);
438  ApplyTabConstraints();
439  #ifndef GRAPHICS_DISABLED
// Debug display of the final tabs, only when the
// textord_tabfind_show_finaltabs flag is set.
440  if (textord_tabfind_show_finaltabs) {
441  tab_win = MakeWindow(640, 50, "FinalTabs");
442  DisplayBoxes(tab_win);
443  DisplayTabs("FinalTabs", tab_win);
444  tab_win = DisplayTabVectors(tab_win);
445  }
446  #endif // GRAPHICS_DISABLED
447  return true;
448 }
449 
450 // Top-level function to not find TabVectors in an input page block,
451 // but setup for single column mode.
452 void TabFind::DontFindTabVectors(BLOBNBOX_LIST* image_blobs, TO_BLOCK* block,
453  FCOORD* deskew, FCOORD* reskew) {
454  InsertBlobsToGrid(false, false, image_blobs, this);
455  InsertBlobsToGrid(true, false, &block->blobs, this);
456  deskew->set_x(1.0f);
457  deskew->set_y(0.0f);
458  reskew->set_x(1.0f);
459  reskew->set_y(0.0f);
460 }
461 
462 // Cleans up the lists of blobs in the block ready for use by TabFind.
463 // Large blobs that look like text are moved to the main blobs list.
464 // Main blobs that are superseded by the image blobs are deleted.
// NOTE(review): listing line 465, which should carry this function's
// signature, is absent from this extract - restore it from the full source.
// The body below works on a pointer named block.
466  BLOBNBOX_IT large_it = &block->large_blobs;
467  BLOBNBOX_IT blob_it = &block->blobs;
468  int b_count = 0;
// Every large blob that has an owner is extracted from large_blobs and
// appended to the end of the main blobs list.
469  for (large_it.mark_cycle_pt(); !large_it.cycled_list(); large_it.forward()) {
470  BLOBNBOX* large_blob = large_it.data();
471  if (large_blob->owner() != nullptr) {
472  blob_it.add_to_end(large_it.extract());
473  ++b_count;
474  }
475  }
476  if (textord_debug_tabfind) {
477  tprintf("Moved %d large blobs to normal list\n",
478  b_count);
479  #ifndef GRAPHICS_DISABLED
// Debug plot of the graded and noise blobs of the block.
480  ScrollView* rej_win = MakeWindow(500, 300, "Image blobs");
481  block->plot_graded_blobs(rej_win);
482  block->plot_noise_blobs(rej_win);
483  rej_win->Update();
484  #endif // GRAPHICS_DISABLED
485  }
486  block->DeleteUnownedNoise();
487 }
488 
489 // Helper function to setup search limits for *TabForBox.
490 void TabFind::SetupTabSearch(int x, int y, int* min_key, int* max_key) {
491  int key1 = TabVector::SortKey(vertical_skew_, x, (y + tright_.y()) / 2);
492  int key2 = TabVector::SortKey(vertical_skew_, x, (y + bleft_.y()) / 2);
493  *min_key = std::min(key1, key2);
494  *max_key = std::max(key1, key2);
495 }
496 
// Displays every vector in vectors_ on tab_win, updates the window, and
// returns tab_win. With GRAPHICS_DISABLED defined, tab_win is returned
// untouched.
// NOTE(review): listing line 497, which should carry this function's
// signature, is absent from this extract - restore it from the full source.
498 #ifndef GRAPHICS_DISABLED
499  // For every vector, display it.
500  TabVector_IT it(&vectors_);
501  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
502  TabVector* vector = it.data();
503  vector->Display(tab_win);
504  }
505  tab_win->Update();
506 #endif
507  return tab_win;
508 }
509 
510 // PRIVATE CODE.
511 //
512 // First part of FindTabVectors, which may be used twice if the text
513 // is mostly of vertical alignment.
// image_blobs may be nullptr, in which case only the main blobs of block are
// inserted into the grid. min_gutter_width and tabfind_aligned_gap_fraction
// are passed on to FindTabBoxes, and min_gutter_width also to
// FindAllTabVectors.
// Returns the window produced by FindTabBoxes (possibly replaced by the
// result of DisplayTabVectors), which may be nullptr.
514 ScrollView* TabFind::FindInitialTabVectors(BLOBNBOX_LIST* image_blobs,
515  int min_gutter_width,
516  double tabfind_aligned_gap_fraction,
517  TO_BLOCK* block) {
// Debug display of the vectors held so far, only when the
// textord_tabfind_show_initialtabs flag is set.
518  if (textord_tabfind_show_initialtabs) {
519  ScrollView* line_win = MakeWindow(0, 0, "VerticalLines");
520  line_win = DisplayTabVectors(line_win);
521  }
522  // Prepare the grid.
523  if (image_blobs != nullptr)
524  InsertBlobsToGrid(true, false, image_blobs, this);
525  InsertBlobsToGrid(true, false, &block->blobs, this);
526  ScrollView* initial_win = FindTabBoxes(min_gutter_width,
527  tabfind_aligned_gap_fraction);
528  FindAllTabVectors(min_gutter_width);
529 
// NOTE(review): listing line 530 is absent from this extract, so a statement
// may be missing between FindAllTabVectors and SortVectors - check the full
// source.
531  SortVectors();
532  EvaluateTabs();
533  if (textord_tabfind_show_initialtabs && initial_win != nullptr)
534  initial_win = DisplayTabVectors(initial_win);
535  MarkVerticalText();
536  return initial_win;
537 }
538 
539 // Helper displays all the boxes in the given vector on the given window.
540 static void DisplayBoxVector(const GenericVector<BLOBNBOX*>& boxes,
541  ScrollView* win) {
542  #ifndef GRAPHICS_DISABLED
543  for (int i = 0; i < boxes.size(); ++i) {
544  TBOX box = boxes[i]->bounding_box();
545  int left_x = box.left();
546  int right_x = box.right();
547  int top_y = box.top();
548  int bottom_y = box.bottom();
549  ScrollView::Color box_color = boxes[i]->BoxColor();
550  win->Pen(box_color);
551  win->Rectangle(left_x, bottom_y, right_x, top_y);
552  }
553  win->Update();
554  #endif // GRAPHICS_DISABLED
555 }
556 
557 // For each box in the grid, decide whether it is a candidate tab-stop,
558 // and if so add it to the left/right tab boxes.
559 ScrollView* TabFind::FindTabBoxes(int min_gutter_width,
560  double tabfind_aligned_gap_fraction) {
561  left_tab_boxes_.clear();
562  right_tab_boxes_.clear();
563  // For every bbox in the grid, determine whether it uses a tab on an edge.
564  GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> gsearch(this);
565  gsearch.StartFullSearch();
566  BLOBNBOX* bbox;
567  while ((bbox = gsearch.NextFullSearch()) != nullptr) {
568  if (TestBoxForTabs(bbox, min_gutter_width, tabfind_aligned_gap_fraction)) {
569  // If it is any kind of tab, insert it into the vectors.
570  if (bbox->left_tab_type() != TT_NONE)
571  left_tab_boxes_.push_back(bbox);
572  if (bbox->right_tab_type() != TT_NONE)
573  right_tab_boxes_.push_back(bbox);
574  }
575  }
576  // Sort left tabs by left and right by right to see the outermost one first
577  // on a ragged tab.
578  left_tab_boxes_.sort(SortByBoxLeft<BLOBNBOX>);
579  right_tab_boxes_.sort(SortRightToLeft<BLOBNBOX>);
580  ScrollView* tab_win = nullptr;
581  #ifndef GRAPHICS_DISABLED
582  if (textord_tabfind_show_initialtabs) {
583  tab_win = MakeWindow(0, 100, "InitialTabs");
584  tab_win->Pen(ScrollView::BLUE);
585  tab_win->Brush(ScrollView::NONE);
586  // Display the left and right tab boxes.
587  DisplayBoxVector(left_tab_boxes_, tab_win);
588  DisplayBoxVector(right_tab_boxes_, tab_win);
589  tab_win = DisplayTabs("Tabs", tab_win);
590  }
591  #endif // GRAPHICS_DISABLED
592  return tab_win;
593 }
594 
// Decides whether the left and/or right edge of bbox could lie on a tab
// stop, by examining the neighbours found in a radial search of the grid
// around the middle of the box.
// The minimum gap required beside a tab is the larger of min_gutter_width
// and the box height scaled by tabfind_aligned_gap_fraction.
// Returns true if the left or right tab type of bbox ends up other than
// TT_NONE.
595 bool TabFind::TestBoxForTabs(BLOBNBOX* bbox, int min_gutter_width,
596  double tabfind_aligned_gap_fraction) {
597  GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> radsearch(this);
598  TBOX box = bbox->bounding_box();
599  // If there are separator lines, get the column edges.
600  int left_column_edge = bbox->left_rule();
601  int right_column_edge = bbox->right_rule();
602  // The edges of the bounding box of the blob being processed.
603  int left_x = box.left();
604  int right_x = box.right();
605  int top_y = box.top();
606  int bottom_y = box.bottom();
607  int height = box.height();
608  bool debug = WithinTestRegion(3, left_x, top_y);
609  if (debug) {
610  tprintf("Column edges for blob at (%d,%d)->(%d,%d) are [%d, %d]\n",
611  left_x, top_y, right_x, bottom_y,
612  left_column_edge, right_column_edge);
613  }
614  // Compute a search radius based on a multiple of the height.
// The radius is expressed in grid cells, rounded up.
615  int radius = (height * kTabRadiusFactor + gridsize_ - 1) / gridsize_;
616  radsearch.StartRadSearch((left_x + right_x)/2, (top_y + bottom_y)/2, radius);
617  // In Vertical Page mode, once we have an estimate of the vertical line
618  // spacing, the minimum amount of gutter space before a possible tab is
619  // increased under the assumption that column partition is always larger
620  // than line spacing.
621  int min_spacing =
622  static_cast<int>(height * tabfind_aligned_gap_fraction);
623  if (min_gutter_width > min_spacing)
624  min_spacing = min_gutter_width;
625  int min_ragged_gutter = kRaggedGutterMultiple * gridsize();
626  if (min_gutter_width > min_ragged_gutter)
627  min_ragged_gutter = min_gutter_width;
// A neighbour whose right edge reaches target_right intrudes into the left
// gutter; one whose left edge reaches target_left intrudes into the right
// gutter.
628  int target_right = left_x - min_spacing;
629  int target_left = right_x + min_spacing;
630  // We will be evaluating whether the left edge could be a left tab, and
631  // whether the right edge could be a right tab.
632  // A box can be a tab if its bool is_(left/right)_tab remains true, meaning
633  // that no blobs have been found in the gutter during the radial search.
634  // A box can also be a tab if there are objects in the gutter only above
635  // or only below, and there are aligned objects on the opposite side, but
636  // not too many unaligned objects. The maybe_(left/right)_tab_up counts
637  // aligned objects above and negatively counts unaligned objects above,
638  // and is set to -INT32_MAX if a gutter object is found above.
639  // The other 3 maybe ints work similarly for the other sides.
640  // These conditions are very strict, to minimize false positives, and really
641  // only aligned tabs and outermost ragged tab blobs will qualify, so we
642  // also have maybe_ragged_left/right with less stringent rules.
643  // A blob that is maybe_ragged_left/right will be further qualified later,
644  // using the min_ragged_gutter.
645  bool is_left_tab = true;
646  bool is_right_tab = true;
647  bool maybe_ragged_left = true;
648  bool maybe_ragged_right = true;
649  int maybe_left_tab_up = 0;
650  int maybe_right_tab_up = 0;
651  int maybe_left_tab_down = 0;
652  int maybe_right_tab_down = 0;
// A leader on one side rules out every kind of tab on that side.
653  if (bbox->leader_on_left()) {
654  is_left_tab = false;
655  maybe_ragged_left = false;
656  maybe_left_tab_up = -INT32_MAX;
657  maybe_left_tab_down = -INT32_MAX;
658  }
659  if (bbox->leader_on_right()) {
660  is_right_tab = false;
661  maybe_ragged_right = false;
662  maybe_right_tab_up = -INT32_MAX;
663  maybe_right_tab_down = -INT32_MAX;
664  }
665  int alignment_tolerance = static_cast<int>(resolution_ * kAlignedFraction);
666  BLOBNBOX* neighbour = nullptr;
667  while ((neighbour = radsearch.NextRadSearch()) != nullptr) {
668  if (neighbour == bbox)
669  continue;
670  TBOX nbox = neighbour->bounding_box();
671  int n_left = nbox.left();
672  int n_right = nbox.right();
673  if (debug)
674  tprintf("Neighbour at (%d,%d)->(%d,%d)\n",
675  n_left, nbox.bottom(), n_right, nbox.top());
676  // If the neighbouring blob is the wrong side of a separator line, then it
677  // "doesn't exist" as far as we are concerned.
678  if (n_right > right_column_edge || n_left < left_column_edge ||
679  left_x < neighbour->left_rule() || right_x > neighbour->right_rule())
680  continue; // Separator line in the way.
681  int n_mid_x = (n_left + n_right) / 2;
682  int n_mid_y = (nbox.top() + nbox.bottom()) / 2;
// Left edge tests.
683  if (n_mid_x <= left_x && n_right >= target_right) {
684  if (debug)
685  tprintf("Not a left tab\n");
686  is_left_tab = false;
687  if (n_mid_y < top_y)
688  maybe_left_tab_down = -INT32_MAX;
689  if (n_mid_y > bottom_y)
690  maybe_left_tab_up = -INT32_MAX;
691  } else if (NearlyEqual(left_x, n_left, alignment_tolerance)) {
692  if (debug)
693  tprintf("Maybe a left tab\n");
694  if (n_mid_y > top_y && maybe_left_tab_up > -INT32_MAX)
695  ++maybe_left_tab_up;
696  if (n_mid_y < bottom_y && maybe_left_tab_down > -INT32_MAX)
697  ++maybe_left_tab_down;
698  } else if (n_left < left_x && n_right >= left_x) {
699  // Overlaps but not aligned so negative points on a maybe.
700  if (debug)
701  tprintf("Maybe Not a left tab\n");
702  if (n_mid_y > top_y && maybe_left_tab_up > -INT32_MAX)
703  --maybe_left_tab_up;
704  if (n_mid_y < bottom_y && maybe_left_tab_down > -INT32_MAX)
705  --maybe_left_tab_down;
706  }
707  if (n_left < left_x && nbox.y_overlap(box) && n_right >= target_right) {
708  maybe_ragged_left = false;
709  if (debug)
710  tprintf("Not a ragged left\n");
711  }
// Right edge tests, mirroring the left edge tests above.
712  if (n_mid_x >= right_x && n_left <= target_left) {
713  if (debug)
714  tprintf("Not a right tab\n");
715  is_right_tab = false;
716  if (n_mid_y < top_y)
717  maybe_right_tab_down = -INT32_MAX;
718  if (n_mid_y > bottom_y)
719  maybe_right_tab_up = -INT32_MAX;
720  } else if (NearlyEqual(right_x, n_right, alignment_tolerance)) {
721  if (debug)
722  tprintf("Maybe a right tab\n");
723  if (n_mid_y > top_y && maybe_right_tab_up > -INT32_MAX)
724  ++maybe_right_tab_up;
725  if (n_mid_y < bottom_y && maybe_right_tab_down > -INT32_MAX)
726  ++maybe_right_tab_down;
727  } else if (n_right > right_x && n_left <= right_x) {
728  // Overlaps but not aligned so negative points on a maybe.
729  if (debug)
730  tprintf("Maybe Not a right tab\n");
731  if (n_mid_y > top_y && maybe_right_tab_up > -INT32_MAX)
732  --maybe_right_tab_up;
733  if (n_mid_y < bottom_y && maybe_right_tab_down > -INT32_MAX)
734  --maybe_right_tab_down;
735  }
736  if (n_right > right_x && nbox.y_overlap(box) && n_left <= target_left) {
737  maybe_ragged_right = false;
738  if (debug)
739  tprintf("Not a ragged right\n");
740  }
// Stop searching once all four maybe counters have been ruled out.
741  if (maybe_left_tab_down == -INT32_MAX && maybe_left_tab_up == -INT32_MAX &&
742  maybe_right_tab_down == -INT32_MAX && maybe_right_tab_up == -INT32_MAX)
743  break;
744  }
// NOTE(review): listing lines 746, 748, 753, 756 and 758 are absent from
// this extract, so most branches below appear empty. Judging by the debug
// output further down, they presumably assign TT_MAYBE_ALIGNED,
// TT_MAYBE_RAGGED or TT_NONE to the left/right tab type - restore them from
// the full source.
745  if (is_left_tab || maybe_left_tab_up > 1 || maybe_left_tab_down > 1) {
747  } else if (maybe_ragged_left && ConfirmRaggedLeft(bbox, min_ragged_gutter)) {
749  } else {
750  bbox->set_left_tab_type(TT_NONE);
751  }
752  if (is_right_tab || maybe_right_tab_up > 1 || maybe_right_tab_down > 1) {
754  } else if (maybe_ragged_right &&
755  ConfirmRaggedRight(bbox, min_ragged_gutter)) {
757  } else {
759  }
760  if (debug) {
761  tprintf("Left result = %s, Right result=%s\n",
762  bbox->left_tab_type() == TT_MAYBE_ALIGNED ? "Aligned" :
763  (bbox->left_tab_type() == TT_MAYBE_RAGGED ? "Ragged" : "None"),
764  bbox->right_tab_type() == TT_MAYBE_ALIGNED ? "Aligned" :
765  (bbox->right_tab_type() == TT_MAYBE_RAGGED ? "Ragged" : "None"));
766  }
767  return bbox->left_tab_type() != TT_NONE || bbox->right_tab_type() != TT_NONE;
768 }
769 
770 // Returns true if there is nothing in the rectangle of width min_gutter to
771 // the left of bbox.
772 bool TabFind::ConfirmRaggedLeft(BLOBNBOX* bbox, int min_gutter) {
773  TBOX search_box(bbox->bounding_box());
774  search_box.set_right(search_box.left());
775  search_box.set_left(search_box.left() - min_gutter);
776  return NothingYOverlapsInBox(search_box, bbox->bounding_box());
777 }
778 
779 // Returns true if there is nothing in the rectangle of width min_gutter to
780 // the right of bbox.
781 bool TabFind::ConfirmRaggedRight(BLOBNBOX* bbox, int min_gutter) {
782  TBOX search_box(bbox->bounding_box());
783  search_box.set_left(search_box.right());
784  search_box.set_right(search_box.right() + min_gutter);
785  return NothingYOverlapsInBox(search_box, bbox->bounding_box());
786 }
787 
788 // Returns true if there is nothing in the given search_box that vertically
789 // overlaps target_box other than target_box itself.
790 bool TabFind::NothingYOverlapsInBox(const TBOX& search_box,
791  const TBOX& target_box) {
792  BlobGridSearch rsearch(this);
793  rsearch.StartRectSearch(search_box);
794  BLOBNBOX* blob;
795  while ((blob = rsearch.NextRectSearch()) != nullptr) {
796  const TBOX& box = blob->bounding_box();
797  if (box.y_overlap(target_box) && !(box == target_box))
798  return false;
799  }
800  return true;
801 }
802 
803 void TabFind::FindAllTabVectors(int min_gutter_width) {
804  // A list of vectors that will be created in estimating the skew.
805  TabVector_LIST dummy_vectors;
806  // An estimate of the vertical direction, revised as more lines are added.
807  int vertical_x = 0;
808  int vertical_y = 1;
809  // Find an estimate of the vertical direction by finding some tab vectors.
810  // Slowly up the search size until we get some vectors.
811  for (int search_size = kMinVerticalSearch; search_size < kMaxVerticalSearch;
812  search_size += kMinVerticalSearch) {
813  int vector_count = FindTabVectors(search_size, TA_LEFT_ALIGNED,
814  min_gutter_width,
815  &dummy_vectors,
816  &vertical_x, &vertical_y);
817  vector_count += FindTabVectors(search_size, TA_RIGHT_ALIGNED,
818  min_gutter_width,
819  &dummy_vectors,
820  &vertical_x, &vertical_y);
821  if (vector_count > 0)
822  break;
823  }
824  // Get rid of the test vectors and reset the types of the tabs.
825  dummy_vectors.clear();
826  for (int i = 0; i < left_tab_boxes_.size(); ++i) {
827  BLOBNBOX* bbox = left_tab_boxes_[i];
828  if (bbox->left_tab_type() == TT_CONFIRMED)
830  }
831  for (int i = 0; i < right_tab_boxes_.size(); ++i) {
832  BLOBNBOX* bbox = right_tab_boxes_[i];
833  if (bbox->right_tab_type() == TT_CONFIRMED)
835  }
836  if (textord_debug_tabfind) {
837  tprintf("Beginning real tab search with vertical = %d,%d...\n",
838  vertical_x, vertical_y);
839  }
840  // Now do the real thing ,but keep the vectors in the dummy_vectors list
841  // until they are all done, so we don't get the tab vectors confused with
842  // the rule line vectors.
844  &dummy_vectors, &vertical_x, &vertical_y);
846  &dummy_vectors, &vertical_x, &vertical_y);
848  &dummy_vectors, &vertical_x, &vertical_y);
850  &dummy_vectors, &vertical_x, &vertical_y);
851  // Now add the vectors to the vectors_ list.
852  TabVector_IT v_it(&vectors_);
853  v_it.add_list_after(&dummy_vectors);
854  // Now use the summed (mean) vertical vector as the direction for everything.
855  SetVerticalSkewAndParallelize(vertical_x, vertical_y);
856 }
857 
858 // Helper for FindAllTabVectors finds the vectors of a particular type.
859 int TabFind::FindTabVectors(int search_size_multiple, TabAlignment alignment,
860  int min_gutter_width, TabVector_LIST* vectors,
861  int* vertical_x, int* vertical_y) {
862  TabVector_IT vector_it(vectors);
863  int vector_count = 0;
864  // Search the right or left tab boxes, looking for tab vectors.
865  bool right = alignment == TA_RIGHT_ALIGNED || alignment == TA_RIGHT_RAGGED;
866  const GenericVector<BLOBNBOX*>& boxes = right ? right_tab_boxes_
867  : left_tab_boxes_;
868  for (int i = 0; i < boxes.size(); ++i) {
869  BLOBNBOX* bbox = boxes[i];
870  if ((!right && bbox->left_tab_type() == TT_MAYBE_ALIGNED) ||
871  (right && bbox->right_tab_type() == TT_MAYBE_ALIGNED)) {
872  TabVector* vector = FindTabVector(search_size_multiple, min_gutter_width,
873  alignment,
874  bbox, vertical_x, vertical_y);
875  if (vector != nullptr) {
876  ++vector_count;
877  vector_it.add_to_end(vector);
878  }
879  }
880  }
881  return vector_count;
882 }
883 
884 // Finds a vector corresponding to a tabstop running through the
885 // given box of the given alignment type.
886 // search_size_multiple is a multiple of height used to control
887 // the size of the search.
888 // vertical_x and y are updated with an estimate of the real
889 // vertical direction. (skew finding.)
890 // Returns nullptr if no decent tabstop can be found.
891 TabVector* TabFind::FindTabVector(int search_size_multiple,
892  int min_gutter_width,
893  TabAlignment alignment,
894  BLOBNBOX* bbox,
895  int* vertical_x, int* vertical_y) {
896  int height = std::max(static_cast<int>(bbox->bounding_box().height()), gridsize());
897  AlignedBlobParams align_params(*vertical_x, *vertical_y,
898  height,
899  search_size_multiple, min_gutter_width,
900  resolution_, alignment);
901  // FindVerticalAlignment is in the parent (AlignedBlob) class.
902  return FindVerticalAlignment(align_params, bbox, vertical_x, vertical_y);
903 }
904 
905 // Set the vertical_skew_ member from the given vector and refit
906 // all vectors parallel to the skew vector.
907 void TabFind::SetVerticalSkewAndParallelize(int vertical_x, int vertical_y) {
908  // Fit the vertical vector into an ICOORD, which is 16 bit.
909  vertical_skew_.set_with_shrink(vertical_x, vertical_y);
911  tprintf("Vertical skew vector=(%d,%d)\n",
913  v_it_.set_to_list(&vectors_);
914  for (v_it_.mark_cycle_pt(); !v_it_.cycled_list(); v_it_.forward()) {
915  TabVector* v = v_it_.data();
916  v->Fit(vertical_skew_, true);
917  }
918  // Now sort the vectors as their direction has potentially changed.
919  SortVectors();
920 }
921 
922 // Sort all the current vectors using the given vertical direction vector.
923 void TabFind::SortVectors() {
924  vectors_.sort(TabVector::SortVectorsByKey);
925  v_it_.set_to_list(&vectors_);
926 }
927 
928 // Evaluate all the current tab vectors.
929 void TabFind::EvaluateTabs() {
930  TabVector_IT rule_it(&vectors_);
931  for (rule_it.mark_cycle_pt(); !rule_it.cycled_list(); rule_it.forward()) {
932  TabVector* tab = rule_it.data();
933  if (!tab->IsSeparator()) {
934  tab->Evaluate(vertical_skew_, this);
935  if (tab->BoxCount() < kMinEvaluatedTabs) {
936  if (textord_debug_tabfind > 2)
937  tab->Print("Too few boxes");
938  delete rule_it.extract();
939  v_it_.set_to_list(&vectors_);
940  } else if (WithinTestRegion(3, tab->startpt().x(), tab->startpt().y())) {
941  tab->Print("Evaluated tab");
942  }
943  }
944  }
945 }
946 
947 // Trace textlines from one side to the other of each tab vector, saving
948 // the most frequent column widths found in a list so that a given width
949 // can be tested for being a common width with a simple callback function.
950 void TabFind::ComputeColumnWidths(ScrollView* tab_win,
951  ColPartitionGrid* part_grid) {
952  #ifndef GRAPHICS_DISABLED
953  if (tab_win != nullptr)
954  tab_win->Pen(ScrollView::WHITE);
955  #endif // GRAPHICS_DISABLED
956  // Accumulate column sections into a STATS
957  int col_widths_size = (tright_.x() - bleft_.x()) / kColumnWidthFactor;
958  STATS col_widths(0, col_widths_size + 1);
959  ApplyPartitionsToColumnWidths(part_grid, &col_widths);
960  #ifndef GRAPHICS_DISABLED
961  if (tab_win != nullptr) {
962  tab_win->Update();
963  }
964  #endif // GRAPHICS_DISABLED
965  if (textord_debug_tabfind > 1)
966  col_widths.print();
967  // Now make a list of column widths.
968  MakeColumnWidths(col_widths_size, &col_widths);
969  // Turn the column width into a range.
970  ApplyPartitionsToColumnWidths(part_grid, nullptr);
971 }
972 
973 // Finds column width and:
974 // if col_widths is not null (pass1):
975 // pair-up tab vectors with existing ColPartitions and accumulate widths.
976 // else (pass2):
977 // find the largest real partition width for each recorded column width,
978 // to be used as the minimum acceptable width.
979 void TabFind::ApplyPartitionsToColumnWidths(ColPartitionGrid* part_grid,
980  STATS* col_widths) {
981  // For every ColPartition in the part_grid, add partners to the tabvectors
982  // and accumulate the column widths.
983  ColPartitionGridSearch gsearch(part_grid);
984  gsearch.StartFullSearch();
985  ColPartition* part;
986  while ((part = gsearch.NextFullSearch()) != nullptr) {
987  BLOBNBOX_C_IT blob_it(part->boxes());
988  if (blob_it.empty())
989  continue;
990  BLOBNBOX* left_blob = blob_it.data();
991  blob_it.move_to_last();
992  BLOBNBOX* right_blob = blob_it.data();
993  TabVector* left_vector = LeftTabForBox(left_blob->bounding_box(),
994  true, false);
995  if (left_vector == nullptr || left_vector->IsRightTab())
996  continue;
997  TabVector* right_vector = RightTabForBox(right_blob->bounding_box(),
998  true, false);
999  if (right_vector == nullptr || right_vector->IsLeftTab())
1000  continue;
1001 
1002  int line_left = left_vector->XAtY(left_blob->bounding_box().bottom());
1003  int line_right = right_vector->XAtY(right_blob->bounding_box().bottom());
1004  // Add to STATS of measurements if the width is significant.
1005  int width = line_right - line_left;
1006  if (col_widths != nullptr) {
1007  AddPartnerVector(left_blob, right_blob, left_vector, right_vector);
1008  if (width >= kMinColumnWidth)
1009  col_widths->add(width / kColumnWidthFactor, 1);
1010  } else {
1011  width /= kColumnWidthFactor;
1012  ICOORDELT_IT it(&column_widths_);
1013  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1014  ICOORDELT* w = it.data();
1015  if (NearlyEqual<int>(width, w->y(), 1)) {
1016  int true_width = part->bounding_box().width() / kColumnWidthFactor;
1017  if (true_width <= w->y() && true_width > w->x())
1018  w->set_x(true_width);
1019  break;
1020  }
1021  }
1022  }
1023  }
1024 }
1025 
1026 // Helper makes the list of common column widths in column_widths_ from the
1027 // input col_widths. Destroys the content of col_widths by repeatedly
1028 // finding the mode and erasing the peak.
1029 void TabFind::MakeColumnWidths(int col_widths_size, STATS* col_widths) {
1030  ICOORDELT_IT w_it(&column_widths_);
1031  int total_col_count = col_widths->get_total();
1032  while (col_widths->get_total() > 0) {
1033  int width = col_widths->mode();
1034  int col_count = col_widths->pile_count(width);
1035  col_widths->add(width, -col_count);
1036  // Get the entire peak.
1037  for (int left = width - 1; left > 0 &&
1038  col_widths->pile_count(left) > 0;
1039  --left) {
1040  int new_count = col_widths->pile_count(left);
1041  col_count += new_count;
1042  col_widths->add(left, -new_count);
1043  }
1044  for (int right = width + 1; right < col_widths_size &&
1045  col_widths->pile_count(right) > 0;
1046  ++right) {
1047  int new_count = col_widths->pile_count(right);
1048  col_count += new_count;
1049  col_widths->add(right, -new_count);
1050  }
1051  if (col_count > kMinLinesInColumn &&
1052  col_count > kMinFractionalLinesInColumn * total_col_count) {
1053  auto* w = new ICOORDELT(0, width);
1054  w_it.add_after_then_move(w);
1056  tprintf("Column of width %d has %d = %.2f%% lines\n",
1057  width * kColumnWidthFactor, col_count,
1058  100.0 * col_count / total_col_count);
1059  }
1060  }
1061 }
1062 
1063 // Mark blobs as being in a vertical text line where that is the case.
1064 // Returns true if the majority of the image is vertical text lines.
1065 void TabFind::MarkVerticalText() {
1067  tprintf("Checking for vertical lines\n");
1068  BlobGridSearch gsearch(this);
1069  gsearch.StartFullSearch();
1070  BLOBNBOX* blob = nullptr;
1071  while ((blob = gsearch.NextFullSearch()) != nullptr) {
1072  if (blob->region_type() < BRT_UNKNOWN)
1073  continue;
1074  if (blob->UniquelyVertical()) {
1076  }
1077  }
1078 }
1079 
1080 int TabFind::FindMedianGutterWidth(TabVector_LIST *lines) {
1081  TabVector_IT it(lines);
1082  int prev_right = -1;
1083  int max_gap = static_cast<int>(kMaxGutterWidthAbsolute * resolution_);
1084  STATS gaps(0, max_gap);
1085  STATS heights(0, max_gap);
1086  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1087  TabVector* v = it.data();
1088  TabVector* partner = v->GetSinglePartner();
1089  if (!v->IsLeftTab() || v->IsSeparator() || !partner) continue;
1090  heights.add(partner->startpt().x() - v->startpt().x(), 1);
1091  if (prev_right > 0 && v->startpt().x() > prev_right) {
1092  gaps.add(v->startpt().x() - prev_right, 1);
1093  }
1094  prev_right = partner->startpt().x();
1095  }
1097  tprintf("TabGutter total %d median_gap %.2f median_hgt %.2f\n",
1098  gaps.get_total(), gaps.median(), heights.median());
1099  if (gaps.get_total() < kMinLinesInColumn) return 0;
1100  return static_cast<int>(gaps.median());
1101 }
1102 
1103 // Find the next adjacent (looking to the left or right) blob on this text
1104 // line, with the constraint that it must vertically significantly overlap
1105 // the [top_y, bottom_y] range.
1106 // If ignore_images is true, then blobs with aligned_text() < 0 are treated
1107 // as if they do not exist.
1108 BLOBNBOX* TabFind::AdjacentBlob(const BLOBNBOX* bbox,
1109  bool look_left, bool ignore_images,
1110  double min_overlap_fraction,
1111  int gap_limit, int top_y, int bottom_y) {
1112  GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> sidesearch(this);
1113  const TBOX& box = bbox->bounding_box();
1114  int left = box.left();
1115  int right = box.right();
1116  int mid_x = (left + right) / 2;
1117  sidesearch.StartSideSearch(mid_x, bottom_y, top_y);
1118  int best_gap = 0;
1119  bool debug = WithinTestRegion(3, left, bottom_y);
1120  BLOBNBOX* result = nullptr;
1121  BLOBNBOX* neighbour = nullptr;
1122  while ((neighbour = sidesearch.NextSideSearch(look_left)) != nullptr) {
1123  if (debug) {
1124  tprintf("Adjacent blob: considering box:");
1125  neighbour->bounding_box().print();
1126  }
1127  if (neighbour == bbox ||
1128  (ignore_images && neighbour->region_type() < BRT_UNKNOWN))
1129  continue;
1130  const TBOX& nbox = neighbour->bounding_box();
1131  int n_top_y = nbox.top();
1132  int n_bottom_y = nbox.bottom();
1133  int v_overlap = std::min(n_top_y, top_y) - std::max(n_bottom_y, bottom_y);
1134  int height = top_y - bottom_y;
1135  int n_height = n_top_y - n_bottom_y;
1136  if (v_overlap > min_overlap_fraction * std::min(height, n_height) &&
1137  (min_overlap_fraction == 0.0 || !DifferentSizes(height, n_height))) {
1138  int n_left = nbox.left();
1139  int n_right = nbox.right();
1140  int h_gap = std::max(n_left, left) - std::min(n_right, right);
1141  int n_mid_x = (n_left + n_right) / 2;
1142  if (look_left == (n_mid_x < mid_x) && n_mid_x != mid_x) {
1143  if (h_gap > gap_limit) {
1144  // Hit a big gap before next tab so don't return anything.
1145  if (debug)
1146  tprintf("Giving up due to big gap = %d vs %d\n",
1147  h_gap, gap_limit);
1148  return result;
1149  }
1150  if (h_gap > 0 && (look_left ? neighbour->right_tab_type()
1151  : neighbour->left_tab_type()) >= TT_CONFIRMED) {
1152  // Hit a tab facing the wrong way. Stop in case we are crossing
1153  // the column boundary.
1154  if (debug)
1155  tprintf("Collision with like tab of type %d at %d,%d\n",
1156  look_left ? neighbour->right_tab_type()
1157  : neighbour->left_tab_type(),
1158  n_left, nbox.bottom());
1159  return result;
1160  }
1161  // This is a good fit to the line. Continue with this
1162  // neighbour as the bbox if the best gap.
1163  if (result == nullptr || h_gap < best_gap) {
1164  if (debug)
1165  tprintf("Good result\n");
1166  result = neighbour;
1167  best_gap = h_gap;
1168  } else {
1169  // The new one is worse, so we probably already have the best result.
1170  return result;
1171  }
1172  } else if (debug) {
1173  tprintf("Wrong way\n");
1174  }
1175  } else if (debug) {
1176  tprintf("Insufficient overlap\n");
1177  }
1178  }
1179  if (WithinTestRegion(3, left, box.top()))
1180  tprintf("Giving up due to end of search\n");
1181  return result; // Hit the edge and found nothing.
1182 }
1183 
1184 // Add a bi-directional partner relationship between the left
1185 // and the right. If one (or both) of the vectors is a separator,
1186 // extend a nearby extendable vector or create a new one of the
1187 // correct type, using the given left or right blob as a guide.
1188 void TabFind::AddPartnerVector(BLOBNBOX* left_blob, BLOBNBOX* right_blob,
1189  TabVector* left, TabVector* right) {
1190  const TBOX& left_box = left_blob->bounding_box();
1191  const TBOX& right_box = right_blob->bounding_box();
1192  if (left->IsSeparator()) {
1193  // Try to find a nearby left edge to extend.
1194  TabVector* v = LeftTabForBox(left_box, true, true);
1195  if (v != nullptr && v != left && v->IsLeftTab() &&
1196  v->XAtY(left_box.top()) > left->XAtY(left_box.top())) {
1197  left = v; // Found a good replacement.
1198  left->ExtendToBox(left_blob);
1199  } else {
1200  // Fake a vector.
1201  left = new TabVector(*left, TA_LEFT_RAGGED, vertical_skew_, left_blob);
1202  vectors_.add_sorted(TabVector::SortVectorsByKey, left);
1203  v_it_.move_to_first();
1204  }
1205  }
1206  if (right->IsSeparator()) {
1207  // Try to find a nearby left edge to extend.
1208  if (WithinTestRegion(3, right_box.right(), right_box.bottom())) {
1209  tprintf("Box edge (%d,%d-%d)",
1210  right_box.right(), right_box.bottom(), right_box.top());
1211  right->Print(" looking for improvement for");
1212  }
1213  TabVector* v = RightTabForBox(right_box, true, true);
1214  if (v != nullptr && v != right && v->IsRightTab() &&
1215  v->XAtY(right_box.top()) < right->XAtY(right_box.top())) {
1216  right = v; // Found a good replacement.
1217  right->ExtendToBox(right_blob);
1218  if (WithinTestRegion(3, right_box.right(), right_box.bottom())) {
1219  right->Print("Extended vector");
1220  }
1221  } else {
1222  // Fake a vector.
1223  right = new TabVector(*right, TA_RIGHT_RAGGED, vertical_skew_,
1224  right_blob);
1225  vectors_.add_sorted(TabVector::SortVectorsByKey, right);
1226  v_it_.move_to_first();
1227  if (WithinTestRegion(3, right_box.right(), right_box.bottom())) {
1228  right->Print("Created new vector");
1229  }
1230  }
1231  }
1232  left->AddPartner(right);
1233  right->AddPartner(left);
1234 }
1235 
1236 // Remove separators and unused tabs from the main vectors_ list
1237 // to the dead_vectors_ list.
1238 void TabFind::CleanupTabs() {
1239  // TODO(rays) Before getting rid of separators and unused vectors, it
1240  // would be useful to try moving ragged vectors outwards to see if this
1241  // allows useful extension. Could be combined with checking ends of partners.
1242  TabVector_IT it(&vectors_);
1243  TabVector_IT dead_it(&dead_vectors_);
1244  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1245  TabVector* v = it.data();
1246  if (v->IsSeparator() || v->Partnerless()) {
1247  dead_it.add_after_then_move(it.extract());
1248  v_it_.set_to_list(&vectors_);
1249  } else {
1250  v->FitAndEvaluateIfNeeded(vertical_skew_, this);
1251  }
1252  }
1253 }
1254 
1255 // Apply the given rotation to the given list of blobs.
1256 void TabFind::RotateBlobList(const FCOORD& rotation, BLOBNBOX_LIST* blobs) {
1257  BLOBNBOX_IT it(blobs);
1258  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1259  it.data()->rotate_box(rotation);
1260  }
1261 }
1262 
1263 // Recreate the grid with deskewed BLOBNBOXes.
1264 // Returns false if the detected skew angle is impossible.
1265 bool TabFind::Deskew(TabVector_LIST* hlines, BLOBNBOX_LIST* image_blobs,
1266  TO_BLOCK* block, FCOORD* deskew, FCOORD* reskew) {
1267  ComputeDeskewVectors(deskew, reskew);
1268  if (deskew->x() < kCosMaxSkewAngle)
1269  return false;
1270  RotateBlobList(*deskew, image_blobs);
1271  RotateBlobList(*deskew, &block->blobs);
1272  RotateBlobList(*deskew, &block->small_blobs);
1273  RotateBlobList(*deskew, &block->noise_blobs);
1274 
1275  // Rotate the horizontal vectors. The vertical vectors don't need
1276  // rotating as they can just be refitted.
1277  TabVector_IT h_it(hlines);
1278  for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
1279  TabVector* h = h_it.data();
1280  h->Rotate(*deskew);
1281  }
1282  TabVector_IT d_it(&dead_vectors_);
1283  for (d_it.mark_cycle_pt(); !d_it.cycled_list(); d_it.forward()) {
1284  TabVector* d = d_it.data();
1285  d->Rotate(*deskew);
1286  }
1287  SetVerticalSkewAndParallelize(0, 1);
1288  // Rebuild the grid to the new size.
1289  TBOX grid_box(bleft_, tright_);
1290  grid_box.rotate_large(*deskew);
1291  Init(gridsize(), grid_box.botleft(), grid_box.topright());
1292  InsertBlobsToGrid(false, false, image_blobs, this);
1293  InsertBlobsToGrid(true, false, &block->blobs, this);
1294  return true;
1295 }
1296 
1297 // Flip the vertical and horizontal lines and rotate the grid ready
1298 // for working on the rotated image.
1299 // This also makes parameter adjustments for FindInitialTabVectors().
1300 void TabFind::ResetForVerticalText(const FCOORD& rotate, const FCOORD& rerotate,
1301  TabVector_LIST* horizontal_lines,
1302  int* min_gutter_width) {
1303  // Rotate the horizontal and vertical vectors and swap them over.
1304  // Only the separators are kept and rotated; other tabs are used
1305  // to estimate the gutter width then thrown away.
1306  TabVector_LIST ex_verticals;
1307  TabVector_IT ex_v_it(&ex_verticals);
1308  TabVector_LIST vlines;
1309  TabVector_IT v_it(&vlines);
1310  while (!v_it_.empty()) {
1311  TabVector* v = v_it_.extract();
1312  if (v->IsSeparator()) {
1313  v->Rotate(rotate);
1314  ex_v_it.add_after_then_move(v);
1315  } else {
1316  v_it.add_after_then_move(v);
1317  }
1318  v_it_.forward();
1319  }
1320 
1321  // Adjust the min gutter width for better tabbox selection
1322  // in 2nd call to FindInitialTabVectors().
1323  int median_gutter = FindMedianGutterWidth(&vlines);
1324  if (median_gutter > *min_gutter_width)
1325  *min_gutter_width = median_gutter;
1326 
1327  TabVector_IT h_it(horizontal_lines);
1328  for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
1329  TabVector* h = h_it.data();
1330  h->Rotate(rotate);
1331  }
1332  v_it_.add_list_after(horizontal_lines);
1333  v_it_.move_to_first();
1334  h_it.set_to_list(horizontal_lines);
1335  h_it.add_list_after(&ex_verticals);
1336 
1337  // Rebuild the grid to the new size.
1338  TBOX grid_box(bleft(), tright());
1339  grid_box.rotate_large(rotate);
1340  Init(gridsize(), grid_box.botleft(), grid_box.topright());
1341 }
1342 
1343 // Clear the grid and get rid of the tab vectors, but not separators,
1344 // ready to start again.
1346  v_it_.move_to_first();
1347  for (v_it_.mark_cycle_pt(); !v_it_.cycled_list(); v_it_.forward()) {
1348  if (!v_it_.data()->IsSeparator())
1349  delete v_it_.extract();
1350  }
1351  Clear();
1352 }
1353 
1354 // Reflect the separator tab vectors and the grids in the y-axis.
1355 // Can only be called after Reset!
1357  TabVector_LIST temp_list;
1358  TabVector_IT temp_it(&temp_list);
1359  v_it_.move_to_first();
1360  // The TabVector list only contains vertical lines, but they need to be
1361  // reflected and the list needs to be reversed, so they are still in
1362  // sort_key order.
1363  while (!v_it_.empty()) {
1364  TabVector* v = v_it_.extract();
1365  v_it_.forward();
1366  v->ReflectInYAxis();
1367  temp_it.add_before_then_move(v);
1368  }
1369  v_it_.add_list_after(&temp_list);
1370  v_it_.move_to_first();
1371  // Reset this grid with reflected bounding boxes.
1372  TBOX grid_box(bleft(), tright());
1373  int tmp = grid_box.left();
1374  grid_box.set_left(-grid_box.right());
1375  grid_box.set_right(-tmp);
1376  Init(gridsize(), grid_box.botleft(), grid_box.topright());
1377 }
1378 
1379 // Compute the rotation required to deskew, and its inverse rotation.
1380 void TabFind::ComputeDeskewVectors(FCOORD* deskew, FCOORD* reskew) {
1381  double length = vertical_skew_ % vertical_skew_;
1382  length = sqrt(length);
1383  deskew->set_x(static_cast<float>(vertical_skew_.y() / length));
1384  deskew->set_y(static_cast<float>(vertical_skew_.x() / length));
1385  reskew->set_x(deskew->x());
1386  reskew->set_y(-deskew->y());
1387 }
1388 
1389 // Compute and apply constraints to the end positions of TabVectors so
1390 // that where possible partners end at the same y coordinate.
1391 void TabFind::ApplyTabConstraints() {
1392  TabVector_IT it(&vectors_);
1393  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1394  TabVector* v = it.data();
1395  v->SetupConstraints();
1396  }
1397  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1398  TabVector* v = it.data();
1399  // With the first and last partner, we want a common bottom and top,
1400  // respectively, and for each change of partner, we want a common
1401  // top of first with bottom of next.
1402  v->SetupPartnerConstraints();
1403  }
1404  // TODO(rays) The back-to-back pairs should really be done like the
1405  // front-to-front pairs, but there is no convenient way of producing the
1406  // list of partners like there is with the front-to-front.
1407  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1408  TabVector* v = it.data();
1409  if (!v->IsRightTab())
1410  continue;
1411  // For each back-to-back pair of vectors, try for common top and bottom.
1412  TabVector_IT partner_it(it);
1413  for (partner_it.forward(); !partner_it.at_first(); partner_it.forward()) {
1414  TabVector* partner = partner_it.data();
1415  if (!partner->IsLeftTab() || !v->VOverlap(*partner))
1416  continue;
1417  v->SetupPartnerConstraints(partner);
1418  }
1419  }
1420  // Now actually apply the constraints to get common start/end points.
1421  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1422  TabVector* v = it.data();
1423  if (!v->IsSeparator())
1424  v->ApplyConstraints();
1425  }
1426  // TODO(rays) Where constraint application fails, it would be good to try
1427  // checking the ends to see if they really should be moved.
1428 }
1429 
1430 } // namespace tesseract.
STATS::get_total
int32_t get_total() const
Definition: statistc.h:83
tesseract::TabVector::SortVectorsByKey
static int SortVectorsByKey(const void *v1, const void *v2)
Definition: tabvector.h:293
TO_BLOCK::small_blobs
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:774
tesseract::TA_LEFT_RAGGED
Definition: tabvector.h:46
ScrollView
Definition: scrollview.h:97
TO_BLOCK::plot_graded_blobs
void plot_graded_blobs(ScrollView *to_win)
Definition: blobbox.cpp:1054
ScrollView::Brush
void Brush(Color color)
Definition: scrollview.cpp:723
BLOBNBOX::set_right_tab_type
void set_right_tab_type(TabType new_type)
Definition: blobbox.h:279
ICOORD::set_x
void set_x(int16_t xin)
rewrite function
Definition: points.h:60
tesseract::kAlignedFraction
const double kAlignedFraction
Definition: alignedblob.cpp:38
TBOX::rotate_large
void rotate_large(const FCOORD &vec)
Definition: rect.cpp:69
tesseract::TabVector
Definition: tabvector.h:111
tesseract::TabVector::SortKey
static int SortKey(const ICOORD &vertical, int x, int y)
Definition: tabvector.h:279
tesseract::TabFind::Reset
void Reset()
Definition: tabfind.cpp:1345
TO_BLOCK::plot_noise_blobs
void plot_noise_blobs(ScrollView *to_win)
Definition: blobbox.cpp:1046
tesseract::GridSearch::StartSideSearch
void StartSideSearch(int x, int ymin, int ymax)
Definition: bbgrid.h:746
tesseract::TabFind::FindTabVectors
bool FindTabVectors(TabVector_LIST *hlines, BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, int min_gutter_width, double tabfind_aligned_gap_fraction, ColPartitionGrid *part_grid, FCOORD *deskew, FCOORD *reskew)
Definition: tabfind.cpp:422
host.h
tabfind.h
tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >::MakeWindow
ScrollView * MakeWindow(int x, int y, const char *window_name)
Definition: bbgrid.h:589
TO_BLOCK::noise_blobs
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:773
tesseract::TabFind::SetBlobRuleEdges
void SetBlobRuleEdges(BLOBNBOX_LIST *blobs)
Definition: tabfind.cpp:142
TT_NONE
Definition: blobbox.h:59
FCOORD::set_x
void set_x(float xin)
rewrite function
Definition: points.h:213
linefind.h
NearlyEqual
bool NearlyEqual(T x, T y, T tolerance)
Definition: host.h:36
tesseract::kMaxRaggedSearch
const int kMaxRaggedSearch
Definition: tabfind.cpp:39
tesseract::GridBase::tright_
ICOORD tright_
Definition: bbgrid.h:91
tesseract::kColumnWidthFactor
const int kColumnWidthFactor
Definition: tabfind.h:41
BRT_UNKNOWN
Definition: blobbox.h:77
FCOORD::y
float y() const
Definition: points.h:209
ICOORD
integer coordinate
Definition: points.h:30
BLOBNBOX::leader_on_left
bool leader_on_left() const
Definition: blobbox.h:357
TBOX::print
void print() const
Definition: rect.h:277
TO_BLOCK::DeleteUnownedNoise
void DeleteUnownedNoise()
Definition: blobbox.cpp:1020
FCOORD::x
float x() const
Definition: points.h:206
TBOX::top
int16_t top() const
Definition: rect.h:57
tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >::Clear
void Clear()
Definition: bbgrid.h:455
TO_BLOCK::blobs
BLOBNBOX_LIST blobs
Definition: blobbox.h:771
STATS::pile_count
int32_t pile_count(int32_t value) const
Definition: statistc.h:75
TO_BLOCK
Definition: blobbox.h:691
BRT_VERT_TEXT
Definition: blobbox.h:78
tesseract::TabVector::Rotate
void Rotate(const FCOORD &rotation)
Definition: tabvector.cpp:273
BLOBNBOX::right_rule
int right_rule() const
Definition: blobbox.h:318
ScrollView::NONE
Definition: scrollview.h:101
tesseract::TA_RIGHT_ALIGNED
Definition: tabvector.h:48
TT_CONFIRMED
Definition: blobbox.h:63
ScrollView::Pen
void Pen(Color color)
Definition: scrollview.cpp:717
detlinefit.h
tesseract::TabVector::ExtendedOverlap
int ExtendedOverlap(int top_y, int bottom_y) const
Definition: tabvector.h:207
tesseract::TabFind::SetupTabSearch
void SetupTabSearch(int x, int y, int *min_key, int *max_key)
Definition: tabfind.cpp:490
tesseract::ColPartitionGridSearch
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:935
tesseract::TabFind::GutterWidthAndNeighbourGap
void GutterWidthAndNeighbourGap(int tab_x, int mean_height, int max_gutter, bool left, BLOBNBOX *bbox, int *gutter_width, int *neighbour_gap)
Definition: tabfind.cpp:208
tesseract::kMinEvaluatedTabs
const int kMinEvaluatedTabs
Definition: tabfind.cpp:56
TT_MAYBE_ALIGNED
Definition: blobbox.h:62
ICOORD::x
int16_t x() const
access function
Definition: points.h:51
FCOORD
Definition: points.h:187
FCOORD::set_y
void set_y(float yin)
rewrite function
Definition: points.h:217
tesseract::TabFind::InsertBlobsToGrid
void InsertBlobsToGrid(bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
Definition: tabfind.cpp:91
BLOBNBOX
Definition: blobbox.h:142
ScrollView::BLUE
Definition: scrollview.h:108
BLOBNBOX::set_left_tab_type
void set_left_tab_type(TabType new_type)
Definition: blobbox.h:273
BLOBNBOX::UnMergeableType
static bool UnMergeableType(BlobRegionType type)
Definition: blobbox.h:429
tesseract::TabVector::IsLeftTab
bool IsLeftTab() const
Definition: tabvector.h:212
tesseract::GridBase::tright
const ICOORD & tright() const
Definition: bbgrid.h:75
TBOX::height
int16_t height() const
Definition: rect.h:107
TBOX::y_overlap
bool y_overlap(const TBOX &box) const
Definition: rect.h:418
tesseract::TabFind::GutterWidth
int GutterWidth(int bottom_y, int top_y, const TabVector &v, bool ignore_unmergeables, int max_gutter_width, int *required_shift)
Definition: tabfind.cpp:161
tesseract::TabFind::FindInitialTabVectors
ScrollView * FindInitialTabVectors(BLOBNBOX_LIST *image_blobs, int min_gutter_width, double tabfind_aligned_gap_fraction, TO_BLOCK *block)
Definition: tabfind.cpp:514
tesseract::TabFind::ReflectInYAxis
void ReflectInYAxis()
Definition: tabfind.cpp:1356
BLOBNBOX::left_rule
int left_rule() const
Definition: blobbox.h:312
tesseract::TabVector::sort_key
int sort_key() const
Definition: tabvector.h:157
TBOX::set_right
void set_right(int x)
Definition: rect.h:81
tesseract::TabFind::RightEdgeForBox
int RightEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:281
tesseract::TabFind::CommonWidth
bool CommonWidth(int width)
Definition: tabfind.cpp:394
tesseract::ColPartitionGrid::Deskew
void Deskew(const FCOORD &deskew)
Definition: colpartitiongrid.cpp:729
GenericVector::push_back
int push_back(T object)
Definition: genericvector.h:799
tesseract::GridSearch::NextSideSearch
BBC * NextSideSearch(bool right_to_left)
Definition: bbgrid.h:761
BLOBNBOX::leader_on_right
bool leader_on_right() const
Definition: blobbox.h:363
tesseract::TabFind::resolution_
int resolution_
Of source image in pixels per inch.
Definition: tabfind.h:367
TO_BLOCK::large_blobs
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:775
tesseract::TabFind::LeftEdgeForBox
int LeftEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:286
tesseract::AlignedBlob::FindVerticalAlignment
TabVector * FindVerticalAlignment(AlignedBlobParams align_params, BLOBNBOX *bbox, int *vertical_x, int *vertical_y)
Definition: alignedblob.cpp:225
BTFT_TEXT_ON_IMAGE
Definition: blobbox.h:119
tesseract::kMinColumnWidth
const double kMinColumnWidth
Definition: colpartitionset.cpp:31
tesseract::TabFind::vectors
TabVector_LIST * vectors()
Definition: tabfind.h:172
tesseract::AlignedBlob::WithinTestRegion
static bool WithinTestRegion(int detail_level, int x, int y)
Definition: alignedblob.cpp:150
tesseract::TA_LEFT_ALIGNED
Definition: tabvector.h:45
BLOBNBOX::joined_to_prev
bool joined_to_prev() const
Definition: blobbox.h:255
BLOBNBOX::set_left_rule
void set_left_rule(int new_left)
Definition: blobbox.h:315
TBOX::width
int16_t width() const
Definition: rect.h:114
BLOBNBOX::set_right_crossing_rule
void set_right_crossing_rule(int new_right)
Definition: blobbox.h:333
tesseract::AlignedBlob
Definition: alignedblob.h:81
tesseract::TabFind::DontFindTabVectors
void DontFindTabVectors(BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, FCOORD *deskew, FCOORD *reskew)
Definition: tabfind.cpp:452
BOOL_VAR
#define BOOL_VAR(name, val, comment)
Definition: params.h:303
TBOX::bottom
int16_t bottom() const
Definition: rect.h:64
tesseract::TabFind::DifferentSizes
static bool DifferentSizes(int size1, int size2)
Definition: tabfind.cpp:407
tesseract::TabVector::XAtY
int XAtY(int y) const
Definition: tabvector.h:188
tesseract::BBGrid
Definition: bbgrid.h:158
BLOBNBOX::set_left_crossing_rule
void set_left_crossing_rule(int new_left)
Definition: blobbox.h:327
ScrollView::WHITE
Definition: scrollview.h:103
TBOX::topright
const ICOORD & topright() const
Definition: rect.h:103
tesseract::TabVector::ReflectInYAxis
void ReflectInYAxis()
Definition: tabvector.h:264
tesseract::GridBase::bleft_
ICOORD bleft_
Definition: bbgrid.h:90
tesseract::GridSearch
Definition: bbgrid.h:48
tesseract::kMinLinesInColumn
const int kMinLinesInColumn
Definition: tabfind.cpp:41
tesseract
Definition: baseapi.h:65
BLOBNBOX::set_region_type
void set_region_type(BlobRegionType new_type)
Definition: blobbox.h:285
tesseract::TabFind::VeryDifferentSizes
static bool VeryDifferentSizes(int size1, int size2)
Definition: tabfind.cpp:413
TBOX::botleft
const ICOORD & botleft() const
Definition: rect.h:91
STATS
Definition: statistc.h:30
BLOBNBOX::bounding_box
const TBOX & bounding_box() const
Definition: blobbox.h:229
tesseract::TabFind::vertical_skew_
ICOORD vertical_skew_
Estimate of true vertical in this image.
Definition: tabfind.h:366
tesseract::TabFind::RightTabForBox
TabVector * RightTabForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:304
BLOBNBOX::UniquelyVertical
bool UniquelyVertical() const
Definition: blobbox.h:409
tesseract::TabFind::TabFind
TabFind(int gridsize, const ICOORD &bleft, const ICOORD &tright, TabVector_LIST *vlines, int vertical_x, int vertical_y, int resolution)
Definition: tabfind.cpp:65
GenericVector< BLOBNBOX * >
tesseract::TabVector::VOverlap
int VOverlap(const TabVector &other) const
Definition: tabvector.h:198
STATS::mode
int32_t mode() const
Definition: statistc.cpp:100
tesseract::kMinVerticalSearch
const int kMinVerticalSearch
Definition: tabfind.cpp:37
tesseract::TabVector::MergeSimilarTabVectors
static void MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors, BlobGrid *grid)
Definition: tabvector.cpp:353
TT_MAYBE_RAGGED
Definition: blobbox.h:61
tesseract::kTabRadiusFactor
const int kTabRadiusFactor
Definition: tabfind.cpp:35
tesseract::TabFind::SetBlockRuleEdges
void SetBlockRuleEdges(TO_BLOCK *block)
Definition: tabfind.cpp:133
tesseract::GridBase::gridsize
int gridsize() const
Definition: bbgrid.h:63
tesseract::TabFind::~TabFind
~TabFind() override
Definition: tabfind.cpp:79
BLOBNBOX::right_tab_type
TabType right_tab_type() const
Definition: blobbox.h:276
tesseract::TabFind::LeftTabForBox
TabVector * LeftTabForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:348
tesseract::TabFind::DisplayTabVectors
ScrollView * DisplayTabVectors(ScrollView *tab_win)
Definition: tabfind.cpp:497
tesseract::kRaggedGutterMultiple
const int kRaggedGutterMultiple
Definition: tabfind.cpp:51
tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >::DisplayBoxes
void DisplayBoxes(ScrollView *window)
Definition: bbgrid.h:613
alignedblob.h
tesseract::GridBase::gridsize_
int gridsize_
Definition: bbgrid.h:86
tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >::Init
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
Definition: bbgrid.h:445
tesseract::kMaxGutterWidthAbsolute
const double kMaxGutterWidthAbsolute
Definition: tabfind.cpp:49
tesseract::TabFind::ResetForVerticalText
void ResetForVerticalText(const FCOORD &rotate, const FCOORD &rerotate, TabVector_LIST *horizontal_lines, int *min_gutter_width)
Definition: tabfind.cpp:1300
BLOBNBOX::flow
BlobTextFlowType flow() const
Definition: blobbox.h:294
TBOX::left
int16_t left() const
Definition: rect.h:71
tesseract::ColPartitionGrid
Definition: colpartitiongrid.h:32
tesseract::TabVector::Display
void Display(ScrollView *tab_win)
Definition: tabvector.cpp:539
STATS::add
void add(int32_t value, int32_t count)
Definition: statistc.cpp:87
tesseract::AlignedBlob::DisplayTabs
ScrollView * DisplayTabs(const char *window_name, ScrollView *tab_win)
Definition: alignedblob.cpp:158
BLOBNBOX::region_type
BlobRegionType region_type() const
Definition: blobbox.h:282
GenericVector::clear
void clear()
Definition: genericvector.h:857
TBOX::right
int16_t right() const
Definition: rect.h:78
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
ICOORD::set_with_shrink
void set_with_shrink(int x, int y)
Set from the given x,y, shrinking the vector to fit if needed.
Definition: points.cpp:40
tesseract::TabFind::InsertBlob
bool InsertBlob(bool h_spread, bool v_spread, BLOBNBOX *blob, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
Definition: tabfind.cpp:118
tesseract::kMaxVerticalSearch
const int kMaxVerticalSearch
Definition: tabfind.cpp:38
ScrollView::Update
static void Update()
Definition: scrollview.cpp:708
tesseract::TabAlignment
TabAlignment
Definition: tabvector.h:44
tesseract::kLineFragmentAspectRatio
const double kLineFragmentAspectRatio
Definition: tabfind.cpp:54
tesseract::TabVector::IsSeparator
bool IsSeparator() const
Definition: tabvector.h:220
ScrollView::Color
Color
Definition: scrollview.h:100
tesseract::TabFind::TidyBlobs
void TidyBlobs(TO_BLOCK *block)
Definition: tabfind.cpp:465
BLOBNBOX::owner
tesseract::ColPartition * owner() const
Definition: blobbox.h:351
tesseract::TA_RIGHT_RAGGED
Definition: tabvector.h:49
GenericVector::sort
void sort()
Definition: genericvector.h:1102
ICOORDELT
Definition: points.h:160
ScrollView::Rectangle
void Rectangle(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:599
tesseract::BlobGridSearch
GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > BlobGridSearch
Definition: blobgrid.h:31
GenericVector::size
int size() const
Definition: genericvector.h:71
tesseract::kMinFractionalLinesInColumn
const double kMinFractionalLinesInColumn
Definition: tabfind.cpp:45
BLOBNBOX::left_tab_type
TabType left_tab_type() const
Definition: blobbox.h:270
textord_debug_tabfind
int textord_debug_tabfind
Definition: alignedblob.cpp:27
tesseract::GridBase::bleft
const ICOORD & bleft() const
Definition: bbgrid.h:72
colpartitiongrid.h
tesseract::kCosMaxSkewAngle
const double kCosMaxSkewAngle
Definition: tabfind.cpp:60
TBOX::set_left
void set_left(int x)
Definition: rect.h:74
ICOORD::y
int16_t y() const
access_function
Definition: points.h:55
TBOX
Definition: rect.h:33
tesseract::TabFind::RotateBlobList
static void RotateBlobList(const FCOORD &rotation, BLOBNBOX_LIST *blobs)
Definition: tabfind.cpp:1256
BLOBNBOX::set_right_rule
void set_right_rule(int new_right)
Definition: blobbox.h:321