All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
tabfind.cpp
Go to the documentation of this file.
1 // File: TabFind.cpp
3 // Description: Subclass of BBGrid to find vertically aligned blobs.
4 // Author: Ray Smith
5 // Created: Fri Mar 21 15:03:01 PST 2008
6 //
7 // (C) Copyright 2008, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #ifdef HAVE_CONFIG_H
21 #include "config_auto.h"
22 #endif
23 
24 #include "tabfind.h"
25 #include "alignedblob.h"
26 #include "blobbox.h"
27 #include "colpartitiongrid.h"
28 #include "detlinefit.h"
29 #include "linefind.h"
30 #include "ndminx.h"
31 
32 namespace tesseract {
33 
34 // Multiple of box size to search for initial gaps.
// Used by TestBoxForTabs to turn a blob height into a radial search radius.
35 const int kTabRadiusFactor = 5;
36 // Min and Max multiple of height to search vertically when extrapolating.
37 const int kMinVerticalSearch = 3;
38 const int kMaxVerticalSearch = 12;
// NOTE(review): presumably the equivalent maximum for ragged tabs; it is not
// used in the part of the file visible here - confirm.
39 const int kMaxRaggedSearch = 25;
40 // Minimum number of lines in a column width to make it interesting.
41 const int kMinLinesInColumn = 10;
42 // Minimum width of a column to be interesting.
43 const int kMinColumnWidth = 200;
44 // Minimum fraction of total column lines for a column to be interesting.
45 const double kMinFractionalLinesInColumn = 0.125;
46 // Fraction of height used as alignment tolerance for aligned tabs.
// NOTE(review): TestBoxForTabs multiplies this by resolution_, not by a blob
// height - confirm which of the comment or the code is intended.
47 const double kAlignedFraction = 0.03125;
48 // Minimum gutter width in absolute inch (multiplied by resolution)
49 const double kMinGutterWidthAbsolute = 0.02;
50 // Maximum gutter width (in absolute inch) that we care about
51 const double kMaxGutterWidthAbsolute = 2.00;
52 // Multiplier of gridsize for min gutter width of TT_MAYBE_RAGGED blobs.
53 const int kRaggedGutterMultiple = 5;
54 // Min aspect ratio of tall objects to be considered a separator line.
55 // (These will be ignored in searching the gutter for obstructions.)
56 const double kLineFragmentAspectRatio = 10.0;
57 // Multiplier of new y positions in running average for skew estimation.
58 const double kSmoothFactor = 0.25;
59 // Min coverage for a good baseline between vectors
60 const double kMinBaselineCoverage = 0.5;
61 // Minimum overlap fraction when scanning text lines for column widths.
62 const double kCharVerticalOverlapFraction = 0.375;
63 // Maximum horizontal gap allowed when scanning for column widths
64 const double kMaxHorizontalGap = 3.0;
65 // Maximum upper quartile error allowed on a baseline fit as a fraction
66 // of height.
67 const double kMaxBaselineError = 0.4375;
68 // Min number of points to accept after evaluation.
69 const int kMinEvaluatedTabs = 3;
70 // Minimum aspect ratio of a textline to make a good textline blob with a
71 // single blob.
// NOTE(review): the comment says "Minimum" but the constant is named kMax...;
// confirm which is intended.
72 const int kMaxTextLineBlobRatio = 5;
73 // Minimum aspect ratio of a textline to make a good textline blob with
74 // multiple blobs. Target ratio varies according to number of blobs.
75 const int kMinTextLineBlobRatio = 3;
76 // Fraction of box area covered by image to make a blob image.
77 const double kMinImageArea = 0.5;
78 // Up to 30 degrees is allowed for rotations of diacritic blobs.
79 // Keep this value slightly larger than kCosSmallAngle in blobbox.cpp
80 // so that the assert there never fails.
// (0.866025 is approximately cos(30 degrees).)
81 const double kCosMaxSkewAngle = 0.866025;
82 
// Debug switches: FindInitialTabVectors tests the first before displaying
// the initial tab vectors; the second is described as showing tab vectors.
83 BOOL_VAR(textord_tabfind_show_initialtabs, false, "Show tab candidates");
84 BOOL_VAR(textord_tabfind_show_finaltabs, false, "Show tab vectors");
85 
86 TabFind::TabFind(int gridsize, const ICOORD& bleft, const ICOORD& tright,
87  TabVector_LIST* vlines, int vertical_x, int vertical_y,
88  int resolution)
89  : AlignedBlob(gridsize, bleft, tright),
90  resolution_(resolution),
91  image_origin_(0, tright.y() - 1) {
92  width_cb_ = NULL;
93  v_it_.set_to_list(&vectors_);
94  v_it_.add_list_after(vlines);
95  SetVerticalSkewAndParellelize(vertical_x, vertical_y);
97 }
98 
100  if (width_cb_ != NULL)
101  delete width_cb_;
102 }
103 
105 
106 // Insert a list of blobs into the given grid (not necessarily this).
107 // If take_ownership is true, then the blobs are removed from the source list.
108 // See InsertBlob for the other arguments.
109 // It would seem to make more sense to swap this and grid, but this way
110 // around allows grid to not be derived from TabFind, eg a ColPartitionGrid,
111 // while the grid that provides the tab stops(this) has to be derived from
112 // TabFind.
113 void TabFind::InsertBlobsToGrid(bool h_spread, bool v_spread,
114  BLOBNBOX_LIST* blobs,
115  BBGrid<BLOBNBOX, BLOBNBOX_CLIST,
116  BLOBNBOX_C_IT>* grid) {
117  BLOBNBOX_IT blob_it(blobs);
118  int b_count = 0;
119  int reject_count = 0;
120  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
121  BLOBNBOX* blob = blob_it.data();
122 // if (InsertBlob(true, true, blob, grid)) {
123  if (InsertBlob(h_spread, v_spread, blob, grid)) {
124  ++b_count;
125  } else {
126  ++reject_count;
127  }
128  }
129  if (textord_debug_tabfind) {
130  tprintf("Inserted %d blobs into grid, %d rejected.\n",
131  b_count, reject_count);
132  }
133 }
134 
135 // Insert a single blob into the given grid (not necessarily this).
136 // If h_spread, then all cells covered horizontally by the box are
137 // used, otherwise, just the bottom-left. Similarly for v_spread.
138 // A side effect is that the left and right rule edges of the blob are
139 // set according to the tab vectors in this (not grid).
140 bool TabFind::InsertBlob(bool h_spread, bool v_spread, BLOBNBOX* blob,
141  BBGrid<BLOBNBOX, BLOBNBOX_CLIST,
142  BLOBNBOX_C_IT>* grid) {
143  TBOX box = blob->bounding_box();
144  blob->set_left_rule(LeftEdgeForBox(box, false, false));
145  blob->set_right_rule(RightEdgeForBox(box, false, false));
146  blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false));
147  blob->set_right_crossing_rule(RightEdgeForBox(box, true, false));
148  if (blob->joined_to_prev())
149  return false;
150  grid->InsertBBox(h_spread, v_spread, blob);
151  return true;
152 }
153 
154 // Calls SetBlobRuleEdges for all the blobs in the given block.
// The four lists processed are the block's blobs, small_blobs, noise_blobs
// and large_blobs, in that order.
// NOTE(review): the function's signature line is missing from this listing
// (the embedded numbering jumps from 154 to 156), so the body below is not
// attached to any declaration. Restore the signature from tabfind.h; the
// body requires a pointer parameter named block.
156  SetBlobRuleEdges(&block->blobs);
157  SetBlobRuleEdges(&block->small_blobs);
158  SetBlobRuleEdges(&block->noise_blobs);
159  SetBlobRuleEdges(&block->large_blobs);
160 }
161 
162 // Sets the left and right rule and crossing_rules for the blobs in the given
163 // list by fiding the next outermost tabvectors for each blob.
164 void TabFind::SetBlobRuleEdges(BLOBNBOX_LIST* blobs) {
165  BLOBNBOX_IT blob_it(blobs);
166  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
167  BLOBNBOX* blob = blob_it.data();
168  TBOX box = blob->bounding_box();
169  blob->set_left_rule(LeftEdgeForBox(box, false, false));
170  blob->set_right_rule(RightEdgeForBox(box, false, false));
171  blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false));
172  blob->set_right_crossing_rule(RightEdgeForBox(box, true, false));
173  }
174 }
175 
176 // Returns the gutter width of the given TabVector between the given y limits.
177 // Also returns x-shift to be added to the vector to clear any intersecting
178 // blobs. The shift is deducted from the returned gutter.
179 // If ignore_unmergeables is true, then blobs of UnMergeableType are
180 // ignored as if they don't exist. (Used for text on image.)
181 // max_gutter_width is used as the maximum width worth searching for in case
182 // there is nothing near the TabVector.
183 int TabFind::GutterWidth(int bottom_y, int top_y, const TabVector& v,
184  bool ignore_unmergeables, int max_gutter_width,
185  int* required_shift) {
186  bool right_to_left = v.IsLeftTab();
187  int bottom_x = v.XAtY(bottom_y);
188  int top_x = v.XAtY(top_y);
189  int start_x = right_to_left ? MAX(top_x, bottom_x) : MIN(top_x, bottom_x);
190  BlobGridSearch sidesearch(this);
191  sidesearch.StartSideSearch(start_x, bottom_y, top_y);
192  int min_gap = max_gutter_width;
193  *required_shift = 0;
194  BLOBNBOX* blob = NULL;
195  while ((blob = sidesearch.NextSideSearch(right_to_left)) != NULL) {
196  const TBOX& box = blob->bounding_box();
197  if (box.bottom() >= top_y || box.top() <= bottom_y)
198  continue; // Doesn't overlap enough.
199  if (box.height() >= gridsize() * 2 &&
200  box.height() > box.width() * kLineFragmentAspectRatio) {
201  // Skip likely separator line residue.
202  continue;
203  }
204  if (ignore_unmergeables && BLOBNBOX::UnMergeableType(blob->region_type()))
205  continue; // Skip non-text if required.
206  int mid_y = (box.bottom() + box.top()) / 2;
207  // We use the x at the mid-y so that the required_shift guarantees
208  // to clear all the blobs on the tab-stop. If we use the min/max
209  // of x at top/bottom of the blob, then exactness would be required,
210  // which is not a good thing.
211  int tab_x = v.XAtY(mid_y);
212  int gap;
213  if (right_to_left) {
214  gap = tab_x - box.right();
215  if (gap < 0 && box.left() - tab_x < *required_shift)
216  *required_shift = box.left() - tab_x;
217  } else {
218  gap = box.left() - tab_x;
219  if (gap < 0 && box.right() - tab_x > *required_shift)
220  *required_shift = box.right() - tab_x;
221  }
222  if (gap > 0 && gap < min_gap)
223  min_gap = gap;
224  }
225  // Result may be negative, in which case, this is a really bad tabstop.
226  return min_gap - abs(*required_shift);
227 }
228 
229 // Find the gutter width and distance to inner neighbour for the given blob.
230 void TabFind::GutterWidthAndNeighbourGap(int tab_x, int mean_height,
231  int max_gutter, bool left,
232  BLOBNBOX* bbox, int* gutter_width,
233  int* neighbour_gap ) {
234  const TBOX& box = bbox->bounding_box();
235  // The gutter and internal sides of the box.
236  int gutter_x = left ? box.left() : box.right();
237  int internal_x = left ? box.right() : box.left();
238  // On ragged edges, the gutter side of the box is away from the tabstop.
239  int tab_gap = left ? gutter_x - tab_x : tab_x - gutter_x;
240  *gutter_width = max_gutter;
241  // If the box is away from the tabstop, we need to increase
242  // the allowed gutter width.
243  if (tab_gap > 0)
244  *gutter_width += tab_gap;
245  bool debug = WithinTestRegion(2, box.left(), box.bottom());
246  if (debug)
247  tprintf("Looking in gutter\n");
248  // Find the nearest blob on the outside of the column.
249  BLOBNBOX* gutter_bbox = AdjacentBlob(bbox, left,
250  bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0,
251  *gutter_width, box.top(), box.bottom());
252  if (gutter_bbox != NULL) {
253  TBOX gutter_box = gutter_bbox->bounding_box();
254  *gutter_width = left ? tab_x - gutter_box.right()
255  : gutter_box.left() - tab_x;
256  }
257  if (*gutter_width >= max_gutter) {
258  // If there is no box because a tab was in the way, get the tab coord.
259  TBOX gutter_box(box);
260  if (left) {
261  gutter_box.set_left(tab_x - max_gutter - 1);
262  gutter_box.set_right(tab_x - max_gutter);
263  int tab_gutter = RightEdgeForBox(gutter_box, true, false);
264  if (tab_gutter < tab_x - 1)
265  *gutter_width = tab_x - tab_gutter;
266  } else {
267  gutter_box.set_left(tab_x + max_gutter);
268  gutter_box.set_right(tab_x + max_gutter + 1);
269  int tab_gutter = LeftEdgeForBox(gutter_box, true, false);
270  if (tab_gutter > tab_x + 1)
271  *gutter_width = tab_gutter - tab_x;
272  }
273  }
274  if (*gutter_width > max_gutter)
275  *gutter_width = max_gutter;
276  // Now look for a neighbour on the inside.
277  if (debug)
278  tprintf("Looking for neighbour\n");
279  BLOBNBOX* neighbour = AdjacentBlob(bbox, !left,
280  bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0,
281  *gutter_width, box.top(), box.bottom());
282  int neighbour_edge = left ? RightEdgeForBox(box, true, false)
283  : LeftEdgeForBox(box, true, false);
284  if (neighbour != NULL) {
285  TBOX n_box = neighbour->bounding_box();
286  if (debug) {
287  tprintf("Found neighbour:");
288  n_box.print();
289  }
290  if (left && n_box.left() < neighbour_edge)
291  neighbour_edge = n_box.left();
292  else if (!left && n_box.right() > neighbour_edge)
293  neighbour_edge = n_box.right();
294  }
295  *neighbour_gap = left ? neighbour_edge - internal_x
296  : internal_x - neighbour_edge;
297 }
298 
299 // Return the x-coord that corresponds to the right edge for the given
300 // box. If there is a rule line to the right that vertically overlaps it,
301 // then return the x-coord of the rule line, otherwise return the right
302 // edge of the page. For details see RightTabForBox below.
303 int TabFind::RightEdgeForBox(const TBOX& box, bool crossing, bool extended) {
304  TabVector* v = RightTabForBox(box, crossing, extended);
305  return v == NULL ? tright_.x() : v->XAtY((box.top() + box.bottom()) / 2);
306 }
307 // As RightEdgeForBox, but finds the left Edge instead.
308 int TabFind::LeftEdgeForBox(const TBOX& box, bool crossing, bool extended) {
309  TabVector* v = LeftTabForBox(box, crossing, extended);
310  return v == NULL ? bleft_.x() : v->XAtY((box.top() + box.bottom()) / 2);
311 }
312 
313 // This comment documents how this function works.
314 // For its purpose and arguments, see the comment in tabfind.h.
315 // TabVectors are stored sorted by perpendicular distance of middle from
316 // the global mean vertical vector. Since the individual vectors can have
317 // differing directions, their XAtY for a given y is not necessarily in the
318 // right order. Therefore the search has to be run with a margin.
319 // The middle of a vector that passes through (x,y) cannot be higher than
320 // halfway from y to the top, or lower than halfway from y to the bottom
321 // of the coordinate range; therefore, the search margin is the range of
322 // sort keys between these halfway points. Any vector with a sort key greater
323 // than the upper margin must be to the right of x at y, and likewise any
324 // vector with a sort key less than the lower margin must pass to the left
325 // of x at y.
326 TabVector* TabFind::RightTabForBox(const TBOX& box, bool crossing,
327  bool extended) {
328  if (v_it_.empty())
329  return NULL;
330  int top_y = box.top();
331  int bottom_y = box.bottom();
332  int mid_y = (top_y + bottom_y) / 2;
333  int right = crossing ? (box.left() + box.right()) / 2 : box.right();
334  int min_key, max_key;
335  SetupTabSearch(right, mid_y, &min_key, &max_key);
336  // Position the iterator at the first TabVector with sort_key >= min_key.
337  while (!v_it_.at_first() && v_it_.data()->sort_key() >= min_key)
338  v_it_.backward();
339  while (!v_it_.at_last() && v_it_.data()->sort_key() < min_key)
340  v_it_.forward();
341  // Find the leftmost tab vector that overlaps and has XAtY(mid_y) >= right.
342  TabVector* best_v = NULL;
343  int best_x = -1;
344  int key_limit = -1;
345  do {
346  TabVector* v = v_it_.data();
347  int x = v->XAtY(mid_y);
348  if (x >= right &&
349  (v->VOverlap(top_y, bottom_y) > 0 ||
350  (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
351  if (best_v == NULL || x < best_x) {
352  best_v = v;
353  best_x = x;
354  // We can guarantee that no better vector can be found if the
355  // sort key exceeds that of the best by max_key - min_key.
356  key_limit = v->sort_key() + max_key - min_key;
357  }
358  }
359  // Break when the search is done to avoid wrapping the iterator and
360  // thereby potentially slowing the next search.
361  if (v_it_.at_last() ||
362  (best_v != NULL && v->sort_key() > key_limit))
363  break; // Prevent restarting list for next call.
364  v_it_.forward();
365  } while (!v_it_.at_first());
366  return best_v;
367 }
368 
369 // As RightTabForBox, but finds the left TabVector instead.
370 TabVector* TabFind::LeftTabForBox(const TBOX& box, bool crossing,
371  bool extended) {
372  if (v_it_.empty())
373  return NULL;
374  int top_y = box.top();
375  int bottom_y = box.bottom();
376  int mid_y = (top_y + bottom_y) / 2;
377  int left = crossing ? (box.left() + box.right()) / 2 : box.left();
378  int min_key, max_key;
379  SetupTabSearch(left, mid_y, &min_key, &max_key);
380  // Position the iterator at the last TabVector with sort_key <= max_key.
381  while (!v_it_.at_last() && v_it_.data()->sort_key() <= max_key)
382  v_it_.forward();
383  while (!v_it_.at_first() && v_it_.data()->sort_key() > max_key) {
384  v_it_.backward();
385  }
386  // Find the rightmost tab vector that overlaps and has XAtY(mid_y) <= left.
387  TabVector* best_v = NULL;
388  int best_x = -1;
389  int key_limit = -1;
390  do {
391  TabVector* v = v_it_.data();
392  int x = v->XAtY(mid_y);
393  if (x <= left &&
394  (v->VOverlap(top_y, bottom_y) > 0 ||
395  (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
396  if (best_v == NULL || x > best_x) {
397  best_v = v;
398  best_x = x;
399  // We can guarantee that no better vector can be found if the
400  // sort key is less than that of the best by max_key - min_key.
401  key_limit = v->sort_key() - (max_key - min_key);
402  }
403  }
404  // Break when the search is done to avoid wrapping the iterator and
405  // thereby potentially slowing the next search.
406  if (v_it_.at_first() ||
407  (best_v != NULL && v->sort_key() < key_limit))
408  break; // Prevent restarting list for next call.
409  v_it_.backward();
410  } while (!v_it_.at_last());
411  return best_v;
412 }
413 
414 // Return true if the given width is close to one of the common
415 // widths in column_widths_.
416 bool TabFind::CommonWidth(int width) {
417  width /= kColumnWidthFactor;
418  ICOORDELT_IT it(&column_widths_);
419  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
420  ICOORDELT* w = it.data();
421  if (w->x() - 1 <= width && width <= w->y() + 1)
422  return true;
423  }
424  return false;
425 }
426 
427 // Return true if the sizes are more than a
428 // factor of 2 different.
429 bool TabFind::DifferentSizes(int size1, int size2) {
430  return size1 > size2 * 2 || size2 > size1 * 2;
431 }
432 
433 // Return true if the sizes are more than a
434 // factor of 5 different.
435 bool TabFind::VeryDifferentSizes(int size1, int size2) {
436  return size1 > size2 * 5 || size2 > size1 * 5;
437 }
438 
440 
441 // Top-level function to find TabVectors in an input page block.
442 // Returns false if the detected skew angle is impossible.
443 // Applies the detected skew angle to deskew the tabs, blobs and part_grid.
444 bool TabFind::FindTabVectors(TabVector_LIST* hlines,
445  BLOBNBOX_LIST* image_blobs, TO_BLOCK* block,
446  int min_gutter_width,
447  double tabfind_aligned_gap_fraction,
448  ColPartitionGrid* part_grid,
449  FCOORD* deskew, FCOORD* reskew) {
450  ScrollView* tab_win = FindInitialTabVectors(image_blobs, min_gutter_width,
451  tabfind_aligned_gap_fraction,
452  block);
453  ComputeColumnWidths(tab_win, part_grid);
455  SortVectors();
456  CleanupTabs();
457  if (!Deskew(hlines, image_blobs, block, deskew, reskew))
458  return false; // Skew angle is too large.
459  part_grid->Deskew(*deskew);
460  ApplyTabConstraints();
461  #ifndef GRAPHICS_DISABLED
463  tab_win = MakeWindow(640, 50, "FinalTabs");
464  if (textord_debug_images) {
465  tab_win->Image(AlignedBlob::textord_debug_pix().string(),
466  image_origin_.x(), image_origin_.y());
467  } else {
468  DisplayBoxes(tab_win);
469  DisplayTabs("FinalTabs", tab_win);
470  }
471  tab_win = DisplayTabVectors(tab_win);
472  }
473  #endif // GRAPHICS_DISABLED
474  return true;
475 }
476 
477 // Top-level function to not find TabVectors in an input page block,
478 // but setup for single column mode.
479 void TabFind::DontFindTabVectors(BLOBNBOX_LIST* image_blobs, TO_BLOCK* block,
480  FCOORD* deskew, FCOORD* reskew) {
481  InsertBlobsToGrid(false, false, image_blobs, this);
482  InsertBlobsToGrid(true, false, &block->blobs, this);
483  deskew->set_x(1.0f);
484  deskew->set_y(0.0f);
485  reskew->set_x(1.0f);
486  reskew->set_y(0.0f);
487 }
488 
489 // Cleans up the lists of blobs in the block ready for use by TabFind.
490 // Large blobs that look like text are moved to the main blobs list.
491 // Main blobs that are superseded by the image blobs are deleted.
// NOTE(review): the function's signature line is missing from this listing
// (the embedded numbering jumps from 491 to 493), so the body below is not
// attached to any declaration. Restore the signature from tabfind.h; the
// body requires a pointer parameter named block.
// NOTE(review): the only deletion visible here is the call to
// block->DeleteUnownedNoise() at the end - confirm that this is what the
// "superseded by the image blobs" sentence above refers to.
493  BLOBNBOX_IT large_it = &block->large_blobs;
494  BLOBNBOX_IT blob_it = &block->blobs;
495  int b_count = 0;
// Move every large blob that has an owner to the end of the main list.
496  for (large_it.mark_cycle_pt(); !large_it.cycled_list(); large_it.forward()) {
497  BLOBNBOX* large_blob = large_it.data();
498  if (large_blob->owner() != NULL) {
499  blob_it.add_to_end(large_it.extract());
500  ++b_count;
501  }
502  }
// Optional debug output: report the count and, when graphics are enabled,
// plot the graded and noise blobs in an "Image blobs" window.
503  if (textord_debug_tabfind) {
504  tprintf("Moved %d large blobs to normal list\n",
505  b_count);
506  #ifndef GRAPHICS_DISABLED
507  ScrollView* rej_win = MakeWindow(500, 300, "Image blobs");
508  block->plot_graded_blobs(rej_win);
509  block->plot_noise_blobs(rej_win);
510  rej_win->Update();
511  #endif // GRAPHICS_DISABLED
512  }
513  block->DeleteUnownedNoise();
514 }
515 
516 // Helper function to setup search limits for *TabForBox.
517 void TabFind::SetupTabSearch(int x, int y, int* min_key, int* max_key) {
518  int key1 = TabVector::SortKey(vertical_skew_, x, (y + tright_.y()) / 2);
519  int key2 = TabVector::SortKey(vertical_skew_, x, (y + bleft_.y()) / 2);
520  *min_key = MIN(key1, key2);
521  *max_key = MAX(key1, key2);
522 }
523 
525 #ifndef GRAPHICS_DISABLED
526  // For every vector, display it.
527  TabVector_IT it(&vectors_);
528  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
529  TabVector* vector = it.data();
530  vector->Display(tab_win);
531  }
532  tab_win->Update();
533 #endif
534  return tab_win;
535 }
536 
537 // PRIVATE CODE.
538 //
539 // First part of FindTabVectors, which may be used twice if the text
540 // is mostly of vertical alignment.
541 ScrollView* TabFind::FindInitialTabVectors(BLOBNBOX_LIST* image_blobs,
542  int min_gutter_width,
543  double tabfind_aligned_gap_fraction,
544  TO_BLOCK* block) {
546  ScrollView* line_win = MakeWindow(0, 0, "VerticalLines");
547  line_win = DisplayTabVectors(line_win);
548  }
549  // Prepare the grid.
550  if (image_blobs != NULL)
551  InsertBlobsToGrid(true, false, image_blobs, this);
552  InsertBlobsToGrid(true, false, &block->blobs, this);
553  ScrollView* initial_win = FindTabBoxes(min_gutter_width,
554  tabfind_aligned_gap_fraction);
555  FindAllTabVectors(min_gutter_width);
556 
558  SortVectors();
559  EvaluateTabs();
560  if (textord_tabfind_show_initialtabs && initial_win != NULL)
561  initial_win = DisplayTabVectors(initial_win);
562  MarkVerticalText();
563  return initial_win;
564 }
565 
566 // Helper displays all the boxes in the given vector on the given window.
567 static void DisplayBoxVector(const GenericVector<BLOBNBOX*>& boxes,
568  ScrollView* win) {
569  #ifndef GRAPHICS_DISABLED
570  for (int i = 0; i < boxes.size(); ++i) {
571  TBOX box = boxes[i]->bounding_box();
572  int left_x = box.left();
573  int right_x = box.right();
574  int top_y = box.top();
575  int bottom_y = box.bottom();
576  ScrollView::Color box_color = boxes[i]->BoxColor();
577  win->Pen(box_color);
578  win->Rectangle(left_x, bottom_y, right_x, top_y);
579  }
580  win->Update();
581  #endif // GRAPHICS_DISABLED
582 }
583 
584 // For each box in the grid, decide whether it is a candidate tab-stop,
585 // and if so add it to the left/right tab boxes.
586 ScrollView* TabFind::FindTabBoxes(int min_gutter_width,
587  double tabfind_aligned_gap_fraction) {
588  left_tab_boxes_.clear();
589  right_tab_boxes_.clear();
590  // For every bbox in the grid, determine whether it uses a tab on an edge.
591  GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> gsearch(this);
592  gsearch.StartFullSearch();
593  BLOBNBOX* bbox;
594  while ((bbox = gsearch.NextFullSearch()) != NULL) {
595  if (TestBoxForTabs(bbox, min_gutter_width, tabfind_aligned_gap_fraction)) {
596  // If it is any kind of tab, insert it into the vectors.
597  if (bbox->left_tab_type() != TT_NONE)
598  left_tab_boxes_.push_back(bbox);
599  if (bbox->right_tab_type() != TT_NONE)
600  right_tab_boxes_.push_back(bbox);
601  }
602  }
603  // Sort left tabs by left and right by right to see the outermost one first
604  // on a ragged tab.
605  left_tab_boxes_.sort(SortByBoxLeft<BLOBNBOX>);
606  right_tab_boxes_.sort(SortRightToLeft<BLOBNBOX>);
607  ScrollView* tab_win = NULL;
608  #ifndef GRAPHICS_DISABLED
610  tab_win = MakeWindow(0, 100, "InitialTabs");
611  tab_win->Pen(ScrollView::BLUE);
612  tab_win->Brush(ScrollView::NONE);
613  // Display the left and right tab boxes.
614  DisplayBoxVector(left_tab_boxes_, tab_win);
615  DisplayBoxVector(right_tab_boxes_, tab_win);
616  tab_win = DisplayTabs("Tabs", tab_win);
617  }
618  #endif // GRAPHICS_DISABLED
619  return tab_win;
620 }
621 
622 bool TabFind::TestBoxForTabs(BLOBNBOX* bbox, int min_gutter_width,
623  double tabfind_aligned_gap_fraction) {
624  GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> radsearch(this);
625  TBOX box = bbox->bounding_box();
626  // If there are separator lines, get the column edges.
627  int left_column_edge = bbox->left_rule();
628  int right_column_edge = bbox->right_rule();
629  // The edges of the bounding box of the blob being processed.
630  int left_x = box.left();
631  int right_x = box.right();
632  int top_y = box.top();
633  int bottom_y = box.bottom();
634  int height = box.height();
635  bool debug = WithinTestRegion(3, left_x, top_y);
636  if (debug) {
637  tprintf("Column edges for blob at (%d,%d)->(%d,%d) are [%d, %d]\n",
638  left_x, top_y, right_x, bottom_y,
639  left_column_edge, right_column_edge);
640  }
641  // Compute a search radius based on a multiple of the height.
642  int radius = (height * kTabRadiusFactor + gridsize_ - 1) / gridsize_;
643  radsearch.StartRadSearch((left_x + right_x)/2, (top_y + bottom_y)/2, radius);
644  // In Vertical Page mode, once we have an estimate of the vertical line
645  // spacing, the minimum amount of gutter space before a possible tab is
646  // increased under the assumption that column partition is always larger
647  // than line spacing.
648  int min_spacing =
649  static_cast<int>(height * tabfind_aligned_gap_fraction);
650  if (min_gutter_width > min_spacing)
651  min_spacing = min_gutter_width;
652  int min_ragged_gutter = kRaggedGutterMultiple * gridsize();
653  if (min_gutter_width > min_ragged_gutter)
654  min_ragged_gutter = min_gutter_width;
655  int target_right = left_x - min_spacing;
656  int target_left = right_x + min_spacing;
657  // We will be evaluating whether the left edge could be a left tab, and
658  // whether the right edge could be a right tab.
659  // A box can be a tab if its bool is_(left/right)_tab remains true, meaning
660  // that no blobs have been found in the gutter during the radial search.
661  // A box can also be a tab if there are objects in the gutter only above
662  // or only below, and there are aligned objects on the opposite side, but
663  // not too many unaligned objects. The maybe_(left/right)_tab_up counts
664  // aligned objects above and negatively counts unaligned objects above,
665  // and is set to -MAX_INT32 if a gutter object is found above.
666  // The other 3 maybe ints work similarly for the other sides.
667  // These conditions are very strict, to minimize false positives, and really
668  // only aligned tabs and outermost ragged tab blobs will qualify, so we
669  // also have maybe_ragged_left/right with less stringent rules.
670  // A blob that is maybe_ragged_left/right will be further qualified later,
671  // using the min_ragged_gutter.
672  bool is_left_tab = true;
673  bool is_right_tab = true;
674  bool maybe_ragged_left = true;
675  bool maybe_ragged_right = true;
676  int maybe_left_tab_up = 0;
677  int maybe_right_tab_up = 0;
678  int maybe_left_tab_down = 0;
679  int maybe_right_tab_down = 0;
680  if (bbox->leader_on_left()) {
681  is_left_tab = false;
682  maybe_ragged_left = false;
683  maybe_left_tab_up = -MAX_INT32;
684  maybe_left_tab_down = -MAX_INT32;
685  }
686  if (bbox->leader_on_right()) {
687  is_right_tab = false;
688  maybe_ragged_right = false;
689  maybe_right_tab_up = -MAX_INT32;
690  maybe_right_tab_down = -MAX_INT32;
691  }
692  int alignment_tolerance = static_cast<int>(resolution_ * kAlignedFraction);
693  BLOBNBOX* neighbour = NULL;
694  while ((neighbour = radsearch.NextRadSearch()) != NULL) {
695  if (neighbour == bbox)
696  continue;
697  TBOX nbox = neighbour->bounding_box();
698  int n_left = nbox.left();
699  int n_right = nbox.right();
700  if (debug)
701  tprintf("Neighbour at (%d,%d)->(%d,%d)\n",
702  n_left, nbox.bottom(), n_right, nbox.top());
703  // If the neighbouring blob is the wrong side of a separator line, then it
704  // "doesn't exist" as far as we are concerned.
705  if (n_right > right_column_edge || n_left < left_column_edge ||
706  left_x < neighbour->left_rule() || right_x > neighbour->right_rule())
707  continue; // Separator line in the way.
708  int n_mid_x = (n_left + n_right) / 2;
709  int n_mid_y = (nbox.top() + nbox.bottom()) / 2;
710  if (n_mid_x <= left_x && n_right >= target_right) {
711  if (debug)
712  tprintf("Not a left tab\n");
713  is_left_tab = false;
714  if (n_mid_y < top_y)
715  maybe_left_tab_down = -MAX_INT32;
716  if (n_mid_y > bottom_y)
717  maybe_left_tab_up = -MAX_INT32;
718  } else if (NearlyEqual(left_x, n_left, alignment_tolerance)) {
719  if (debug)
720  tprintf("Maybe a left tab\n");
721  if (n_mid_y > top_y && maybe_left_tab_up > -MAX_INT32)
722  ++maybe_left_tab_up;
723  if (n_mid_y < bottom_y && maybe_left_tab_down > -MAX_INT32)
724  ++maybe_left_tab_down;
725  } else if (n_left < left_x && n_right >= left_x) {
726  // Overlaps but not aligned so negative points on a maybe.
727  if (debug)
728  tprintf("Maybe Not a left tab\n");
729  if (n_mid_y > top_y && maybe_left_tab_up > -MAX_INT32)
730  --maybe_left_tab_up;
731  if (n_mid_y < bottom_y && maybe_left_tab_down > -MAX_INT32)
732  --maybe_left_tab_down;
733  }
734  if (n_left < left_x && nbox.y_overlap(box) && n_right >= target_right) {
735  maybe_ragged_left = false;
736  if (debug)
737  tprintf("Not a ragged left\n");
738  }
739  if (n_mid_x >= right_x && n_left <= target_left) {
740  if (debug)
741  tprintf("Not a right tab\n");
742  is_right_tab = false;
743  if (n_mid_y < top_y)
744  maybe_right_tab_down = -MAX_INT32;
745  if (n_mid_y > bottom_y)
746  maybe_right_tab_up = -MAX_INT32;
747  } else if (NearlyEqual(right_x, n_right, alignment_tolerance)) {
748  if (debug)
749  tprintf("Maybe a right tab\n");
750  if (n_mid_y > top_y && maybe_right_tab_up > -MAX_INT32)
751  ++maybe_right_tab_up;
752  if (n_mid_y < bottom_y && maybe_right_tab_down > -MAX_INT32)
753  ++maybe_right_tab_down;
754  } else if (n_right > right_x && n_left <= right_x) {
755  // Overlaps but not aligned so negative points on a maybe.
756  if (debug)
757  tprintf("Maybe Not a right tab\n");
758  if (n_mid_y > top_y && maybe_right_tab_up > -MAX_INT32)
759  --maybe_right_tab_up;
760  if (n_mid_y < bottom_y && maybe_right_tab_down > -MAX_INT32)
761  --maybe_right_tab_down;
762  }
763  if (n_right > right_x && nbox.y_overlap(box) && n_left <= target_left) {
764  maybe_ragged_right = false;
765  if (debug)
766  tprintf("Not a ragged right\n");
767  }
768  if (maybe_left_tab_down == -MAX_INT32 && maybe_left_tab_up == -MAX_INT32 &&
769  maybe_right_tab_down == -MAX_INT32 && maybe_right_tab_up == -MAX_INT32)
770  break;
771  }
772  if (is_left_tab || maybe_left_tab_up > 1 || maybe_left_tab_down > 1) {
774  } else if (maybe_ragged_left && ConfirmRaggedLeft(bbox, min_ragged_gutter)) {
776  } else {
777  bbox->set_left_tab_type(TT_NONE);
778  }
779  if (is_right_tab || maybe_right_tab_up > 1 || maybe_right_tab_down > 1) {
781  } else if (maybe_ragged_right &&
782  ConfirmRaggedRight(bbox, min_ragged_gutter)) {
784  } else {
786  }
787  if (debug) {
788  tprintf("Left result = %s, Right result=%s\n",
789  bbox->left_tab_type() == TT_MAYBE_ALIGNED ? "Aligned" :
790  (bbox->left_tab_type() == TT_MAYBE_RAGGED ? "Ragged" : "None"),
791  bbox->right_tab_type() == TT_MAYBE_ALIGNED ? "Aligned" :
792  (bbox->right_tab_type() == TT_MAYBE_RAGGED ? "Ragged" : "None"));
793  }
794  return bbox->left_tab_type() != TT_NONE || bbox->right_tab_type() != TT_NONE;
795 }
796 
797 // Returns true if there is nothing in the rectangle of width min_gutter to
798 // the left of bbox.
799 bool TabFind::ConfirmRaggedLeft(BLOBNBOX* bbox, int min_gutter) {
800  TBOX search_box(bbox->bounding_box());
801  search_box.set_right(search_box.left());
802  search_box.set_left(search_box.left() - min_gutter);
803  return NothingYOverlapsInBox(search_box, bbox->bounding_box());
804 }
805 
806 // Returns true if there is nothing in the rectangle of width min_gutter to
807 // the right of bbox.
808 bool TabFind::ConfirmRaggedRight(BLOBNBOX* bbox, int min_gutter) {
809  TBOX search_box(bbox->bounding_box());
810  search_box.set_left(search_box.right());
811  search_box.set_right(search_box.right() + min_gutter);
812  return NothingYOverlapsInBox(search_box, bbox->bounding_box());
813 }
814 
815 // Returns true if there is nothing in the given search_box that vertically
816 // overlaps target_box other than target_box itself.
817 bool TabFind::NothingYOverlapsInBox(const TBOX& search_box,
818  const TBOX& target_box) {
819  BlobGridSearch rsearch(this);
820  rsearch.StartRectSearch(search_box);
821  BLOBNBOX* blob;
822  while ((blob = rsearch.NextRectSearch()) != NULL) {
823  const TBOX& box = blob->bounding_box();
824  if (box.y_overlap(target_box) && !(box == target_box))
825  return false;
826  }
827  return true;
828 }
829 
830 void TabFind::FindAllTabVectors(int min_gutter_width) {
831  // A list of vectors that will be created in estimating the skew.
832  TabVector_LIST dummy_vectors;
833  // An estimate of the vertical direction, revised as more lines are added.
834  int vertical_x = 0;
835  int vertical_y = 1;
836  // Find an estimate of the vertical direction by finding some tab vectors.
837  // Slowly up the search size until we get some vectors.
838  for (int search_size = kMinVerticalSearch; search_size < kMaxVerticalSearch;
839  search_size += kMinVerticalSearch) {
840  int vector_count = FindTabVectors(search_size, TA_LEFT_ALIGNED,
841  min_gutter_width,
842  &dummy_vectors,
843  &vertical_x, &vertical_y);
844  vector_count += FindTabVectors(search_size, TA_RIGHT_ALIGNED,
845  min_gutter_width,
846  &dummy_vectors,
847  &vertical_x, &vertical_y);
848  if (vector_count > 0)
849  break;
850  }
851  // Get rid of the test vectors and reset the types of the tabs.
852  dummy_vectors.clear();
853  for (int i = 0; i < left_tab_boxes_.size(); ++i) {
854  BLOBNBOX* bbox = left_tab_boxes_[i];
855  if (bbox->left_tab_type() == TT_CONFIRMED)
857  }
858  for (int i = 0; i < right_tab_boxes_.size(); ++i) {
859  BLOBNBOX* bbox = right_tab_boxes_[i];
860  if (bbox->right_tab_type() == TT_CONFIRMED)
862  }
863  if (textord_debug_tabfind) {
864  tprintf("Beginning real tab search with vertical = %d,%d...\n",
865  vertical_x, vertical_y);
866  }
867  // Now do the real thing ,but keep the vectors in the dummy_vectors list
868  // until they are all done, so we don't get the tab vectors confused with
869  // the rule line vectors.
870  FindTabVectors(kMaxVerticalSearch, TA_LEFT_ALIGNED, min_gutter_width,
871  &dummy_vectors, &vertical_x, &vertical_y);
872  FindTabVectors(kMaxVerticalSearch, TA_RIGHT_ALIGNED, min_gutter_width,
873  &dummy_vectors, &vertical_x, &vertical_y);
874  FindTabVectors(kMaxRaggedSearch, TA_LEFT_RAGGED, min_gutter_width,
875  &dummy_vectors, &vertical_x, &vertical_y);
876  FindTabVectors(kMaxRaggedSearch, TA_RIGHT_RAGGED, min_gutter_width,
877  &dummy_vectors, &vertical_x, &vertical_y);
878  // Now add the vectors to the vectors_ list.
879  TabVector_IT v_it(&vectors_);
880  v_it.add_list_after(&dummy_vectors);
881  // Now use the summed (mean) vertical vector as the direction for everything.
882  SetVerticalSkewAndParellelize(vertical_x, vertical_y);
883 }
884 
885 // Helper for FindAllTabVectors finds the vectors of a particular type.
886 int TabFind::FindTabVectors(int search_size_multiple, TabAlignment alignment,
887  int min_gutter_width, TabVector_LIST* vectors,
888  int* vertical_x, int* vertical_y) {
889  TabVector_IT vector_it(vectors);
890  int vector_count = 0;
891  // Search the right or left tab boxes, looking for tab vectors.
892  bool right = alignment == TA_RIGHT_ALIGNED || alignment == TA_RIGHT_RAGGED;
893  const GenericVector<BLOBNBOX*>& boxes = right ? right_tab_boxes_
894  : left_tab_boxes_;
895  for (int i = 0; i < boxes.size(); ++i) {
896  BLOBNBOX* bbox = boxes[i];
897  if ((!right && bbox->left_tab_type() == TT_MAYBE_ALIGNED) ||
898  (right && bbox->right_tab_type() == TT_MAYBE_ALIGNED)) {
899  TabVector* vector = FindTabVector(search_size_multiple, min_gutter_width,
900  alignment,
901  bbox, vertical_x, vertical_y);
902  if (vector != NULL) {
903  ++vector_count;
904  vector_it.add_to_end(vector);
905  }
906  }
907  }
908  return vector_count;
909 }
910 
911 // Finds a vector corresponding to a tabstop running through the
912 // given box of the given alignment type.
913 // search_size_multiple is a multiple of height used to control
914 // the size of the search.
915 // vertical_x and y are updated with an estimate of the real
916 // vertical direction. (skew finding.)
917 // Returns NULL if no decent tabstop can be found.
918 TabVector* TabFind::FindTabVector(int search_size_multiple,
919  int min_gutter_width,
920  TabAlignment alignment,
921  BLOBNBOX* bbox,
922  int* vertical_x, int* vertical_y) {
923  int height = MAX(bbox->bounding_box().height(), gridsize());
924  AlignedBlobParams align_params(*vertical_x, *vertical_y,
925  height,
926  search_size_multiple, min_gutter_width,
927  resolution_, alignment);
928  // FindVerticalAlignment is in the parent (AlignedBlob) class.
929  return FindVerticalAlignment(align_params, bbox, vertical_x, vertical_y);
930 }
931 
932 // Set the vertical_skew_ member from the given vector and refit
933 // all vectors parallel to the skew vector.
934 void TabFind::SetVerticalSkewAndParellelize(int vertical_x, int vertical_y) {
935  // Fit the vertical vector into an ICOORD, which is 16 bit.
936  vertical_skew_.set_with_shrink(vertical_x, vertical_y);
938  tprintf("Vertical skew vector=(%d,%d)\n",
940  v_it_.set_to_list(&vectors_);
941  for (v_it_.mark_cycle_pt(); !v_it_.cycled_list(); v_it_.forward()) {
942  TabVector* v = v_it_.data();
943  v->Fit(vertical_skew_, true);
944  }
945  // Now sort the vectors as their direction has potentially changed.
946  SortVectors();
947 }
948 
949 // Sort all the current vectors using the given vertical direction vector.
950 void TabFind::SortVectors() {
951  vectors_.sort(TabVector::SortVectorsByKey);
952  v_it_.set_to_list(&vectors_);
953 }
954 
955 // Evaluate all the current tab vectors.
956 void TabFind::EvaluateTabs() {
957  TabVector_IT rule_it(&vectors_);
958  for (rule_it.mark_cycle_pt(); !rule_it.cycled_list(); rule_it.forward()) {
959  TabVector* tab = rule_it.data();
960  if (!tab->IsSeparator()) {
961  tab->Evaluate(vertical_skew_, this);
962  if (tab->BoxCount() < kMinEvaluatedTabs) {
963  if (textord_debug_tabfind > 2)
964  tab->Print("Too few boxes");
965  delete rule_it.extract();
966  v_it_.set_to_list(&vectors_);
967  } else if (WithinTestRegion(3, tab->startpt().x(), tab->startpt().y())) {
968  tab->Print("Evaluated tab");
969  }
970  }
971  }
972 }
973 
974 // Trace textlines from one side to the other of each tab vector, saving
975 // the most frequent column widths found in a list so that a given width
976 // can be tested for being a common width with a simple callback function.
977 void TabFind::ComputeColumnWidths(ScrollView* tab_win,
978  ColPartitionGrid* part_grid) {
979  #ifndef GRAPHICS_DISABLED
980  if (tab_win != NULL)
981  tab_win->Pen(ScrollView::WHITE);
982  #endif // GRAPHICS_DISABLED
983  // Accumulate column sections into a STATS
984  int col_widths_size = (tright_.x() - bleft_.x()) / kColumnWidthFactor;
985  STATS col_widths(0, col_widths_size + 1);
986  ApplyPartitionsToColumnWidths(part_grid, &col_widths);
987  #ifndef GRAPHICS_DISABLED
988  if (tab_win != NULL) {
989  tab_win->Update();
990  }
991  #endif // GRAPHICS_DISABLED
992  if (textord_debug_tabfind > 1)
993  col_widths.print();
994  // Now make a list of column widths.
995  MakeColumnWidths(col_widths_size, &col_widths);
996  // Turn the column width into a range.
997  ApplyPartitionsToColumnWidths(part_grid, NULL);
998 }
999 
1000 // Finds column width and:
1001 // if col_widths is not null (pass1):
1002 // pair-up tab vectors with existing ColPartitions and accumulate widths.
1003 // else (pass2):
1004 // find the largest real partition width for each recorded column width,
1005 // to be used as the minimum acceptable width.
1006 void TabFind::ApplyPartitionsToColumnWidths(ColPartitionGrid* part_grid,
1007  STATS* col_widths) {
1008  // For every ColPartition in the part_grid, add partners to the tabvectors
1009  // and accumulate the column widths.
1010  ColPartitionGridSearch gsearch(part_grid);
1011  gsearch.StartFullSearch();
1012  ColPartition* part;
1013  while ((part = gsearch.NextFullSearch()) != NULL) {
1014  BLOBNBOX_C_IT blob_it(part->boxes());
1015  if (blob_it.empty())
1016  continue;
1017  BLOBNBOX* left_blob = blob_it.data();
1018  blob_it.move_to_last();
1019  BLOBNBOX* right_blob = blob_it.data();
1020  TabVector* left_vector = LeftTabForBox(left_blob->bounding_box(),
1021  true, false);
1022  if (left_vector == NULL || left_vector->IsRightTab())
1023  continue;
1024  TabVector* right_vector = RightTabForBox(right_blob->bounding_box(),
1025  true, false);
1026  if (right_vector == NULL || right_vector->IsLeftTab())
1027  continue;
1028 
1029  int line_left = left_vector->XAtY(left_blob->bounding_box().bottom());
1030  int line_right = right_vector->XAtY(right_blob->bounding_box().bottom());
1031  // Add to STATS of measurements if the width is significant.
1032  int width = line_right - line_left;
1033  if (col_widths != NULL) {
1034  AddPartnerVector(left_blob, right_blob, left_vector, right_vector);
1035  if (width >= kMinColumnWidth)
1036  col_widths->add(width / kColumnWidthFactor, 1);
1037  } else {
1038  width /= kColumnWidthFactor;
1039  ICOORDELT_IT it(&column_widths_);
1040  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1041  ICOORDELT* w = it.data();
1042  if (NearlyEqual<int>(width, w->y(), 1)) {
1043  int true_width = part->bounding_box().width() / kColumnWidthFactor;
1044  if (true_width <= w->y() && true_width > w->x())
1045  w->set_x(true_width);
1046  break;
1047  }
1048  }
1049  }
1050  }
1051 }
1052 
1053 // Helper makes the list of common column widths in column_widths_ from the
1054 // input col_widths. Destroys the content of col_widths by repeatedly
1055 // finding the mode and erasing the peak.
1056 void TabFind::MakeColumnWidths(int col_widths_size, STATS* col_widths) {
1057  ICOORDELT_IT w_it(&column_widths_);
1058  int total_col_count = col_widths->get_total();
1059  while (col_widths->get_total() > 0) {
1060  int width = col_widths->mode();
1061  int col_count = col_widths->pile_count(width);
1062  col_widths->add(width, -col_count);
1063  // Get the entire peak.
1064  for (int left = width - 1; left > 0 &&
1065  col_widths->pile_count(left) > 0;
1066  --left) {
1067  int new_count = col_widths->pile_count(left);
1068  col_count += new_count;
1069  col_widths->add(left, -new_count);
1070  }
1071  for (int right = width + 1; right < col_widths_size &&
1072  col_widths->pile_count(right) > 0;
1073  ++right) {
1074  int new_count = col_widths->pile_count(right);
1075  col_count += new_count;
1076  col_widths->add(right, -new_count);
1077  }
1078  if (col_count > kMinLinesInColumn &&
1079  col_count > kMinFractionalLinesInColumn * total_col_count) {
1080  ICOORDELT* w = new ICOORDELT(0, width);
1081  w_it.add_after_then_move(w);
1083  tprintf("Column of width %d has %d = %.2f%% lines\n",
1084  width * kColumnWidthFactor, col_count,
1085  100.0 * col_count / total_col_count);
1086  }
1087  }
1088 }
1089 
1090 // Mark blobs as being in a vertical text line where that is the case.
1091 // Returns true if the majority of the image is vertical text lines.
1092 void TabFind::MarkVerticalText() {
1094  tprintf("Checking for vertical lines\n");
1095  BlobGridSearch gsearch(this);
1096  gsearch.StartFullSearch();
1097  BLOBNBOX* blob = NULL;
1098  while ((blob = gsearch.NextFullSearch()) != NULL) {
1099  if (blob->region_type() < BRT_UNKNOWN)
1100  continue;
1101  if (blob->UniquelyVertical()) {
1103  }
1104  }
1105 }
1106 
1107 int TabFind::FindMedianGutterWidth(TabVector_LIST *lines) {
1108  TabVector_IT it(lines);
1109  int prev_right = -1;
1110  int max_gap = static_cast<int>(kMaxGutterWidthAbsolute * resolution_);
1111  STATS gaps(0, max_gap);
1112  STATS heights(0, max_gap);
1113  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1114  TabVector* v = it.data();
1115  TabVector* partner = v->GetSinglePartner();
1116  if (!v->IsLeftTab() || v->IsSeparator() || !partner) continue;
1117  heights.add(partner->startpt().x() - v->startpt().x(), 1);
1118  if (prev_right > 0 && v->startpt().x() > prev_right) {
1119  gaps.add(v->startpt().x() - prev_right, 1);
1120  }
1121  prev_right = partner->startpt().x();
1122  }
1124  tprintf("TabGutter total %d median_gap %.2f median_hgt %.2f\n",
1125  gaps.get_total(), gaps.median(), heights.median());
1126  if (gaps.get_total() < kMinLinesInColumn) return 0;
1127  return static_cast<int>(gaps.median());
1128 }
1129 
1130 // Find the next adjacent (looking to the left or right) blob on this text
1131 // line, with the constraint that it must vertically significantly overlap
1132 // the [top_y, bottom_y] range.
1133 // If ignore_images is true, then blobs with aligned_text() < 0 are treated
1134 // as if they do not exist.
1135 BLOBNBOX* TabFind::AdjacentBlob(const BLOBNBOX* bbox,
1136  bool look_left, bool ignore_images,
1137  double min_overlap_fraction,
1138  int gap_limit, int top_y, int bottom_y) {
1139  GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> sidesearch(this);
1140  const TBOX& box = bbox->bounding_box();
1141  int left = box.left();
1142  int right = box.right();
1143  int mid_x = (left + right) / 2;
1144  sidesearch.StartSideSearch(mid_x, bottom_y, top_y);
1145  int best_gap = 0;
1146  bool debug = WithinTestRegion(3, left, bottom_y);
1147  BLOBNBOX* result = NULL;
1148  BLOBNBOX* neighbour = NULL;
1149  while ((neighbour = sidesearch.NextSideSearch(look_left)) != NULL) {
1150  if (debug) {
1151  tprintf("Adjacent blob: considering box:");
1152  neighbour->bounding_box().print();
1153  }
1154  if (neighbour == bbox ||
1155  (ignore_images && neighbour->region_type() < BRT_UNKNOWN))
1156  continue;
1157  const TBOX& nbox = neighbour->bounding_box();
1158  int n_top_y = nbox.top();
1159  int n_bottom_y = nbox.bottom();
1160  int v_overlap = MIN(n_top_y, top_y) - MAX(n_bottom_y, bottom_y);
1161  int height = top_y - bottom_y;
1162  int n_height = n_top_y - n_bottom_y;
1163  if (v_overlap > min_overlap_fraction * MIN(height, n_height) &&
1164  (min_overlap_fraction == 0.0 || !DifferentSizes(height, n_height))) {
1165  int n_left = nbox.left();
1166  int n_right = nbox.right();
1167  int h_gap = MAX(n_left, left) - MIN(n_right, right);
1168  int n_mid_x = (n_left + n_right) / 2;
1169  if (look_left == (n_mid_x < mid_x) && n_mid_x != mid_x) {
1170  if (h_gap > gap_limit) {
1171  // Hit a big gap before next tab so don't return anything.
1172  if (debug)
1173  tprintf("Giving up due to big gap = %d vs %d\n",
1174  h_gap, gap_limit);
1175  return result;
1176  }
1177  if (h_gap > 0 && (look_left ? neighbour->right_tab_type()
1178  : neighbour->left_tab_type()) >= TT_CONFIRMED) {
1179  // Hit a tab facing the wrong way. Stop in case we are crossing
1180  // the column boundary.
1181  if (debug)
1182  tprintf("Collision with like tab of type %d at %d,%d\n",
1183  look_left ? neighbour->right_tab_type()
1184  : neighbour->left_tab_type(),
1185  n_left, nbox.bottom());
1186  return result;
1187  }
1188  // This is a good fit to the line. Continue with this
1189  // neighbour as the bbox if the best gap.
1190  if (result == NULL || h_gap < best_gap) {
1191  if (debug)
1192  tprintf("Good result\n");
1193  result = neighbour;
1194  best_gap = h_gap;
1195  } else {
1196  // The new one is worse, so we probably already have the best result.
1197  return result;
1198  }
1199  } else if (debug) {
1200  tprintf("Wrong way\n");
1201  }
1202  } else if (debug) {
1203  tprintf("Insufficient overlap\n");
1204  }
1205  }
1206  if (WithinTestRegion(3, left, box.top()))
1207  tprintf("Giving up due to end of search\n");
1208  return result; // Hit the edge and found nothing.
1209 }
1210 
1211 // Add a bi-directional partner relationship between the left
1212 // and the right. If one (or both) of the vectors is a separator,
1213 // extend a nearby extendable vector or create a new one of the
1214 // correct type, using the given left or right blob as a guide.
1215 void TabFind::AddPartnerVector(BLOBNBOX* left_blob, BLOBNBOX* right_blob,
1216  TabVector* left, TabVector* right) {
1217  const TBOX& left_box = left_blob->bounding_box();
1218  const TBOX& right_box = right_blob->bounding_box();
1219  if (left->IsSeparator()) {
1220  // Try to find a nearby left edge to extend.
1221  TabVector* v = LeftTabForBox(left_box, true, true);
1222  if (v != NULL && v != left && v->IsLeftTab() &&
1223  v->XAtY(left_box.top()) > left->XAtY(left_box.top())) {
1224  left = v; // Found a good replacement.
1225  left->ExtendToBox(left_blob);
1226  } else {
1227  // Fake a vector.
1228  left = new TabVector(*left, TA_LEFT_RAGGED, vertical_skew_, left_blob);
1229  vectors_.add_sorted(TabVector::SortVectorsByKey, left);
1230  v_it_.move_to_first();
1231  }
1232  }
1233  if (right->IsSeparator()) {
1234  // Try to find a nearby left edge to extend.
1235  if (WithinTestRegion(3, right_box.right(), right_box.bottom())) {
1236  tprintf("Box edge (%d,%d-%d)",
1237  right_box.right(), right_box.bottom(), right_box.top());
1238  right->Print(" looking for improvement for");
1239  }
1240  TabVector* v = RightTabForBox(right_box, true, true);
1241  if (v != NULL && v != right && v->IsRightTab() &&
1242  v->XAtY(right_box.top()) < right->XAtY(right_box.top())) {
1243  right = v; // Found a good replacement.
1244  right->ExtendToBox(right_blob);
1245  if (WithinTestRegion(3, right_box.right(), right_box.bottom())) {
1246  right->Print("Extended vector");
1247  }
1248  } else {
1249  // Fake a vector.
1250  right = new TabVector(*right, TA_RIGHT_RAGGED, vertical_skew_,
1251  right_blob);
1252  vectors_.add_sorted(TabVector::SortVectorsByKey, right);
1253  v_it_.move_to_first();
1254  if (WithinTestRegion(3, right_box.right(), right_box.bottom())) {
1255  right->Print("Created new vector");
1256  }
1257  }
1258  }
1259  left->AddPartner(right);
1260  right->AddPartner(left);
1261 }
1262 
1263 // Remove separators and unused tabs from the main vectors_ list
1264 // to the dead_vectors_ list.
1265 void TabFind::CleanupTabs() {
1266  // TODO(rays) Before getting rid of separators and unused vectors, it
1267  // would be useful to try moving ragged vectors outwards to see if this
1268  // allows useful extension. Could be combined with checking ends of partners.
1269  TabVector_IT it(&vectors_);
1270  TabVector_IT dead_it(&dead_vectors_);
1271  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1272  TabVector* v = it.data();
1273  if (v->IsSeparator() || v->Partnerless()) {
1274  dead_it.add_after_then_move(it.extract());
1275  v_it_.set_to_list(&vectors_);
1276  } else {
1277  v->FitAndEvaluateIfNeeded(vertical_skew_, this);
1278  }
1279  }
1280 }
1281 
1282 // Apply the given rotation to the given list of blobs.
1283 void TabFind::RotateBlobList(const FCOORD& rotation, BLOBNBOX_LIST* blobs) {
1284  BLOBNBOX_IT it(blobs);
1285  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1286  it.data()->rotate_box(rotation);
1287  }
1288 }
1289 
1290 // Recreate the grid with deskewed BLOBNBOXes.
1291 // Returns false if the detected skew angle is impossible.
1292 bool TabFind::Deskew(TabVector_LIST* hlines, BLOBNBOX_LIST* image_blobs,
1293  TO_BLOCK* block, FCOORD* deskew, FCOORD* reskew) {
1294  ComputeDeskewVectors(deskew, reskew);
1295  if (deskew->x() < kCosMaxSkewAngle)
1296  return false;
1297  RotateBlobList(*deskew, image_blobs);
1298  RotateBlobList(*deskew, &block->blobs);
1299  RotateBlobList(*deskew, &block->small_blobs);
1300  RotateBlobList(*deskew, &block->noise_blobs);
1301  if (textord_debug_images) {
1302  // Rotate the debug pix and arrange for it to be drawn at the correct
1303  // pixel offset.
1304  Pix* pix_grey = pixRead(AlignedBlob::textord_debug_pix().string());
1305  int width = pixGetWidth(pix_grey);
1306  int height = pixGetHeight(pix_grey);
1307  float angle = atan2(deskew->y(), deskew->x());
1308  // Positive angle is clockwise to pixRotate.
1309  Pix* pix_rot = pixRotate(pix_grey, -angle, L_ROTATE_AREA_MAP,
1310  L_BRING_IN_WHITE, width, height);
1311  // The image must be translated by the rotation of its center, since it
1312  // has just been rotated about its center.
1313  ICOORD center_offset(width / 2, height / 2);
1314  ICOORD new_center_offset(center_offset);
1315  new_center_offset.rotate(*deskew);
1316  image_origin_ += new_center_offset - center_offset;
1317  // The image grew as it was rotated, so offset the (top/left) origin
1318  // by half the change in size. y is opposite to x because it is drawn
1319  // at ist top/left, not bottom/left.
1320  ICOORD corner_offset((width - pixGetWidth(pix_rot)) / 2,
1321  (pixGetHeight(pix_rot) - height) / 2);
1322  image_origin_ += corner_offset;
1323  pixWrite(AlignedBlob::textord_debug_pix().string(), pix_rot, IFF_PNG);
1324  pixDestroy(&pix_grey);
1325  pixDestroy(&pix_rot);
1326  }
1327 
1328  // Rotate the horizontal vectors. The vertical vectors don't need
1329  // rotating as they can just be refitted.
1330  TabVector_IT h_it(hlines);
1331  for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
1332  TabVector* h = h_it.data();
1333  h->Rotate(*deskew);
1334  }
1335  TabVector_IT d_it(&dead_vectors_);
1336  for (d_it.mark_cycle_pt(); !d_it.cycled_list(); d_it.forward()) {
1337  TabVector* d = d_it.data();
1338  d->Rotate(*deskew);
1339  }
1340  SetVerticalSkewAndParellelize(0, 1);
1341  // Rebuild the grid to the new size.
1342  TBOX grid_box(bleft_, tright_);
1343  grid_box.rotate_large(*deskew);
1344  Init(gridsize(), grid_box.botleft(), grid_box.topright());
1345  InsertBlobsToGrid(false, false, image_blobs, this);
1346  InsertBlobsToGrid(true, false, &block->blobs, this);
1347  return true;
1348 }
1349 
1350 // Flip the vertical and horizontal lines and rotate the grid ready
1351 // for working on the rotated image.
1352 // This also makes parameter adjustments for FindInitialTabVectors().
1353 void TabFind::ResetForVerticalText(const FCOORD& rotate, const FCOORD& rerotate,
1354  TabVector_LIST* horizontal_lines,
1355  int* min_gutter_width) {
1356  // Rotate the horizontal and vertical vectors and swap them over.
1357  // Only the separators are kept and rotated; other tabs are used
1358  // to estimate the gutter width then thrown away.
1359  TabVector_LIST ex_verticals;
1360  TabVector_IT ex_v_it(&ex_verticals);
1361  TabVector_LIST vlines;
1362  TabVector_IT v_it(&vlines);
1363  while (!v_it_.empty()) {
1364  TabVector* v = v_it_.extract();
1365  if (v->IsSeparator()) {
1366  v->Rotate(rotate);
1367  ex_v_it.add_after_then_move(v);
1368  } else {
1369  v_it.add_after_then_move(v);
1370  }
1371  v_it_.forward();
1372  }
1373 
1374  // Adjust the min gutter width for better tabbox selection
1375  // in 2nd call to FindInitialTabVectors().
1376  int median_gutter = FindMedianGutterWidth(&vlines);
1377  if (median_gutter > *min_gutter_width)
1378  *min_gutter_width = median_gutter;
1379 
1380  TabVector_IT h_it(horizontal_lines);
1381  for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
1382  TabVector* h = h_it.data();
1383  h->Rotate(rotate);
1384  }
1385  v_it_.add_list_after(horizontal_lines);
1386  v_it_.move_to_first();
1387  h_it.set_to_list(horizontal_lines);
1388  h_it.add_list_after(&ex_verticals);
1389 
1390  // Rebuild the grid to the new size.
1391  TBOX grid_box(bleft(), tright());
1392  grid_box.rotate_large(rotate);
1393  Init(gridsize(), grid_box.botleft(), grid_box.topright());
1394 }
1395 
1396 // Clear the grid and get rid of the tab vectors, but not separators,
1397 // ready to start again.
1399  v_it_.move_to_first();
1400  for (v_it_.mark_cycle_pt(); !v_it_.cycled_list(); v_it_.forward()) {
1401  if (!v_it_.data()->IsSeparator())
1402  delete v_it_.extract();
1403  }
1404  Clear();
1405 }
1406 
1407 // Reflect the separator tab vectors and the grids in the y-axis.
1408 // Can only be called after Reset!
1410  TabVector_LIST temp_list;
1411  TabVector_IT temp_it(&temp_list);
1412  v_it_.move_to_first();
1413  // The TabVector list only contains vertical lines, but they need to be
1414  // reflected and the list needs to be reversed, so they are still in
1415  // sort_key order.
1416  while (!v_it_.empty()) {
1417  TabVector* v = v_it_.extract();
1418  v_it_.forward();
1419  v->ReflectInYAxis();
1420  temp_it.add_before_then_move(v);
1421  }
1422  v_it_.add_list_after(&temp_list);
1423  v_it_.move_to_first();
1424  // Reset this grid with reflected bounding boxes.
1425  TBOX grid_box(bleft(), tright());
1426  int tmp = grid_box.left();
1427  grid_box.set_left(-grid_box.right());
1428  grid_box.set_right(-tmp);
1429  Init(gridsize(), grid_box.botleft(), grid_box.topright());
1430 }
1431 
1432 // Compute the rotation required to deskew, and its inverse rotation.
1433 void TabFind::ComputeDeskewVectors(FCOORD* deskew, FCOORD* reskew) {
1434  double length = vertical_skew_ % vertical_skew_;
1435  length = sqrt(length);
1436  deskew->set_x(static_cast<float>(vertical_skew_.y() / length));
1437  deskew->set_y(static_cast<float>(vertical_skew_.x() / length));
1438  reskew->set_x(deskew->x());
1439  reskew->set_y(-deskew->y());
1440 }
1441 
1442 // Compute and apply constraints to the end positions of TabVectors so
1443 // that where possible partners end at the same y coordinate.
1444 void TabFind::ApplyTabConstraints() {
1445  TabVector_IT it(&vectors_);
1446  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1447  TabVector* v = it.data();
1448  v->SetupConstraints();
1449  }
1450  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1451  TabVector* v = it.data();
1452  // With the first and last partner, we want a common bottom and top,
1453  // respectively, and for each change of partner, we want a common
1454  // top of first with bottom of next.
1455  v->SetupPartnerConstraints();
1456  }
1457  // TODO(rays) The back-to-back pairs should really be done like the
1458  // front-to-front pairs, but there is no convenient way of producing the
1459  // list of partners like there is with the front-to-front.
1460  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1461  TabVector* v = it.data();
1462  if (!v->IsRightTab())
1463  continue;
1464  // For each back-to-back pair of vectors, try for common top and bottom.
1465  TabVector_IT partner_it(it);
1466  for (partner_it.forward(); !partner_it.at_first(); partner_it.forward()) {
1467  TabVector* partner = partner_it.data();
1468  if (!partner->IsLeftTab() || !v->VOverlap(*partner))
1469  continue;
1470  v->SetupPartnerConstraints(partner);
1471  }
1472  }
1473  // Now actually apply the constraints to get common start/end points.
1474  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1475  TabVector* v = it.data();
1476  if (!v->IsSeparator())
1477  v->ApplyConstraints();
1478  }
1479  // TODO(rays) Where constraint application fails, it would be good to try
1480  // checking the ends to see if they really should be moved.
1481 }
1482 
1483 } // namespace tesseract.
void set_x(inT16 xin)
rewrite function
Definition: points.h:61
void InsertBlobsToGrid(bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
Definition: tabfind.cpp:113
inT32 get_total() const
Definition: statistc.h:86
const double kMinBaselineCoverage
GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > BlobGridSearch
Definition: blobgrid.h:31
int RightEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:303
void set_right_crossing_rule(int new_right)
Definition: blobbox.h:319
void Pen(Color color)
Definition: scrollview.cpp:726
int size() const
Definition: genericvector.h:72
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
static bool WithinTestRegion(int detail_level, int x, int y)
const ICOORD & botleft() const
Definition: rect.h:88
#define MAX(x, y)
Definition: ndminx.h:24
static void RotateBlobList(const FCOORD &rotation, BLOBNBOX_LIST *blobs)
Definition: tabfind.cpp:1283
int XAtY(int y) const
Definition: tabvector.h:189
const int kTabRadiusFactor
Definition: tabfind.cpp:35
float x() const
Definition: points.h:209
bool textord_debug_images
Definition: alignedblob.cpp:33
static bool UnMergeableType(BlobRegionType type)
Definition: blobbox.h:415
int push_back(T object)
static bool DifferentSizes(int size1, int size2)
Definition: tabfind.cpp:429
bool leader_on_left() const
Definition: blobbox.h:343
bool joined_to_prev() const
Definition: blobbox.h:241
static void Update()
Definition: scrollview.cpp:715
const int kMinVerticalSearch
Definition: tabfind.cpp:37
static bool VeryDifferentSizes(int size1, int size2)
Definition: tabfind.cpp:435
#define tprintf(...)
Definition: tprintf.h:31
#define MIN(x, y)
Definition: ndminx.h:28
const double kAlignedFraction
Definition: alignedblob.cpp:39
Definition: statistc.h:33
const int kMaxTextLineBlobRatio
Definition: tabfind.cpp:72
void set_right(int x)
Definition: rect.h:78
void print() const
Definition: rect.h:270
const double kMinGutterWidthAbsolute
Definition: tabfind.cpp:49
const int kMaxVerticalSearch
Definition: tabfind.cpp:38
inT32 mode() const
Definition: statistc.cpp:118
int VOverlap(const TabVector &other) const
Definition: tabvector.h:199
void add(inT32 value, inT32 count)
Definition: statistc.cpp:104
int LeftEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:308
#define BOOL_VAR(name, val, comment)
Definition: params.h:280
void Rotate(const FCOORD &rotation)
Definition: tabvector.cpp:281
const double kMinImageArea
Definition: tabfind.cpp:77
static const STRING & textord_debug_pix()
Definition: alignedblob.h:112
bool CommonWidth(int width)
Definition: tabfind.cpp:416
void Image(struct Pix *image, int x_pos, int y_pos)
Definition: scrollview.cpp:773
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:913
const double kCosMaxSkewAngle
Definition: tabfind.cpp:81
inT16 right() const
Definition: rect.h:75
BBC * NextSideSearch(bool right_to_left)
Definition: bbgrid.h:764
void set_left(int x)
Definition: rect.h:71
TabType left_tab_type() const
Definition: blobbox.h:256
void rotate_large(const FCOORD &vec)
Definition: rect.cpp:72
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:771
TabVector * RightTabForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:326
ScrollView * DisplayTabs(const char *window_name, ScrollView *tab_win)
bool InsertBlob(bool h_spread, bool v_spread, BLOBNBOX *blob, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
Definition: tabfind.cpp:140
const int kRaggedGutterMultiple
Definition: tabfind.cpp:53
static void MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors, BlobGrid *grid)
Definition: tabvector.cpp:361
void ResetForVerticalText(const FCOORD &rotate, const FCOORD &rerotate, TabVector_LIST *horizontal_lines, int *min_gutter_width)
Definition: tabfind.cpp:1353
void DeleteUnownedNoise()
Definition: blobbox.cpp:1031
int textord_debug_tabfind
Definition: alignedblob.cpp:27
int ExtendedOverlap(int top_y, int bottom_y) const
Definition: tabvector.h:208
static int SortKey(const ICOORD &vertical, int x, int y)
Definition: tabvector.h:280
const double kMaxHorizontalGap
Definition: tabfind.cpp:64
void DontFindTabVectors(BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, FCOORD *deskew, FCOORD *reskew)
Definition: tabfind.cpp:479
inT16 y() const
access_function
Definition: points.h:56
const double kMinFractionalLinesInColumn
Definition: tabfind.cpp:45
bool NearlyEqual(T x, T y, T tolerance)
Definition: host.h:148
inT16 left() const
Definition: rect.h:68
virtual ~TabFind()
Definition: tabfind.cpp:99
void set_x(float xin)
rewrite function
Definition: points.h:216
void GutterWidthAndNeighbourGap(int tab_x, int mean_height, int max_gutter, bool left, BLOBNBOX *bbox, int *gutter_width, int *neighbour_gap)
Definition: tabfind.cpp:230
int gridsize() const
Definition: bbgrid.h:63
BlobRegionType region_type() const
Definition: blobbox.h:268
ScrollView * DisplayTabVectors(ScrollView *tab_win)
Definition: tabfind.cpp:524
void SetupTabSearch(int x, int y, int *min_key, int *max_key)
Definition: tabfind.cpp:517
void Brush(Color color)
Definition: scrollview.cpp:732
void Deskew(const FCOORD &deskew)
void set_y(float yin)
rewrite function
Definition: points.h:220
TabFind(int gridsize, const ICOORD &bleft, const ICOORD &tright, TabVector_LIST *vlines, int vertical_x, int vertical_y, int resolution)
Definition: tabfind.cpp:86
const double kSmoothFactor
Definition: tabfind.cpp:58
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:770
void set_left_crossing_rule(int new_left)
Definition: blobbox.h:313
bool y_overlap(const TBOX &box) const
Definition: rect.h:418
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
const int kMinEvaluatedTabs
Definition: tabfind.cpp:69
const ICOORD & bleft() const
Definition: bbgrid.h:72
bool FindTabVectors(TabVector_LIST *hlines, BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, int min_gutter_width, double tabfind_aligned_gap_fraction, ColPartitionGrid *part_grid, FCOORD *deskew, FCOORD *reskew)
Definition: tabfind.cpp:444
void SetBlobRuleEdges(BLOBNBOX_LIST *blobs)
Definition: tabfind.cpp:164
#define MAX_INT32
Definition: host.h:120
integer coordinate
Definition: points.h:30
inT16 bottom() const
Definition: rect.h:61
const double kMaxGutterWidthAbsolute
Definition: tabfind.cpp:51
tesseract::ColPartition * owner() const
Definition: blobbox.h:337
static int SortVectorsByKey(const void *v1, const void *v2)
Definition: tabvector.h:294
inT16 height() const
Definition: rect.h:104
bool IsLeftTab() const
Definition: tabvector.h:213
void StartSideSearch(int x, int ymin, int ymax)
Definition: bbgrid.h:749
void plot_graded_blobs(ScrollView *to_win)
Definition: blobbox.cpp:1065
void set_right_rule(int new_right)
Definition: blobbox.h:307
inT16 width() const
Definition: rect.h:111
const double kMaxBaselineError
ScrollView * MakeWindow(int x, int y, const char *window_name)
void TidyBlobs(TO_BLOCK *block)
Definition: tabfind.cpp:492
inT16 x() const
access function
Definition: points.h:52
void SetBlockRuleEdges(TO_BLOCK *block)
Definition: tabfind.cpp:155
Definition: rect.h:30
void set_region_type(BlobRegionType new_type)
Definition: blobbox.h:271
bool IsSeparator() const
Definition: tabvector.h:221
float y() const
Definition: points.h:212
void set_left_rule(int new_left)
Definition: blobbox.h:301
void Rectangle(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:606
int left_rule() const
Definition: blobbox.h:298
bool UniquelyVertical() const
Definition: blobbox.h:395
bool textord_tabfind_show_initialtabs
Definition: tabfind.cpp:83
inT32 pile_count(inT32 value) const
Definition: statistc.h:78
const ICOORD & tright() const
Definition: bbgrid.h:75
int sort_key() const
Definition: tabvector.h:158
#define NULL
Definition: host.h:144
const ICOORD & topright() const
Definition: rect.h:100
const TBOX & bounding_box() const
Definition: blobbox.h:215
void set_right_tab_type(TabType new_type)
Definition: blobbox.h:265
bool leader_on_right() const
Definition: blobbox.h:349
TabVector * LeftTabForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:370
ICOORD tright_
Definition: bbgrid.h:91
const double kLineFragmentAspectRatio
Definition: tabfind.cpp:56
const int kMinLinesInColumn
Definition: tabfind.cpp:41
ICOORD vertical_skew_
Definition: tabfind.h:367
const int kColumnWidthFactor
Definition: tabfind.h:42
void set_with_shrink(int x, int y)
Set from the given x,y, shrinking the vector to fit if needed.
Definition: points.cpp:43
void plot_noise_blobs(ScrollView *to_win)
Definition: blobbox.cpp:1057
const int kMinColumnWidth
Definition: colfind.cpp:49
inT16 top() const
Definition: rect.h:54
bool textord_tabfind_show_finaltabs
Definition: tabfind.cpp:84
ScrollView * FindInitialTabVectors(BLOBNBOX_LIST *image_blobs, int min_gutter_width, double tabfind_aligned_gap_fraction, TO_BLOCK *block)
Definition: tabfind.cpp:541
BlobTextFlowType flow() const
Definition: blobbox.h:280
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:772
Definition: points.h:189
TabType right_tab_type() const
Definition: blobbox.h:262
const int kMinTextLineBlobRatio
Definition: tabfind.cpp:75
const double kCharVerticalOverlapFraction
Definition: tabfind.cpp:62
const int kMaxRaggedSearch
Definition: tabfind.cpp:39
void Display(ScrollView *tab_win)
Definition: tabvector.cpp:551
TabVector * FindVerticalAlignment(AlignedBlobParams align_params, BLOBNBOX *bbox, int *vertical_x, int *vertical_y)
int right_rule() const
Definition: blobbox.h:304
int GutterWidth(int bottom_y, int top_y, const TabVector &v, bool ignore_unmergeables, int max_gutter_width, int *required_shift)
Definition: tabfind.cpp:183
BLOBNBOX_LIST blobs
Definition: blobbox.h:768
void set_left_tab_type(TabType new_type)
Definition: blobbox.h:259