tesseract  5.0.0-alpha-619-ge9db
tesseract::TabFind Class Reference

#include <tabfind.h>

Inheritance diagram for tesseract::TabFind:
tesseract::AlignedBlob tesseract::BlobGrid tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > tesseract::GridBase tesseract::ColumnFinder

Public Member Functions

 TabFind (int gridsize, const ICOORD &bleft, const ICOORD &tright, TabVector_LIST *vlines, int vertical_x, int vertical_y, int resolution)
 
 ~TabFind () override
 
void InsertBlobsToGrid (bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
 
bool InsertBlob (bool h_spread, bool v_spread, BLOBNBOX *blob, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
 
void SetBlockRuleEdges (TO_BLOCK *block)
 
void SetBlobRuleEdges (BLOBNBOX_LIST *blobs)
 
int GutterWidth (int bottom_y, int top_y, const TabVector &v, bool ignore_unmergeables, int max_gutter_width, int *required_shift)
 
void GutterWidthAndNeighbourGap (int tab_x, int mean_height, int max_gutter, bool left, BLOBNBOX *bbox, int *gutter_width, int *neighbour_gap)
 
int RightEdgeForBox (const TBOX &box, bool crossing, bool extended)
 
int LeftEdgeForBox (const TBOX &box, bool crossing, bool extended)
 
TabVector * RightTabForBox (const TBOX &box, bool crossing, bool extended)
 
TabVector * LeftTabForBox (const TBOX &box, bool crossing, bool extended)
 
bool CommonWidth (int width)
 
WidthCallback WidthCB ()
 
const ICOORD & image_origin () const
 
- Public Member Functions inherited from tesseract::AlignedBlob
 AlignedBlob (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
 ~AlignedBlob () override
 
ScrollView * DisplayTabs (const char *window_name, ScrollView *tab_win)
 
TabVector * FindVerticalAlignment (AlignedBlobParams align_params, BLOBNBOX *bbox, int *vertical_x, int *vertical_y)
 
- Public Member Functions inherited from tesseract::BlobGrid
 BlobGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
 ~BlobGrid () override
 
void InsertBlobList (BLOBNBOX_LIST *blobs)
 
- Public Member Functions inherited from tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >
 BBGrid ()
 
 BBGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
 ~BBGrid () override
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
void Clear ()
 
void ClearGridData (void(*free_method)(BLOBNBOX *))
 
void InsertBBox (bool h_spread, bool v_spread, BLOBNBOX *bbox)
 
void InsertPixPtBBox (int left, int bottom, Pix *pix, BLOBNBOX *bbox)
 
void RemoveBBox (BLOBNBOX *bbox)
 
bool RectangleEmpty (const TBOX &rect)
 
IntGrid * CountCellElements ()
 
ScrollView * MakeWindow (int x, int y, const char *window_name)
 
void DisplayBoxes (ScrollView *window)
 
void AssertNoDuplicates ()
 
virtual void HandleClick (int x, int y)
 
- Public Member Functions inherited from tesseract::GridBase
 GridBase ()=default
 
 GridBase (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~GridBase ()
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
int gridsize () const
 
int gridwidth () const
 
int gridheight () const
 
const ICOORD & bleft () const
 
const ICOORD & tright () const
 
void GridCoords (int x, int y, int *grid_x, int *grid_y) const
 
void ClipGridCoords (int *x, int *y) const
 

Static Public Member Functions

static bool DifferentSizes (int size1, int size2)
 
static bool VeryDifferentSizes (int size1, int size2)
 
- Static Public Member Functions inherited from tesseract::AlignedBlob
static bool WithinTestRegion (int detail_level, int x, int y)
 

Protected Member Functions

TabVector_LIST * vectors ()
 
TabVector_LIST * dead_vectors ()
 
bool FindTabVectors (TabVector_LIST *hlines, BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, int min_gutter_width, double tabfind_aligned_gap_fraction, ColPartitionGrid *part_grid, FCOORD *deskew, FCOORD *reskew)
 
void DontFindTabVectors (BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, FCOORD *deskew, FCOORD *reskew)
 
void TidyBlobs (TO_BLOCK *block)
 
void SetupTabSearch (int x, int y, int *min_key, int *max_key)
 
ScrollView * DisplayTabVectors (ScrollView *tab_win)
 
ScrollView * FindInitialTabVectors (BLOBNBOX_LIST *image_blobs, int min_gutter_width, double tabfind_aligned_gap_fraction, TO_BLOCK *block)
 
void ResetForVerticalText (const FCOORD &rotate, const FCOORD &rerotate, TabVector_LIST *horizontal_lines, int *min_gutter_width)
 
void Reset ()
 
void ReflectInYAxis ()
 

Static Protected Member Functions

static void RotateBlobList (const FCOORD &rotation, BLOBNBOX_LIST *blobs)
 

Protected Attributes

ICOORD vertical_skew_
 Estimate of true vertical in this image. More...
 
int resolution_
 Of source image in pixels per inch. More...
 
- Protected Attributes inherited from tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >
BLOBNBOX_CLIST * grid_
 
- Protected Attributes inherited from tesseract::GridBase
int gridsize_
 
int gridwidth_
 
int gridheight_
 
int gridbuckets_
 
ICOORD bleft_
 
ICOORD tright_
 

Detailed Description

The TabFind class contains code to find tab-stops and maintain the vectors_ list of tab vectors. Also provides an interface to find neighbouring blobs in the grid of BLOBNBOXes that is used by multiple subclasses. Searching is a complex operation because of the need to enforce rule/separator lines, and tabstop boundaries, (when available), so as the holder of the list of TabVectors this class provides the functions.

Definition at line 52 of file tabfind.h.

Constructor & Destructor Documentation

◆ TabFind()

tesseract::TabFind::TabFind ( int  gridsize,
const ICOORD &  bleft,
const ICOORD &  tright,
TabVector_LIST *  vlines,
int  vertical_x,
int  vertical_y,
int  resolution 
)

Definition at line 65 of file tabfind.cpp.

69  resolution_(resolution),
70  image_origin_(0, tright.y() - 1),
71  v_it_(&vectors_) {
72  width_cb_ = nullptr;
73  v_it_.add_list_after(vlines);
74  SetVerticalSkewAndParallelize(vertical_x, vertical_y);
75  using namespace std::placeholders; // for _1
76  width_cb_ = std::bind(&TabFind::CommonWidth, this, _1);
77 }

◆ ~TabFind()

tesseract::TabFind::~TabFind ( )
override

Definition at line 79 of file tabfind.cpp.

79  {
80 }

Member Function Documentation

◆ CommonWidth()

bool tesseract::TabFind::CommonWidth ( int  width)

Return true if the given width is close to one of the common widths in column_widths_.

Definition at line 394 of file tabfind.cpp.

394  {
395  width /= kColumnWidthFactor;
396  ICOORDELT_IT it(&column_widths_);
397  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
398  ICOORDELT* w = it.data();
399  if (w->x() - 1 <= width && width <= w->y() + 1)
400  return true;
401  }
402  return false;
403 }

◆ dead_vectors()

TabVector_LIST* tesseract::TabFind::dead_vectors ( )
inline protected

Definition at line 175 of file tabfind.h.

175  {
176  return &dead_vectors_;
177  }

◆ DifferentSizes()

bool tesseract::TabFind::DifferentSizes ( int  size1,
int  size2 
)
static

Return true if the sizes are more than a factor of 2 different.

Definition at line 407 of file tabfind.cpp.

407  {
408  return size1 > size2 * 2 || size2 > size1 * 2;
409 }

◆ DisplayTabVectors()

ScrollView * tesseract::TabFind::DisplayTabVectors ( ScrollView *  tab_win)
protected

Display the tab vectors found in this grid.

Definition at line 497 of file tabfind.cpp.

497  {
498 #ifndef GRAPHICS_DISABLED
499  // For every vector, display it.
500  TabVector_IT it(&vectors_);
501  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
502  TabVector* vector = it.data();
503  vector->Display(tab_win);
504  }
505  tab_win->Update();
506 #endif
507  return tab_win;
508 }

◆ DontFindTabVectors()

void tesseract::TabFind::DontFindTabVectors ( BLOBNBOX_LIST *  image_blobs,
TO_BLOCK *  block,
FCOORD *  deskew,
FCOORD *  reskew
)
protected

Definition at line 452 of file tabfind.cpp.

453  {
454  InsertBlobsToGrid(false, false, image_blobs, this);
455  InsertBlobsToGrid(true, false, &block->blobs, this);
456  deskew->set_x(1.0f);
457  deskew->set_y(0.0f);
458  reskew->set_x(1.0f);
459  reskew->set_y(0.0f);
460 }

◆ FindInitialTabVectors()

ScrollView * tesseract::TabFind::FindInitialTabVectors ( BLOBNBOX_LIST *  image_blobs,
int  min_gutter_width,
double  tabfind_aligned_gap_fraction,
TO_BLOCK *  block
)
protected

Definition at line 514 of file tabfind.cpp.

517  {
518  if (textord_tabfind_show_initialtabs) {
519  ScrollView* line_win = MakeWindow(0, 0, "VerticalLines");
520  line_win = DisplayTabVectors(line_win);
521  }
522  // Prepare the grid.
523  if (image_blobs != nullptr)
524  InsertBlobsToGrid(true, false, image_blobs, this);
525  InsertBlobsToGrid(true, false, &block->blobs, this);
526  ScrollView* initial_win = FindTabBoxes(min_gutter_width,
527  tabfind_aligned_gap_fraction);
528  FindAllTabVectors(min_gutter_width);
529 
531  SortVectors();
532  EvaluateTabs();
533  if (textord_tabfind_show_initialtabs && initial_win != nullptr)
534  initial_win = DisplayTabVectors(initial_win);
535  MarkVerticalText();
536  return initial_win;
537 }

◆ FindTabVectors()

bool tesseract::TabFind::FindTabVectors ( TabVector_LIST *  hlines,
BLOBNBOX_LIST *  image_blobs,
TO_BLOCK *  block,
int  min_gutter_width,
double  tabfind_aligned_gap_fraction,
ColPartitionGrid *  part_grid,
FCOORD *  deskew,
FCOORD *  reskew
)
protected

Top-level function to find TabVectors in an input page block. Returns false if the detected skew angle is impossible. Applies the detected skew angle to deskew the tabs, blobs and part_grid. tabfind_aligned_gap_fraction should be the value of the parameter textord_tabfind_aligned_gap_fraction.

Definition at line 422 of file tabfind.cpp.

427  {
428  ScrollView* tab_win = FindInitialTabVectors(image_blobs, min_gutter_width,
429  tabfind_aligned_gap_fraction,
430  block);
431  ComputeColumnWidths(tab_win, part_grid);
433  SortVectors();
434  CleanupTabs();
435  if (!Deskew(hlines, image_blobs, block, deskew, reskew))
436  return false; // Skew angle is too large.
437  part_grid->Deskew(*deskew);
438  ApplyTabConstraints();
439  #ifndef GRAPHICS_DISABLED
440  if (textord_tabfind_show_finaltabs) {
441  tab_win = MakeWindow(640, 50, "FinalTabs");
442  DisplayBoxes(tab_win);
443  DisplayTabs("FinalTabs", tab_win);
444  tab_win = DisplayTabVectors(tab_win);
445  }
446  #endif // GRAPHICS_DISABLED
447  return true;
448 }

◆ GutterWidth()

int tesseract::TabFind::GutterWidth ( int  bottom_y,
int  top_y,
const TabVector &  v,
bool  ignore_unmergeables,
int  max_gutter_width,
int *  required_shift 
)

Definition at line 161 of file tabfind.cpp.

163  {
164  bool right_to_left = v.IsLeftTab();
165  int bottom_x = v.XAtY(bottom_y);
166  int top_x = v.XAtY(top_y);
167  int start_x = right_to_left ? std::max(top_x, bottom_x) : std::min(top_x, bottom_x);
168  BlobGridSearch sidesearch(this);
169  sidesearch.StartSideSearch(start_x, bottom_y, top_y);
170  int min_gap = max_gutter_width;
171  *required_shift = 0;
172  BLOBNBOX* blob = nullptr;
173  while ((blob = sidesearch.NextSideSearch(right_to_left)) != nullptr) {
174  const TBOX& box = blob->bounding_box();
175  if (box.bottom() >= top_y || box.top() <= bottom_y)
176  continue; // Doesn't overlap enough.
177  if (box.height() >= gridsize() * 2 &&
178  box.height() > box.width() * kLineFragmentAspectRatio) {
179  // Skip likely separator line residue.
180  continue;
181  }
182  if (ignore_unmergeables && BLOBNBOX::UnMergeableType(blob->region_type()))
183  continue; // Skip non-text if required.
184  int mid_y = (box.bottom() + box.top()) / 2;
185  // We use the x at the mid-y so that the required_shift guarantees
186  // to clear all the blobs on the tab-stop. If we use the min/max
187  // of x at top/bottom of the blob, then exactness would be required,
188  // which is not a good thing.
189  int tab_x = v.XAtY(mid_y);
190  int gap;
191  if (right_to_left) {
192  gap = tab_x - box.right();
193  if (gap < 0 && box.left() - tab_x < *required_shift)
194  *required_shift = box.left() - tab_x;
195  } else {
196  gap = box.left() - tab_x;
197  if (gap < 0 && box.right() - tab_x > *required_shift)
198  *required_shift = box.right() - tab_x;
199  }
200  if (gap > 0 && gap < min_gap)
201  min_gap = gap;
202  }
203  // Result may be negative, in which case, this is a really bad tabstop.
204  return min_gap - abs(*required_shift);
205 }

◆ GutterWidthAndNeighbourGap()

void tesseract::TabFind::GutterWidthAndNeighbourGap ( int  tab_x,
int  mean_height,
int  max_gutter,
bool  left,
BLOBNBOX *  bbox,
int *  gutter_width,
int *  neighbour_gap 
)

Find the gutter width and distance to inner neighbour for the given blob.

Definition at line 208 of file tabfind.cpp.

211  {
212  const TBOX& box = bbox->bounding_box();
213  // The gutter and internal sides of the box.
214  int gutter_x = left ? box.left() : box.right();
215  int internal_x = left ? box.right() : box.left();
216  // On ragged edges, the gutter side of the box is away from the tabstop.
217  int tab_gap = left ? gutter_x - tab_x : tab_x - gutter_x;
218  *gutter_width = max_gutter;
219  // If the box is away from the tabstop, we need to increase
220  // the allowed gutter width.
221  if (tab_gap > 0)
222  *gutter_width += tab_gap;
223  bool debug = WithinTestRegion(2, box.left(), box.bottom());
224  if (debug)
225  tprintf("Looking in gutter\n");
226  // Find the nearest blob on the outside of the column.
227  BLOBNBOX* gutter_bbox = AdjacentBlob(bbox, left,
228  bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0,
229  *gutter_width, box.top(), box.bottom());
230  if (gutter_bbox != nullptr) {
231  const TBOX& gutter_box = gutter_bbox->bounding_box();
232  *gutter_width = left ? tab_x - gutter_box.right()
233  : gutter_box.left() - tab_x;
234  }
235  if (*gutter_width >= max_gutter) {
236  // If there is no box because a tab was in the way, get the tab coord.
237  TBOX gutter_box(box);
238  if (left) {
239  gutter_box.set_left(tab_x - max_gutter - 1);
240  gutter_box.set_right(tab_x - max_gutter);
241  int tab_gutter = RightEdgeForBox(gutter_box, true, false);
242  if (tab_gutter < tab_x - 1)
243  *gutter_width = tab_x - tab_gutter;
244  } else {
245  gutter_box.set_left(tab_x + max_gutter);
246  gutter_box.set_right(tab_x + max_gutter + 1);
247  int tab_gutter = LeftEdgeForBox(gutter_box, true, false);
248  if (tab_gutter > tab_x + 1)
249  *gutter_width = tab_gutter - tab_x;
250  }
251  }
252  if (*gutter_width > max_gutter)
253  *gutter_width = max_gutter;
254  // Now look for a neighbour on the inside.
255  if (debug)
256  tprintf("Looking for neighbour\n");
257  BLOBNBOX* neighbour = AdjacentBlob(bbox, !left,
258  bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0,
259  *gutter_width, box.top(), box.bottom());
260  int neighbour_edge = left ? RightEdgeForBox(box, true, false)
261  : LeftEdgeForBox(box, true, false);
262  if (neighbour != nullptr) {
263  const TBOX& n_box = neighbour->bounding_box();
264  if (debug) {
265  tprintf("Found neighbour:");
266  n_box.print();
267  }
268  if (left && n_box.left() < neighbour_edge)
269  neighbour_edge = n_box.left();
270  else if (!left && n_box.right() > neighbour_edge)
271  neighbour_edge = n_box.right();
272  }
273  *neighbour_gap = left ? neighbour_edge - internal_x
274  : internal_x - neighbour_edge;
275 }

◆ image_origin()

const ICOORD& tesseract::TabFind::image_origin ( ) const
inline

Return the coords at which to draw the image backdrop.

Definition at line 164 of file tabfind.h.

164  {
165  return image_origin_;
166  }

◆ InsertBlob()

bool tesseract::TabFind::InsertBlob ( bool  h_spread,
bool  v_spread,
BLOBNBOX *  blob,
BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *  grid 
)

Insert a single blob into the given grid (not necessarily this). If h_spread, then all cells covered horizontally by the box are used, otherwise, just the bottom-left. Similarly for v_spread. A side effect is that the left and right rule edges of the blob are set according to the tab vectors in this (not grid).

Definition at line 118 of file tabfind.cpp.

120  {
121  TBOX box = blob->bounding_box();
122  blob->set_left_rule(LeftEdgeForBox(box, false, false));
123  blob->set_right_rule(RightEdgeForBox(box, false, false));
124  blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false));
125  blob->set_right_crossing_rule(RightEdgeForBox(box, true, false));
126  if (blob->joined_to_prev())
127  return false;
128  grid->InsertBBox(h_spread, v_spread, blob);
129  return true;
130 }

◆ InsertBlobsToGrid()

void tesseract::TabFind::InsertBlobsToGrid ( bool  h_spread,
bool  v_spread,
BLOBNBOX_LIST *  blobs,
BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *  grid 
)

Insert a list of blobs into the given grid (not necessarily this). See InsertBlob for the other arguments. It would seem to make more sense to swap this and grid, but this way around allows grid to not be derived from TabFind, eg a ColPartitionGrid, while the grid that provides the tab stops(this) has to be derived from TabFind.

Definition at line 91 of file tabfind.cpp.

94  {
95  BLOBNBOX_IT blob_it(blobs);
96  int b_count = 0;
97  int reject_count = 0;
98  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
99  BLOBNBOX* blob = blob_it.data();
100 // if (InsertBlob(true, true, blob, grid)) {
101  if (InsertBlob(h_spread, v_spread, blob, grid)) {
102  ++b_count;
103  } else {
104  ++reject_count;
105  }
106  }
107  if (textord_debug_tabfind) {
108  tprintf("Inserted %d blobs into grid, %d rejected.\n",
109  b_count, reject_count);
110  }
111 }

◆ LeftEdgeForBox()

int tesseract::TabFind::LeftEdgeForBox ( const TBOX &  box,
bool  crossing,
bool  extended 
)

As RightEdgeForBox, but finds the left Edge instead.

Definition at line 286 of file tabfind.cpp.

286  {
287  TabVector* v = LeftTabForBox(box, crossing, extended);
288  return v == nullptr ? bleft_.x() : v->XAtY((box.top() + box.bottom()) / 2);
289 }

◆ LeftTabForBox()

TabVector * tesseract::TabFind::LeftTabForBox ( const TBOX &  box,
bool  crossing,
bool  extended 
)

As RightTabForBox, but finds the left TabVector instead.

Definition at line 348 of file tabfind.cpp.

349  {
350  if (v_it_.empty())
351  return nullptr;
352  int top_y = box.top();
353  int bottom_y = box.bottom();
354  int mid_y = (top_y + bottom_y) / 2;
355  int left = crossing ? (box.left() + box.right()) / 2 : box.left();
356  int min_key, max_key;
357  SetupTabSearch(left, mid_y, &min_key, &max_key);
358  // Position the iterator at the last TabVector with sort_key <= max_key.
359  while (!v_it_.at_last() && v_it_.data()->sort_key() <= max_key)
360  v_it_.forward();
361  while (!v_it_.at_first() && v_it_.data()->sort_key() > max_key) {
362  v_it_.backward();
363  }
364  // Find the rightmost tab vector that overlaps and has XAtY(mid_y) <= left.
365  TabVector* best_v = nullptr;
366  int best_x = -1;
367  int key_limit = -1;
368  do {
369  TabVector* v = v_it_.data();
370  int x = v->XAtY(mid_y);
371  if (x <= left &&
372  (v->VOverlap(top_y, bottom_y) > 0 ||
373  (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
374  if (best_v == nullptr || x > best_x) {
375  best_v = v;
376  best_x = x;
377  // We can guarantee that no better vector can be found if the
378  // sort key is less than that of the best by max_key - min_key.
379  key_limit = v->sort_key() - (max_key - min_key);
380  }
381  }
382  // Break when the search is done to avoid wrapping the iterator and
383  // thereby potentially slowing the next search.
384  if (v_it_.at_first() ||
385  (best_v != nullptr && v->sort_key() < key_limit))
386  break; // Prevent restarting list for next call.
387  v_it_.backward();
388  } while (!v_it_.at_last());
389  return best_v;
390 }

◆ ReflectInYAxis()

void tesseract::TabFind::ReflectInYAxis ( )
protected

Definition at line 1356 of file tabfind.cpp.

1356  {
1357  TabVector_LIST temp_list;
1358  TabVector_IT temp_it(&temp_list);
1359  v_it_.move_to_first();
1360  // The TabVector list only contains vertical lines, but they need to be
1361  // reflected and the list needs to be reversed, so they are still in
1362  // sort_key order.
1363  while (!v_it_.empty()) {
1364  TabVector* v = v_it_.extract();
1365  v_it_.forward();
1366  v->ReflectInYAxis();
1367  temp_it.add_before_then_move(v);
1368  }
1369  v_it_.add_list_after(&temp_list);
1370  v_it_.move_to_first();
1371  // Reset this grid with reflected bounding boxes.
1372  TBOX grid_box(bleft(), tright());
1373  int tmp = grid_box.left();
1374  grid_box.set_left(-grid_box.right());
1375  grid_box.set_right(-tmp);
1376  Init(gridsize(), grid_box.botleft(), grid_box.topright());
1377 }

◆ Reset()

void tesseract::TabFind::Reset ( )
protected

Definition at line 1345 of file tabfind.cpp.

1345  {
1346  v_it_.move_to_first();
1347  for (v_it_.mark_cycle_pt(); !v_it_.cycled_list(); v_it_.forward()) {
1348  if (!v_it_.data()->IsSeparator())
1349  delete v_it_.extract();
1350  }
1351  Clear();
1352 }

◆ ResetForVerticalText()

void tesseract::TabFind::ResetForVerticalText ( const FCOORD &  rotate,
const FCOORD &  rerotate,
TabVector_LIST *  horizontal_lines,
int *  min_gutter_width 
)
protected

Definition at line 1300 of file tabfind.cpp.

1302  {
1303  // Rotate the horizontal and vertical vectors and swap them over.
1304  // Only the separators are kept and rotated; other tabs are used
1305  // to estimate the gutter width then thrown away.
1306  TabVector_LIST ex_verticals;
1307  TabVector_IT ex_v_it(&ex_verticals);
1308  TabVector_LIST vlines;
1309  TabVector_IT v_it(&vlines);
1310  while (!v_it_.empty()) {
1311  TabVector* v = v_it_.extract();
1312  if (v->IsSeparator()) {
1313  v->Rotate(rotate);
1314  ex_v_it.add_after_then_move(v);
1315  } else {
1316  v_it.add_after_then_move(v);
1317  }
1318  v_it_.forward();
1319  }
1320 
1321  // Adjust the min gutter width for better tabbox selection
1322  // in 2nd call to FindInitialTabVectors().
1323  int median_gutter = FindMedianGutterWidth(&vlines);
1324  if (median_gutter > *min_gutter_width)
1325  *min_gutter_width = median_gutter;
1326 
1327  TabVector_IT h_it(horizontal_lines);
1328  for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
1329  TabVector* h = h_it.data();
1330  h->Rotate(rotate);
1331  }
1332  v_it_.add_list_after(horizontal_lines);
1333  v_it_.move_to_first();
1334  h_it.set_to_list(horizontal_lines);
1335  h_it.add_list_after(&ex_verticals);
1336 
1337  // Rebuild the grid to the new size.
1338  TBOX grid_box(bleft(), tright());
1339  grid_box.rotate_large(rotate);
1340  Init(gridsize(), grid_box.botleft(), grid_box.topright());
1341 }

◆ RightEdgeForBox()

int tesseract::TabFind::RightEdgeForBox ( const TBOX &  box,
bool  crossing,
bool  extended 
)

Return the x-coord that corresponds to the right edge for the given box. If there is a rule line to the right that vertically overlaps it, then return the x-coord of the rule line, otherwise return the right edge of the page. For details see RightTabForBox below.

Definition at line 281 of file tabfind.cpp.

281  {
282  TabVector* v = RightTabForBox(box, crossing, extended);
283  return v == nullptr ? tright_.x() : v->XAtY((box.top() + box.bottom()) / 2);
284 }

◆ RightTabForBox()

TabVector * tesseract::TabFind::RightTabForBox ( const TBOX &  box,
bool  crossing,
bool  extended 
)

Return the TabVector that corresponds to the right edge for the given box. If there is a TabVector to the right that vertically overlaps it, then return it, otherwise return nullptr. Note that Right and Left refer to the position of the TabVector, not its type, ie RightTabForBox returns the nearest TabVector to the right of the box, regardless of its type. If a TabVector crosses right through the box (as opposed to grazing one edge or missing entirely), then crossing false will ignore such a line. Crossing true will return the line for BOTH left and right edges. If extended is true, then TabVectors are considered to extend to their extended_start/end_y, otherwise, just the startpt_ and endpt_. These functions make use of an internal iterator to the vectors_ list for speed when used repeatedly on neighbouring boxes. The caveat is that the iterator must be updated whenever the list is modified.

Definition at line 304 of file tabfind.cpp.

305  {
306  if (v_it_.empty())
307  return nullptr;
308  int top_y = box.top();
309  int bottom_y = box.bottom();
310  int mid_y = (top_y + bottom_y) / 2;
311  int right = crossing ? (box.left() + box.right()) / 2 : box.right();
312  int min_key, max_key;
313  SetupTabSearch(right, mid_y, &min_key, &max_key);
314  // Position the iterator at the first TabVector with sort_key >= min_key.
315  while (!v_it_.at_first() && v_it_.data()->sort_key() >= min_key)
316  v_it_.backward();
317  while (!v_it_.at_last() && v_it_.data()->sort_key() < min_key)
318  v_it_.forward();
319  // Find the leftmost tab vector that overlaps and has XAtY(mid_y) >= right.
320  TabVector* best_v = nullptr;
321  int best_x = -1;
322  int key_limit = -1;
323  do {
324  TabVector* v = v_it_.data();
325  int x = v->XAtY(mid_y);
326  if (x >= right &&
327  (v->VOverlap(top_y, bottom_y) > 0 ||
328  (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
329  if (best_v == nullptr || x < best_x) {
330  best_v = v;
331  best_x = x;
332  // We can guarantee that no better vector can be found if the
333  // sort key exceeds that of the best by max_key - min_key.
334  key_limit = v->sort_key() + max_key - min_key;
335  }
336  }
337  // Break when the search is done to avoid wrapping the iterator and
338  // thereby potentially slowing the next search.
339  if (v_it_.at_last() ||
340  (best_v != nullptr && v->sort_key() > key_limit))
341  break; // Prevent restarting list for next call.
342  v_it_.forward();
343  } while (!v_it_.at_first());
344  return best_v;
345 }

◆ RotateBlobList()

void tesseract::TabFind::RotateBlobList ( const FCOORD &  rotation,
BLOBNBOX_LIST *  blobs 
)
static protected

Definition at line 1256 of file tabfind.cpp.

1256  {
1257  BLOBNBOX_IT it(blobs);
1258  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1259  it.data()->rotate_box(rotation);
1260  }
1261 }

◆ SetBlobRuleEdges()

void tesseract::TabFind::SetBlobRuleEdges ( BLOBNBOX_LIST *  blobs)

Definition at line 142 of file tabfind.cpp.

142  {
143  BLOBNBOX_IT blob_it(blobs);
144  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
145  BLOBNBOX* blob = blob_it.data();
146  TBOX box = blob->bounding_box();
147  blob->set_left_rule(LeftEdgeForBox(box, false, false));
148  blob->set_right_rule(RightEdgeForBox(box, false, false));
149  blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false));
150  blob->set_right_crossing_rule(RightEdgeForBox(box, true, false));
151  }
152 }

◆ SetBlockRuleEdges()

void tesseract::TabFind::SetBlockRuleEdges ( TO_BLOCK *  block)

Definition at line 133 of file tabfind.cpp.

133  {
134  SetBlobRuleEdges(&block->blobs);
135  SetBlobRuleEdges(&block->small_blobs);
136  SetBlobRuleEdges(&block->noise_blobs);
137  SetBlobRuleEdges(&block->large_blobs);
138 }

◆ SetupTabSearch()

void tesseract::TabFind::SetupTabSearch ( int  x,
int  y,
int *  min_key,
int *  max_key 
)
protected

Definition at line 490 of file tabfind.cpp.

490  {
491  int key1 = TabVector::SortKey(vertical_skew_, x, (y + tright_.y()) / 2);
492  int key2 = TabVector::SortKey(vertical_skew_, x, (y + bleft_.y()) / 2);
493  *min_key = std::min(key1, key2);
494  *max_key = std::max(key1, key2);
495 }

◆ TidyBlobs()

void tesseract::TabFind::TidyBlobs ( TO_BLOCK *  block)
protected

Definition at line 465 of file tabfind.cpp.

465  {
466  BLOBNBOX_IT large_it = &block->large_blobs;
467  BLOBNBOX_IT blob_it = &block->blobs;
468  int b_count = 0;
469  for (large_it.mark_cycle_pt(); !large_it.cycled_list(); large_it.forward()) {
470  BLOBNBOX* large_blob = large_it.data();
471  if (large_blob->owner() != nullptr) {
472  blob_it.add_to_end(large_it.extract());
473  ++b_count;
474  }
475  }
476  if (textord_debug_tabfind) {
477  tprintf("Moved %d large blobs to normal list\n",
478  b_count);
479  #ifndef GRAPHICS_DISABLED
480  ScrollView* rej_win = MakeWindow(500, 300, "Image blobs");
481  block->plot_graded_blobs(rej_win);
482  block->plot_noise_blobs(rej_win);
483  rej_win->Update();
484  #endif // GRAPHICS_DISABLED
485  }
486  block->DeleteUnownedNoise();
487 }

◆ vectors()

TabVector_LIST* tesseract::TabFind::vectors ( )
inline protected

Accessors

Definition at line 172 of file tabfind.h.

172  {
173  return &vectors_;
174  }

◆ VeryDifferentSizes()

bool tesseract::TabFind::VeryDifferentSizes ( int  size1,
int  size2 
)
static

Return true if the sizes are more than a factor of 5 different.

Definition at line 413 of file tabfind.cpp.

413  {
414  return size1 > size2 * 5 || size2 > size1 * 5;
415 }

◆ WidthCB()

WidthCallback tesseract::TabFind::WidthCB ( )
inline

Return a callback for testing CommonWidth.

Definition at line 157 of file tabfind.h.

157  {
158  return width_cb_;
159  }

Member Data Documentation

◆ resolution_

int tesseract::TabFind::resolution_
protected

Of source image in pixels per inch.

Definition at line 367 of file tabfind.h.

◆ vertical_skew_

ICOORD tesseract::TabFind::vertical_skew_
protected

Estimate of true vertical in this image.

Definition at line 366 of file tabfind.h.


The documentation for this class was generated from the following files:
tabfind.h
tabfind.cpp
TO_BLOCK::small_blobs
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:774
ScrollView
Definition: scrollview.h:97
TO_BLOCK::plot_graded_blobs
void plot_graded_blobs(ScrollView *to_win)
Definition: blobbox.cpp:1054
tesseract::TabVector::SortKey
static int SortKey(const ICOORD &vertical, int x, int y)
Definition: tabvector.h:279
TO_BLOCK::plot_noise_blobs
void plot_noise_blobs(ScrollView *to_win)
Definition: blobbox.cpp:1046
tesseract::BBGrid::InsertBBox
void InsertBBox(bool h_spread, bool v_spread, BBC *bbox)
Definition: bbgrid.h:486
tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >::MakeWindow
ScrollView * MakeWindow(int x, int y, const char *window_name)
Definition: bbgrid.h:589
TO_BLOCK::noise_blobs
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:773
tesseract::TabFind::SetBlobRuleEdges
void SetBlobRuleEdges(BLOBNBOX_LIST *blobs)
Definition: tabfind.cpp:142
FCOORD::set_x
void set_x(float xin)
rewrite function
Definition: points.h:213
tesseract::GridBase::tright_
ICOORD tright_
Definition: bbgrid.h:91
tesseract::kColumnWidthFactor
const int kColumnWidthFactor
Definition: tabfind.h:41
TBOX::print
void print() const
Definition: rect.h:277
TO_BLOCK::DeleteUnownedNoise
void DeleteUnownedNoise()
Definition: blobbox.cpp:1020
TBOX::top
int16_t top() const
Definition: rect.h:57
tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >::Clear
void Clear()
Definition: bbgrid.h:455
TO_BLOCK::blobs
BLOBNBOX_LIST blobs
Definition: blobbox.h:771
tesseract::TabFind::SetupTabSearch
void SetupTabSearch(int x, int y, int *min_key, int *max_key)
Definition: tabfind.cpp:490
ICOORD::x
int16_t x() const
access function
Definition: points.h:51
FCOORD::set_y
void set_y(float yin)
rewrite function
Definition: points.h:217
tesseract::TabFind::InsertBlobsToGrid
void InsertBlobsToGrid(bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
Definition: tabfind.cpp:91
BLOBNBOX
Definition: blobbox.h:142
BLOBNBOX::UnMergeableType
static bool UnMergeableType(BlobRegionType type)
Definition: blobbox.h:429
tesseract::GridBase::tright
const ICOORD & tright() const
Definition: bbgrid.h:75
TBOX::height
int16_t height() const
Definition: rect.h:107
tesseract::TabFind::FindInitialTabVectors
ScrollView * FindInitialTabVectors(BLOBNBOX_LIST *image_blobs, int min_gutter_width, double tabfind_aligned_gap_fraction, TO_BLOCK *block)
Definition: tabfind.cpp:514
TBOX::set_right
void set_right(int x)
Definition: rect.h:81
tesseract::TabFind::RightEdgeForBox
int RightEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:281
tesseract::TabFind::CommonWidth
bool CommonWidth(int width)
Definition: tabfind.cpp:394
tesseract::TabFind::resolution_
int resolution_
Of source image in pixels per inch.
Definition: tabfind.h:367
TO_BLOCK::large_blobs
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:775
tesseract::TabFind::LeftEdgeForBox
int LeftEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:286
BTFT_TEXT_ON_IMAGE
Definition: blobbox.h:119
tesseract::AlignedBlob::WithinTestRegion
static bool WithinTestRegion(int detail_level, int x, int y)
Definition: alignedblob.cpp:150
BLOBNBOX::joined_to_prev
bool joined_to_prev() const
Definition: blobbox.h:255
BLOBNBOX::set_left_rule
void set_left_rule(int new_left)
Definition: blobbox.h:315
TBOX::width
int16_t width() const
Definition: rect.h:114
BLOBNBOX::set_right_crossing_rule
void set_right_crossing_rule(int new_right)
Definition: blobbox.h:333
TBOX::bottom
int16_t bottom() const
Definition: rect.h:64
BLOBNBOX::set_left_crossing_rule
void set_left_crossing_rule(int new_left)
Definition: blobbox.h:327
tesseract::GridBase::bleft_
ICOORD bleft_
Definition: bbgrid.h:90
BLOBNBOX::bounding_box
const TBOX & bounding_box() const
Definition: blobbox.h:229
tesseract::TabFind::vertical_skew_
ICOORD vertical_skew_
Estimate of true vertical in this image.
Definition: tabfind.h:366
tesseract::TabFind::RightTabForBox
TabVector * RightTabForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:304
tesseract::TabVector::MergeSimilarTabVectors
static void MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors, BlobGrid *grid)
Definition: tabvector.cpp:353
tesseract::GridBase::gridsize
int gridsize() const
Definition: bbgrid.h:63
tesseract::TabFind::LeftTabForBox
TabVector * LeftTabForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:348
tesseract::TabFind::DisplayTabVectors
ScrollView * DisplayTabVectors(ScrollView *tab_win)
Definition: tabfind.cpp:497
tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >::DisplayBoxes
void DisplayBoxes(ScrollView *window)
Definition: bbgrid.h:613
tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >::Init
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
Definition: bbgrid.h:445
BLOBNBOX::flow
BlobTextFlowType flow() const
Definition: blobbox.h:294
TBOX::left
int16_t left() const
Definition: rect.h:71
tesseract::AlignedBlob::DisplayTabs
ScrollView * DisplayTabs(const char *window_name, ScrollView *tab_win)
Definition: alignedblob.cpp:158
BLOBNBOX::region_type
BlobRegionType region_type() const
Definition: blobbox.h:282
TBOX::right
int16_t right() const
Definition: rect.h:78
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
tesseract::TabFind::InsertBlob
bool InsertBlob(bool h_spread, bool v_spread, BLOBNBOX *blob, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
Definition: tabfind.cpp:118
ScrollView::Update
static void Update()
Definition: scrollview.cpp:708
tesseract::kLineFragmentAspectRatio
const double kLineFragmentAspectRatio
Definition: tabfind.cpp:54
BLOBNBOX::owner
tesseract::ColPartition * owner() const
Definition: blobbox.h:351
ICOORDELT
Definition: points.h:160
tesseract::BlobGridSearch
GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > BlobGridSearch
Definition: blobgrid.h:31
textord_debug_tabfind
int textord_debug_tabfind
Definition: alignedblob.cpp:27
tesseract::GridBase::bleft
const ICOORD & bleft() const
Definition: bbgrid.h:72
tesseract::AlignedBlob::AlignedBlob
AlignedBlob(int gridsize, const ICOORD &bleft, const ICOORD &tright)
Definition: alignedblob.cpp:143
TBOX::set_left
void set_left(int x)
Definition: rect.h:74
ICOORD::y
int16_t y() const
access function
Definition: points.h:55
TBOX
Definition: rect.h:33
BLOBNBOX::set_right_rule
void set_right_rule(int new_right)
Definition: blobbox.h:321