tesseract  4.0.0-1-g2a2b
tesseract::TabFind Class Reference

#include <tabfind.h>

Inheritance diagram for tesseract::TabFind:
tesseract::AlignedBlob tesseract::BlobGrid tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > tesseract::GridBase tesseract::ColumnFinder

Public Member Functions

 TabFind (int gridsize, const ICOORD &bleft, const ICOORD &tright, TabVector_LIST *vlines, int vertical_x, int vertical_y, int resolution)
 
virtual ~TabFind ()
 
void InsertBlobsToGrid (bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
 
bool InsertBlob (bool h_spread, bool v_spread, BLOBNBOX *blob, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
 
void SetBlockRuleEdges (TO_BLOCK *block)
 
void SetBlobRuleEdges (BLOBNBOX_LIST *blobs)
 
int GutterWidth (int bottom_y, int top_y, const TabVector &v, bool ignore_unmergeables, int max_gutter_width, int *required_shift)
 
void GutterWidthAndNeighbourGap (int tab_x, int mean_height, int max_gutter, bool left, BLOBNBOX *bbox, int *gutter_width, int *neighbour_gap)
 
int RightEdgeForBox (const TBOX &box, bool crossing, bool extended)
 
int LeftEdgeForBox (const TBOX &box, bool crossing, bool extended)
 
TabVector * RightTabForBox (const TBOX &box, bool crossing, bool extended)
 
TabVector * LeftTabForBox (const TBOX &box, bool crossing, bool extended)
 
bool CommonWidth (int width)
 
WidthCallback * WidthCB ()
 
const ICOORD & image_origin () const
 
- Public Member Functions inherited from tesseract::AlignedBlob
 AlignedBlob (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~AlignedBlob ()
 
ScrollView * DisplayTabs (const char *window_name, ScrollView *tab_win)
 
TabVector * FindVerticalAlignment (AlignedBlobParams align_params, BLOBNBOX *bbox, int *vertical_x, int *vertical_y)
 
- Public Member Functions inherited from tesseract::BlobGrid
 BlobGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~BlobGrid ()
 
void InsertBlobList (BLOBNBOX_LIST *blobs)
 
- Public Member Functions inherited from tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >
 BBGrid ()
 
 BBGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~BBGrid ()
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
void Clear ()
 
void ClearGridData (void(*free_method)(BLOBNBOX *))
 
void InsertBBox (bool h_spread, bool v_spread, BLOBNBOX *bbox)
 
void InsertPixPtBBox (int left, int bottom, Pix *pix, BLOBNBOX *bbox)
 
void RemoveBBox (BLOBNBOX *bbox)
 
bool RectangleEmpty (const TBOX &rect)
 
IntGrid * CountCellElements ()
 
ScrollView * MakeWindow (int x, int y, const char *window_name)
 
void DisplayBoxes (ScrollView *window)
 
void AssertNoDuplicates ()
 
virtual void HandleClick (int x, int y)
 
- Public Member Functions inherited from tesseract::GridBase
 GridBase ()=default
 
 GridBase (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~GridBase ()
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
int gridsize () const
 
int gridwidth () const
 
int gridheight () const
 
const ICOORD & bleft () const
 
const ICOORD & tright () const
 
void GridCoords (int x, int y, int *grid_x, int *grid_y) const
 
void ClipGridCoords (int *x, int *y) const
 

Static Public Member Functions

static bool DifferentSizes (int size1, int size2)
 
static bool VeryDifferentSizes (int size1, int size2)
 
- Static Public Member Functions inherited from tesseract::AlignedBlob
static bool WithinTestRegion (int detail_level, int x, int y)
 

Protected Member Functions

TabVector_LIST * vectors ()
 
TabVector_LIST * dead_vectors ()
 
bool FindTabVectors (TabVector_LIST *hlines, BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, int min_gutter_width, double tabfind_aligned_gap_fraction, ColPartitionGrid *part_grid, FCOORD *deskew, FCOORD *reskew)
 
void DontFindTabVectors (BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, FCOORD *deskew, FCOORD *reskew)
 
void TidyBlobs (TO_BLOCK *block)
 
void SetupTabSearch (int x, int y, int *min_key, int *max_key)
 
ScrollView * DisplayTabVectors (ScrollView *tab_win)
 
ScrollView * FindInitialTabVectors (BLOBNBOX_LIST *image_blobs, int min_gutter_width, double tabfind_aligned_gap_fraction, TO_BLOCK *block)
 
void ResetForVerticalText (const FCOORD &rotate, const FCOORD &rerotate, TabVector_LIST *horizontal_lines, int *min_gutter_width)
 
void Reset ()
 
void ReflectInYAxis ()
 

Static Protected Member Functions

static void RotateBlobList (const FCOORD &rotation, BLOBNBOX_LIST *blobs)
 

Protected Attributes

ICOORD vertical_skew_
 
int resolution_
 
- Protected Attributes inherited from tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >
BLOBNBOX_CLIST * grid_
 
- Protected Attributes inherited from tesseract::GridBase
int gridsize_
 
int gridwidth_
 
int gridheight_
 
int gridbuckets_
 
ICOORD bleft_
 
ICOORD tright_
 

Detailed Description

The TabFind class contains code to find tab-stops and maintain the vectors_ list of tab vectors. It also provides an interface to find neighbouring blobs in the grid of BLOBNBOXes that is used by multiple subclasses. Searching is a complex operation because of the need to enforce rule/separator lines and tab-stop boundaries (when available), so this class, as the holder of the list of TabVectors, provides the search functions.

Definition at line 53 of file tabfind.h.

Constructor & Destructor Documentation

◆ TabFind()

tesseract::TabFind::TabFind ( int  gridsize,
const ICOORD & bleft,
const ICOORD & tright,
TabVector_LIST *  vlines,
int  vertical_x,
int  vertical_y,
int  resolution 
)

Definition at line 66 of file tabfind.cpp.

70  resolution_(resolution),
71  image_origin_(0, tright.y() - 1),
72  v_it_(&vectors_) {
73  width_cb_ = nullptr;
74  v_it_.add_list_after(vlines);
75  SetVerticalSkewAndParallelize(vertical_x, vertical_y);
77 }
int gridsize() const
Definition: bbgrid.h:64
const ICOORD & bleft() const
Definition: bbgrid.h:73
int16_t y() const
access_function
Definition: points.h:57
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
bool CommonWidth(int width)
Definition: tabfind.cpp:395
AlignedBlob(int gridsize, const ICOORD &bleft, const ICOORD &tright)
const ICOORD & tright() const
Definition: bbgrid.h:76

◆ ~TabFind()

tesseract::TabFind::~TabFind ( )
virtual

Definition at line 79 of file tabfind.cpp.

79  {
80  delete width_cb_;
81 }

Member Function Documentation

◆ CommonWidth()

bool tesseract::TabFind::CommonWidth ( int  width)

Return true if the given width is close to one of the common widths in column_widths_.

Definition at line 395 of file tabfind.cpp.

395  {
396  width /= kColumnWidthFactor;
397  ICOORDELT_IT it(&column_widths_);
398  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
399  ICOORDELT* w = it.data();
400  if (w->x() - 1 <= width && width <= w->y() + 1)
401  return true;
402  }
403  return false;
404 }
int16_t y() const
access_function
Definition: points.h:57
const int kColumnWidthFactor
Definition: tabfind.h:42
int16_t x() const
access function
Definition: points.h:53

◆ dead_vectors()

TabVector_LIST* tesseract::TabFind::dead_vectors ( )
inline protected

Definition at line 176 of file tabfind.h.

176  {
177  return &dead_vectors_;
178  }

◆ DifferentSizes()

bool tesseract::TabFind::DifferentSizes ( int  size1,
int  size2 
)
static

Return true if the sizes are more than a factor of 2 different.

Definition at line 408 of file tabfind.cpp.

408  {
409  return size1 > size2 * 2 || size2 > size1 * 2;
410 }

◆ DisplayTabVectors()

ScrollView * tesseract::TabFind::DisplayTabVectors ( ScrollView * tab_win)
protected

Display the tab vectors found in this grid.

Definition at line 498 of file tabfind.cpp.

498  {
499 #ifndef GRAPHICS_DISABLED
500  // For every vector, display it.
501  TabVector_IT it(&vectors_);
502  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
503  TabVector* vector = it.data();
504  vector->Display(tab_win);
505  }
506  tab_win->Update();
507 #endif
508  return tab_win;
509 }
static void Update()
Definition: scrollview.cpp:711

◆ DontFindTabVectors()

void tesseract::TabFind::DontFindTabVectors ( BLOBNBOX_LIST *  image_blobs,
TO_BLOCK * block,
FCOORD * deskew,
FCOORD * reskew
)
protected

Definition at line 453 of file tabfind.cpp.

454  {
455  InsertBlobsToGrid(false, false, image_blobs, this);
456  InsertBlobsToGrid(true, false, &block->blobs, this);
457  deskew->set_x(1.0f);
458  deskew->set_y(0.0f);
459  reskew->set_x(1.0f);
460  reskew->set_y(0.0f);
461 }
void set_x(float xin)
rewrite function
Definition: points.h:215
void InsertBlobsToGrid(bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
Definition: tabfind.cpp:92
BLOBNBOX_LIST blobs
Definition: blobbox.h:785
void set_y(float yin)
rewrite function
Definition: points.h:219

◆ FindInitialTabVectors()

ScrollView * tesseract::TabFind::FindInitialTabVectors ( BLOBNBOX_LIST *  image_blobs,
int  min_gutter_width,
double  tabfind_aligned_gap_fraction,
TO_BLOCK * block
)
protected

Definition at line 515 of file tabfind.cpp.

518  {
520  ScrollView* line_win = MakeWindow(0, 0, "VerticalLines");
521  line_win = DisplayTabVectors(line_win);
522  }
523  // Prepare the grid.
524  if (image_blobs != nullptr)
525  InsertBlobsToGrid(true, false, image_blobs, this);
526  InsertBlobsToGrid(true, false, &block->blobs, this);
527  ScrollView* initial_win = FindTabBoxes(min_gutter_width,
528  tabfind_aligned_gap_fraction);
529  FindAllTabVectors(min_gutter_width);
530 
532  SortVectors();
533  EvaluateTabs();
534  if (textord_tabfind_show_initialtabs && initial_win != nullptr)
535  initial_win = DisplayTabVectors(initial_win);
536  MarkVerticalText();
537  return initial_win;
538 }
ScrollView * MakeWindow(int x, int y, const char *window_name)
Definition: bbgrid.h:591
ScrollView * DisplayTabVectors(ScrollView *tab_win)
Definition: tabfind.cpp:498
bool textord_tabfind_show_initialtabs
Definition: tabfind.cpp:63
ICOORD vertical_skew_
Definition: tabfind.h:367
void InsertBlobsToGrid(bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
Definition: tabfind.cpp:92
static void MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors, BlobGrid *grid)
Definition: tabvector.cpp:356
BLOBNBOX_LIST blobs
Definition: blobbox.h:785

◆ FindTabVectors()

bool tesseract::TabFind::FindTabVectors ( TabVector_LIST *  hlines,
BLOBNBOX_LIST *  image_blobs,
TO_BLOCK * block,
int  min_gutter_width,
double  tabfind_aligned_gap_fraction,
ColPartitionGrid * part_grid,
FCOORD * deskew,
FCOORD * reskew
)
protected

Top-level function to find TabVectors in an input page block. Returns false if the detected skew angle is impossible. Applies the detected skew angle to deskew the tabs, blobs and part_grid. tabfind_aligned_gap_fraction should be the value of parameter textord_tabfind_aligned_gap_fraction.

Definition at line 423 of file tabfind.cpp.

428  {
429  ScrollView* tab_win = FindInitialTabVectors(image_blobs, min_gutter_width,
430  tabfind_aligned_gap_fraction,
431  block);
432  ComputeColumnWidths(tab_win, part_grid);
434  SortVectors();
435  CleanupTabs();
436  if (!Deskew(hlines, image_blobs, block, deskew, reskew))
437  return false; // Skew angle is too large.
438  part_grid->Deskew(*deskew);
439  ApplyTabConstraints();
440  #ifndef GRAPHICS_DISABLED
442  tab_win = MakeWindow(640, 50, "FinalTabs");
443  DisplayBoxes(tab_win);
444  DisplayTabs("FinalTabs", tab_win);
445  tab_win = DisplayTabVectors(tab_win);
446  }
447  #endif // GRAPHICS_DISABLED
448  return true;
449 }
ScrollView * MakeWindow(int x, int y, const char *window_name)
Definition: bbgrid.h:591
ScrollView * DisplayTabVectors(ScrollView *tab_win)
Definition: tabfind.cpp:498
bool textord_tabfind_show_finaltabs
Definition: tabfind.cpp:64
ScrollView * FindInitialTabVectors(BLOBNBOX_LIST *image_blobs, int min_gutter_width, double tabfind_aligned_gap_fraction, TO_BLOCK *block)
Definition: tabfind.cpp:515
ScrollView * DisplayTabs(const char *window_name, ScrollView *tab_win)
ICOORD vertical_skew_
Definition: tabfind.h:367
static void MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors, BlobGrid *grid)
Definition: tabvector.cpp:356

◆ GutterWidth()

int tesseract::TabFind::GutterWidth ( int  bottom_y,
int  top_y,
const TabVector & v,
bool  ignore_unmergeables,
int  max_gutter_width,
int *  required_shift 
)

Definition at line 162 of file tabfind.cpp.

164  {
165  bool right_to_left = v.IsLeftTab();
166  int bottom_x = v.XAtY(bottom_y);
167  int top_x = v.XAtY(top_y);
168  int start_x = right_to_left ? std::max(top_x, bottom_x) : std::min(top_x, bottom_x);
169  BlobGridSearch sidesearch(this);
170  sidesearch.StartSideSearch(start_x, bottom_y, top_y);
171  int min_gap = max_gutter_width;
172  *required_shift = 0;
173  BLOBNBOX* blob = nullptr;
174  while ((blob = sidesearch.NextSideSearch(right_to_left)) != nullptr) {
175  const TBOX& box = blob->bounding_box();
176  if (box.bottom() >= top_y || box.top() <= bottom_y)
177  continue; // Doesn't overlap enough.
178  if (box.height() >= gridsize() * 2 &&
179  box.height() > box.width() * kLineFragmentAspectRatio) {
180  // Skip likely separator line residue.
181  continue;
182  }
183  if (ignore_unmergeables && BLOBNBOX::UnMergeableType(blob->region_type()))
184  continue; // Skip non-text if required.
185  int mid_y = (box.bottom() + box.top()) / 2;
186  // We use the x at the mid-y so that the required_shift guarantees
187  // to clear all the blobs on the tab-stop. If we use the min/max
188  // of x at top/bottom of the blob, then exactness would be required,
189  // which is not a good thing.
190  int tab_x = v.XAtY(mid_y);
191  int gap;
192  if (right_to_left) {
193  gap = tab_x - box.right();
194  if (gap < 0 && box.left() - tab_x < *required_shift)
195  *required_shift = box.left() - tab_x;
196  } else {
197  gap = box.left() - tab_x;
198  if (gap < 0 && box.right() - tab_x > *required_shift)
199  *required_shift = box.right() - tab_x;
200  }
201  if (gap > 0 && gap < min_gap)
202  min_gap = gap;
203  }
204  // Result may be negative, in which case, this is a really bad tabstop.
205  return min_gap - abs(*required_shift);
206 }
STL namespace.
int gridsize() const
Definition: bbgrid.h:64
Definition: rect.h:34
static bool UnMergeableType(BlobRegionType type)
Definition: blobbox.h:431
int16_t width() const
Definition: rect.h:115
int16_t left() const
Definition: rect.h:72
int16_t top() const
Definition: rect.h:58
BlobRegionType region_type() const
Definition: blobbox.h:284
GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > BlobGridSearch
Definition: blobgrid.h:31
const TBOX & bounding_box() const
Definition: blobbox.h:231
int16_t right() const
Definition: rect.h:79
int16_t bottom() const
Definition: rect.h:65
const double kLineFragmentAspectRatio
Definition: tabfind.cpp:55
int16_t height() const
Definition: rect.h:108

◆ GutterWidthAndNeighbourGap()

void tesseract::TabFind::GutterWidthAndNeighbourGap ( int  tab_x,
int  mean_height,
int  max_gutter,
bool  left,
BLOBNBOX * bbox,
int *  gutter_width,
int *  neighbour_gap 
)

Find the gutter width and distance to inner neighbour for the given blob.

Definition at line 209 of file tabfind.cpp.

212  {
213  const TBOX& box = bbox->bounding_box();
214  // The gutter and internal sides of the box.
215  int gutter_x = left ? box.left() : box.right();
216  int internal_x = left ? box.right() : box.left();
217  // On ragged edges, the gutter side of the box is away from the tabstop.
218  int tab_gap = left ? gutter_x - tab_x : tab_x - gutter_x;
219  *gutter_width = max_gutter;
220  // If the box is away from the tabstop, we need to increase
221  // the allowed gutter width.
222  if (tab_gap > 0)
223  *gutter_width += tab_gap;
224  bool debug = WithinTestRegion(2, box.left(), box.bottom());
225  if (debug)
226  tprintf("Looking in gutter\n");
227  // Find the nearest blob on the outside of the column.
228  BLOBNBOX* gutter_bbox = AdjacentBlob(bbox, left,
229  bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0,
230  *gutter_width, box.top(), box.bottom());
231  if (gutter_bbox != nullptr) {
232  const TBOX& gutter_box = gutter_bbox->bounding_box();
233  *gutter_width = left ? tab_x - gutter_box.right()
234  : gutter_box.left() - tab_x;
235  }
236  if (*gutter_width >= max_gutter) {
237  // If there is no box because a tab was in the way, get the tab coord.
238  TBOX gutter_box(box);
239  if (left) {
240  gutter_box.set_left(tab_x - max_gutter - 1);
241  gutter_box.set_right(tab_x - max_gutter);
242  int tab_gutter = RightEdgeForBox(gutter_box, true, false);
243  if (tab_gutter < tab_x - 1)
244  *gutter_width = tab_x - tab_gutter;
245  } else {
246  gutter_box.set_left(tab_x + max_gutter);
247  gutter_box.set_right(tab_x + max_gutter + 1);
248  int tab_gutter = LeftEdgeForBox(gutter_box, true, false);
249  if (tab_gutter > tab_x + 1)
250  *gutter_width = tab_gutter - tab_x;
251  }
252  }
253  if (*gutter_width > max_gutter)
254  *gutter_width = max_gutter;
255  // Now look for a neighbour on the inside.
256  if (debug)
257  tprintf("Looking for neighbour\n");
258  BLOBNBOX* neighbour = AdjacentBlob(bbox, !left,
259  bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0,
260  *gutter_width, box.top(), box.bottom());
261  int neighbour_edge = left ? RightEdgeForBox(box, true, false)
262  : LeftEdgeForBox(box, true, false);
263  if (neighbour != nullptr) {
264  const TBOX& n_box = neighbour->bounding_box();
265  if (debug) {
266  tprintf("Found neighbour:");
267  n_box.print();
268  }
269  if (left && n_box.left() < neighbour_edge)
270  neighbour_edge = n_box.left();
271  else if (!left && n_box.right() > neighbour_edge)
272  neighbour_edge = n_box.right();
273  }
274  *neighbour_gap = left ? neighbour_edge - internal_x
275  : internal_x - neighbour_edge;
276 }
void print() const
Definition: rect.h:278
int RightEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:282
Definition: rect.h:34
BlobTextFlowType flow() const
Definition: blobbox.h:296
static bool WithinTestRegion(int detail_level, int x, int y)
int LeftEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:287
void set_right(int x)
Definition: rect.h:82
int16_t left() const
Definition: rect.h:72
int16_t top() const
Definition: rect.h:58
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
void set_left(int x)
Definition: rect.h:75
const TBOX & bounding_box() const
Definition: blobbox.h:231
int16_t right() const
Definition: rect.h:79
int16_t bottom() const
Definition: rect.h:65

◆ image_origin()

const ICOORD& tesseract::TabFind::image_origin ( ) const
inline

Return the coords at which to draw the image backdrop.

Definition at line 165 of file tabfind.h.

165  {
166  return image_origin_;
167  }

◆ InsertBlob()

bool tesseract::TabFind::InsertBlob ( bool  h_spread,
bool  v_spread,
BLOBNBOX * blob,
BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *  grid 
)

Insert a single blob into the given grid (not necessarily this). If h_spread, then all cells covered horizontally by the box are used, otherwise, just the bottom-left. Similarly for v_spread. A side effect is that the left and right rule edges of the blob are set according to the tab vectors in this (not grid).

Definition at line 119 of file tabfind.cpp.

121  {
122  TBOX box = blob->bounding_box();
123  blob->set_left_rule(LeftEdgeForBox(box, false, false));
124  blob->set_right_rule(RightEdgeForBox(box, false, false));
125  blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false));
126  blob->set_right_crossing_rule(RightEdgeForBox(box, true, false));
127  if (blob->joined_to_prev())
128  return false;
129  grid->InsertBBox(h_spread, v_spread, blob);
130  return true;
131 }
int RightEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:282
Definition: rect.h:34
int LeftEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:287
void set_left_rule(int new_left)
Definition: blobbox.h:317
void set_left_crossing_rule(int new_left)
Definition: blobbox.h:329
void set_right_rule(int new_right)
Definition: blobbox.h:323
bool joined_to_prev() const
Definition: blobbox.h:257
void InsertBBox(bool h_spread, bool v_spread, BBC *bbox)
Definition: bbgrid.h:488
const TBOX & bounding_box() const
Definition: blobbox.h:231
void set_right_crossing_rule(int new_right)
Definition: blobbox.h:335

◆ InsertBlobsToGrid()

void tesseract::TabFind::InsertBlobsToGrid ( bool  h_spread,
bool  v_spread,
BLOBNBOX_LIST *  blobs,
BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *  grid 
)

Insert a list of blobs into the given grid (not necessarily this). See InsertBlob for the other arguments. It would seem to make more sense to swap this and grid, but this way around allows grid to not be derived from TabFind, eg a ColPartitionGrid, while the grid that provides the tab stops(this) has to be derived from TabFind.

Definition at line 92 of file tabfind.cpp.

95  {
96  BLOBNBOX_IT blob_it(blobs);
97  int b_count = 0;
98  int reject_count = 0;
99  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
100  BLOBNBOX* blob = blob_it.data();
101 // if (InsertBlob(true, true, blob, grid)) {
102  if (InsertBlob(h_spread, v_spread, blob, grid)) {
103  ++b_count;
104  } else {
105  ++reject_count;
106  }
107  }
108  if (textord_debug_tabfind) {
109  tprintf("Inserted %d blobs into grid, %d rejected.\n",
110  b_count, reject_count);
111  }
112 }
bool InsertBlob(bool h_spread, bool v_spread, BLOBNBOX *blob, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
Definition: tabfind.cpp:119
int textord_debug_tabfind
Definition: alignedblob.cpp:28
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37

◆ LeftEdgeForBox()

int tesseract::TabFind::LeftEdgeForBox ( const TBOX & box,
bool  crossing,
bool  extended 
)

As RightEdgeForBox, but finds the left Edge instead.

Definition at line 287 of file tabfind.cpp.

287  {
288  TabVector* v = LeftTabForBox(box, crossing, extended);
289  return v == nullptr ? bleft_.x() : v->XAtY((box.top() + box.bottom()) / 2);
290 }
TabVector * LeftTabForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:349
int16_t top() const
Definition: rect.h:58
int16_t x() const
access function
Definition: points.h:53
int16_t bottom() const
Definition: rect.h:65

◆ LeftTabForBox()

TabVector * tesseract::TabFind::LeftTabForBox ( const TBOX & box,
bool  crossing,
bool  extended 
)

As RightTabForBox, but finds the left TabVector instead.

Definition at line 349 of file tabfind.cpp.

350  {
351  if (v_it_.empty())
352  return nullptr;
353  int top_y = box.top();
354  int bottom_y = box.bottom();
355  int mid_y = (top_y + bottom_y) / 2;
356  int left = crossing ? (box.left() + box.right()) / 2 : box.left();
357  int min_key, max_key;
358  SetupTabSearch(left, mid_y, &min_key, &max_key);
359  // Position the iterator at the last TabVector with sort_key <= max_key.
360  while (!v_it_.at_last() && v_it_.data()->sort_key() <= max_key)
361  v_it_.forward();
362  while (!v_it_.at_first() && v_it_.data()->sort_key() > max_key) {
363  v_it_.backward();
364  }
365  // Find the rightmost tab vector that overlaps and has XAtY(mid_y) <= left.
366  TabVector* best_v = nullptr;
367  int best_x = -1;
368  int key_limit = -1;
369  do {
370  TabVector* v = v_it_.data();
371  int x = v->XAtY(mid_y);
372  if (x <= left &&
373  (v->VOverlap(top_y, bottom_y) > 0 ||
374  (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
375  if (best_v == nullptr || x > best_x) {
376  best_v = v;
377  best_x = x;
378  // We can guarantee that no better vector can be found if the
379  // sort key is less than that of the best by max_key - min_key.
380  key_limit = v->sort_key() - (max_key - min_key);
381  }
382  }
383  // Break when the search is done to avoid wrapping the iterator and
384  // thereby potentially slowing the next search.
385  if (v_it_.at_first() ||
386  (best_v != nullptr && v->sort_key() < key_limit))
387  break; // Prevent restarting list for next call.
388  v_it_.backward();
389  } while (!v_it_.at_last());
390  return best_v;
391 }
int16_t left() const
Definition: rect.h:72
int16_t top() const
Definition: rect.h:58
void SetupTabSearch(int x, int y, int *min_key, int *max_key)
Definition: tabfind.cpp:491
int16_t right() const
Definition: rect.h:79
int16_t bottom() const
Definition: rect.h:65

◆ ReflectInYAxis()

void tesseract::TabFind::ReflectInYAxis ( )
protected

Definition at line 1357 of file tabfind.cpp.

1357  {
1358  TabVector_LIST temp_list;
1359  TabVector_IT temp_it(&temp_list);
1360  v_it_.move_to_first();
1361  // The TabVector list only contains vertical lines, but they need to be
1362  // reflected and the list needs to be reversed, so they are still in
1363  // sort_key order.
1364  while (!v_it_.empty()) {
1365  TabVector* v = v_it_.extract();
1366  v_it_.forward();
1367  v->ReflectInYAxis();
1368  temp_it.add_before_then_move(v);
1369  }
1370  v_it_.add_list_after(&temp_list);
1371  v_it_.move_to_first();
1372  // Reset this grid with reflected bounding boxes.
1373  TBOX grid_box(bleft(), tright());
1374  int tmp = grid_box.left();
1375  grid_box.set_left(-grid_box.right());
1376  grid_box.set_right(-tmp);
1377  Init(gridsize(), grid_box.botleft(), grid_box.topright());
1378 }
int gridsize() const
Definition: bbgrid.h:64
const ICOORD & bleft() const
Definition: bbgrid.h:73
Definition: rect.h:34
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
Definition: bbgrid.h:447
const ICOORD & tright() const
Definition: bbgrid.h:76

◆ Reset()

void tesseract::TabFind::Reset ( )
protected

Definition at line 1346 of file tabfind.cpp.

1346  {
1347  v_it_.move_to_first();
1348  for (v_it_.mark_cycle_pt(); !v_it_.cycled_list(); v_it_.forward()) {
1349  if (!v_it_.data()->IsSeparator())
1350  delete v_it_.extract();
1351  }
1352  Clear();
1353 }

◆ ResetForVerticalText()

void tesseract::TabFind::ResetForVerticalText ( const FCOORD & rotate,
const FCOORD & rerotate,
TabVector_LIST *  horizontal_lines,
int *  min_gutter_width 
)
protected

Definition at line 1301 of file tabfind.cpp.

1303  {
1304  // Rotate the horizontal and vertical vectors and swap them over.
1305  // Only the separators are kept and rotated; other tabs are used
1306  // to estimate the gutter width then thrown away.
1307  TabVector_LIST ex_verticals;
1308  TabVector_IT ex_v_it(&ex_verticals);
1309  TabVector_LIST vlines;
1310  TabVector_IT v_it(&vlines);
1311  while (!v_it_.empty()) {
1312  TabVector* v = v_it_.extract();
1313  if (v->IsSeparator()) {
1314  v->Rotate(rotate);
1315  ex_v_it.add_after_then_move(v);
1316  } else {
1317  v_it.add_after_then_move(v);
1318  }
1319  v_it_.forward();
1320  }
1321 
1322  // Adjust the min gutter width for better tabbox selection
1323  // in 2nd call to FindInitialTabVectors().
1324  int median_gutter = FindMedianGutterWidth(&vlines);
1325  if (median_gutter > *min_gutter_width)
1326  *min_gutter_width = median_gutter;
1327 
1328  TabVector_IT h_it(horizontal_lines);
1329  for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
1330  TabVector* h = h_it.data();
1331  h->Rotate(rotate);
1332  }
1333  v_it_.add_list_after(horizontal_lines);
1334  v_it_.move_to_first();
1335  h_it.set_to_list(horizontal_lines);
1336  h_it.add_list_after(&ex_verticals);
1337 
1338  // Rebuild the grid to the new size.
1339  TBOX grid_box(bleft(), tright());
1340  grid_box.rotate_large(rotate);
1341  Init(gridsize(), grid_box.botleft(), grid_box.topright());
1342 }
int gridsize() const
Definition: bbgrid.h:64
const ICOORD & bleft() const
Definition: bbgrid.h:73
Definition: rect.h:34
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
Definition: bbgrid.h:447
const ICOORD & tright() const
Definition: bbgrid.h:76

◆ RightEdgeForBox()

int tesseract::TabFind::RightEdgeForBox ( const TBOX & box,
bool  crossing,
bool  extended 
)

Return the x-coord that corresponds to the right edge for the given box. If there is a rule line to the right that vertically overlaps it, then return the x-coord of the rule line, otherwise return the right edge of the page. For details see RightTabForBox below.

Definition at line 282 of file tabfind.cpp.

282  {
283  TabVector* v = RightTabForBox(box, crossing, extended);
284  return v == nullptr ? tright_.x() : v->XAtY((box.top() + box.bottom()) / 2);
285 }
TabVector * RightTabForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:305
ICOORD tright_
Definition: bbgrid.h:92
int16_t top() const
Definition: rect.h:58
int16_t x() const
access function
Definition: points.h:53
int16_t bottom() const
Definition: rect.h:65

◆ RightTabForBox()

TabVector * tesseract::TabFind::RightTabForBox ( const TBOX & box,
bool  crossing,
bool  extended 
)

Return the TabVector that corresponds to the right edge for the given box. If there is a TabVector to the right that vertically overlaps it, then return it, otherwise return nullptr. Note that Right and Left refer to the position of the TabVector, not its type, ie RightTabForBox returns the nearest TabVector to the right of the box, regardless of its type. If a TabVector crosses right through the box (as opposed to grazing one edge or missing entirely), then crossing false will ignore such a line. Crossing true will return the line for BOTH left and right edges. If extended is true, then TabVectors are considered to extend to their extended_start/end_y, otherwise, just the startpt_ and endpt_. These functions make use of an internal iterator to the vectors_ list for speed when used repeatedly on neighbouring boxes. The caveat is that the iterator must be updated whenever the list is modified.

Definition at line 305 of file tabfind.cpp.

306  {
307  if (v_it_.empty())
308  return nullptr;
309  int top_y = box.top();
310  int bottom_y = box.bottom();
311  int mid_y = (top_y + bottom_y) / 2;
312  int right = crossing ? (box.left() + box.right()) / 2 : box.right();
313  int min_key, max_key;
314  SetupTabSearch(right, mid_y, &min_key, &max_key);
315  // Position the iterator at the first TabVector with sort_key >= min_key.
316  while (!v_it_.at_first() && v_it_.data()->sort_key() >= min_key)
317  v_it_.backward();
318  while (!v_it_.at_last() && v_it_.data()->sort_key() < min_key)
319  v_it_.forward();
320  // Find the leftmost tab vector that overlaps and has XAtY(mid_y) >= right.
321  TabVector* best_v = nullptr;
322  int best_x = -1;
323  int key_limit = -1;
324  do {
325  TabVector* v = v_it_.data();
326  int x = v->XAtY(mid_y);
327  if (x >= right &&
328  (v->VOverlap(top_y, bottom_y) > 0 ||
329  (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
330  if (best_v == nullptr || x < best_x) {
331  best_v = v;
332  best_x = x;
333  // We can guarantee that no better vector can be found if the
334  // sort key exceeds that of the best by max_key - min_key.
335  key_limit = v->sort_key() + max_key - min_key;
336  }
337  }
338  // Break when the search is done to avoid wrapping the iterator and
339  // thereby potentially slowing the next search.
340  if (v_it_.at_last() ||
341  (best_v != nullptr && v->sort_key() > key_limit))
342  break; // Prevent restarting list for next call.
343  v_it_.forward();
344  } while (!v_it_.at_first());
345  return best_v;
346 }
int16_t left() const
Definition: rect.h:72
int16_t top() const
Definition: rect.h:58
void SetupTabSearch(int x, int y, int *min_key, int *max_key)
Definition: tabfind.cpp:491
int16_t right() const
Definition: rect.h:79
int16_t bottom() const
Definition: rect.h:65

◆ RotateBlobList()

void tesseract::TabFind::RotateBlobList ( const FCOORD & rotation,
BLOBNBOX_LIST *  blobs 
)
static protected

Definition at line 1257 of file tabfind.cpp.

1257  {
1258  BLOBNBOX_IT it(blobs);
1259  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1260  it.data()->rotate_box(rotation);
1261  }
1262 }

◆ SetBlobRuleEdges()

void tesseract::TabFind::SetBlobRuleEdges ( BLOBNBOX_LIST *  blobs)

Definition at line 143 of file tabfind.cpp.

143  {
144  BLOBNBOX_IT blob_it(blobs);
145  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
146  BLOBNBOX* blob = blob_it.data();
147  TBOX box = blob->bounding_box();
148  blob->set_left_rule(LeftEdgeForBox(box, false, false));
149  blob->set_right_rule(RightEdgeForBox(box, false, false));
150  blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false));
151  blob->set_right_crossing_rule(RightEdgeForBox(box, true, false));
152  }
153 }
int RightEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:282
TBOX
Definition: rect.h:34
int LeftEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:287
void set_left_rule(int new_left)
Definition: blobbox.h:317
void set_left_crossing_rule(int new_left)
Definition: blobbox.h:329
void set_right_rule(int new_right)
Definition: blobbox.h:323
const TBOX & bounding_box() const
Definition: blobbox.h:231
void set_right_crossing_rule(int new_right)
Definition: blobbox.h:335

◆ SetBlockRuleEdges()

void tesseract::TabFind::SetBlockRuleEdges ( TO_BLOCK *  block)

Definition at line 134 of file tabfind.cpp.

134  {
135  SetBlobRuleEdges(&block->blobs);
136  SetBlobRuleEdges(&block->small_blobs);
137  SetBlobRuleEdges(&block->noise_blobs);
138  SetBlobRuleEdges(&block->large_blobs);
139 }
void SetBlobRuleEdges(BLOBNBOX_LIST *blobs)
Definition: tabfind.cpp:143
BLOBNBOX_LIST blobs
Definition: blobbox.h:785
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:789
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:788
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:787

◆ SetupTabSearch()

void tesseract::TabFind::SetupTabSearch ( int  x,
int  y,
int *  min_key,
int *  max_key 
)
protected

Definition at line 491 of file tabfind.cpp.

491  {
492  int key1 = TabVector::SortKey(vertical_skew_, x, (y + tright_.y()) / 2);
493  int key2 = TabVector::SortKey(vertical_skew_, x, (y + bleft_.y()) / 2);
494  *min_key = std::min(key1, key2);
495  *max_key = std::max(key1, key2);
496 }
int16_t y() const
access_function
Definition: points.h:57
ICOORD tright_
Definition: bbgrid.h:92
static int SortKey(const ICOORD &vertical, int x, int y)
Definition: tabvector.h:280
ICOORD vertical_skew_
Definition: tabfind.h:367

◆ TidyBlobs()

void tesseract::TabFind::TidyBlobs ( TO_BLOCK *  block)
protected

Definition at line 466 of file tabfind.cpp.

466  {
467  BLOBNBOX_IT large_it = &block->large_blobs;
468  BLOBNBOX_IT blob_it = &block->blobs;
469  int b_count = 0;
470  for (large_it.mark_cycle_pt(); !large_it.cycled_list(); large_it.forward()) {
471  BLOBNBOX* large_blob = large_it.data();
472  if (large_blob->owner() != nullptr) {
473  blob_it.add_to_end(large_it.extract());
474  ++b_count;
475  }
476  }
477  if (textord_debug_tabfind) {
478  tprintf("Moved %d large blobs to normal list\n",
479  b_count);
480  #ifndef GRAPHICS_DISABLED
481  ScrollView* rej_win = MakeWindow(500, 300, "Image blobs");
482  block->plot_graded_blobs(rej_win);
483  block->plot_noise_blobs(rej_win);
484  rej_win->Update();
485  #endif // GRAPHICS_DISABLED
486  }
487  block->DeleteUnownedNoise();
488 }
ScrollView * MakeWindow(int x, int y, const char *window_name)
Definition: bbgrid.h:591
void plot_noise_blobs(ScrollView *to_win)
Definition: blobbox.cpp:1064
static void Update()
Definition: scrollview.cpp:711
void plot_graded_blobs(ScrollView *to_win)
Definition: blobbox.cpp:1072
int textord_debug_tabfind
Definition: alignedblob.cpp:28
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
void DeleteUnownedNoise()
Definition: blobbox.cpp:1038
BLOBNBOX_LIST blobs
Definition: blobbox.h:785
tesseract::ColPartition * owner() const
Definition: blobbox.h:353
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:789

◆ vectors()

TabVector_LIST* tesseract::TabFind::vectors ( )
inline protected

Accessors

Definition at line 173 of file tabfind.h.

173  {
174  return &vectors_;
175  }

◆ VeryDifferentSizes()

bool tesseract::TabFind::VeryDifferentSizes ( int  size1,
int  size2 
)
static

Return true if the sizes are more than a factor of 5 different.

Definition at line 414 of file tabfind.cpp.

414  {
415  return size1 > size2 * 5 || size2 > size1 * 5;
416 }

◆ WidthCB()

WidthCallback* tesseract::TabFind::WidthCB ( )
inline

Return a callback for testing CommonWidth.

Definition at line 158 of file tabfind.h.

158  {
159  return width_cb_;
160  }

Member Data Documentation

◆ resolution_

int tesseract::TabFind::resolution_
protected

Definition at line 368 of file tabfind.h.

◆ vertical_skew_

ICOORD tesseract::TabFind::vertical_skew_
protected

Definition at line 367 of file tabfind.h.


The documentation for this class was generated from the following files: