tesseract  5.0.0-alpha-619-ge9db
tesseract::ColumnFinder Class Reference

#include <colfind.h>

Inheritance diagram for tesseract::ColumnFinder:
tesseract::TabFind tesseract::AlignedBlob tesseract::BlobGrid tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > tesseract::GridBase

Public Member Functions

 ColumnFinder (int gridsize, const ICOORD &bleft, const ICOORD &tright, int resolution, bool cjk_script, double aligned_gap_fraction, TabVector_LIST *vlines, TabVector_LIST *hlines, int vertical_x, int vertical_y)
 
 ~ColumnFinder () override
 
const DENORM * denorm () const
 
const TextlineProjection * projection () const
 
void set_cjk_script (bool is_cjk)
 
void SetupAndFilterNoise (PageSegMode pageseg_mode, Pix *photo_mask_pix, TO_BLOCK *input_block)
 
bool IsVerticallyAlignedText (double find_vertical_text_ratio, TO_BLOCK *block, BLOBNBOX_CLIST *osd_blobs)
 
void CorrectOrientation (TO_BLOCK *block, bool vertical_text_lines, int recognition_rotation)
 
int FindBlocks (PageSegMode pageseg_mode, Pix *scaled_color, int scaled_factor, TO_BLOCK *block, Pix *photo_mask_pix, Pix *thresholds_pix, Pix *grey_pix, DebugPixa *pixa_debug, BLOCK_LIST *blocks, BLOBNBOX_LIST *diacritic_blobs, TO_BLOCK_LIST *to_blocks)
 
void GetDeskewVectors (FCOORD *deskew, FCOORD *reskew)
 
void SetEquationDetect (EquationDetectBase *detect)
 
- Public Member Functions inherited from tesseract::TabFind
 TabFind (int gridsize, const ICOORD &bleft, const ICOORD &tright, TabVector_LIST *vlines, int vertical_x, int vertical_y, int resolution)
 
 ~TabFind () override
 
void InsertBlobsToGrid (bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
 
bool InsertBlob (bool h_spread, bool v_spread, BLOBNBOX *blob, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
 
void SetBlockRuleEdges (TO_BLOCK *block)
 
void SetBlobRuleEdges (BLOBNBOX_LIST *blobs)
 
int GutterWidth (int bottom_y, int top_y, const TabVector &v, bool ignore_unmergeables, int max_gutter_width, int *required_shift)
 
void GutterWidthAndNeighbourGap (int tab_x, int mean_height, int max_gutter, bool left, BLOBNBOX *bbox, int *gutter_width, int *neighbour_gap)
 
int RightEdgeForBox (const TBOX &box, bool crossing, bool extended)
 
int LeftEdgeForBox (const TBOX &box, bool crossing, bool extended)
 
TabVector * RightTabForBox (const TBOX &box, bool crossing, bool extended)
 
TabVector * LeftTabForBox (const TBOX &box, bool crossing, bool extended)
 
bool CommonWidth (int width)
 
WidthCallback WidthCB ()
 
const ICOORD & image_origin () const
 
- Public Member Functions inherited from tesseract::AlignedBlob
 AlignedBlob (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
 ~AlignedBlob () override
 
ScrollView * DisplayTabs (const char *window_name, ScrollView *tab_win)
 
TabVector * FindVerticalAlignment (AlignedBlobParams align_params, BLOBNBOX *bbox, int *vertical_x, int *vertical_y)
 
- Public Member Functions inherited from tesseract::BlobGrid
 BlobGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
 ~BlobGrid () override
 
void InsertBlobList (BLOBNBOX_LIST *blobs)
 
- Public Member Functions inherited from tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >
 BBGrid ()
 
 BBGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
 ~BBGrid () override
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
void Clear ()
 
void ClearGridData (void(*free_method)(BLOBNBOX *))
 
void InsertBBox (bool h_spread, bool v_spread, BLOBNBOX *bbox)
 
void InsertPixPtBBox (int left, int bottom, Pix *pix, BLOBNBOX *bbox)
 
void RemoveBBox (BLOBNBOX *bbox)
 
bool RectangleEmpty (const TBOX &rect)
 
IntGrid * CountCellElements ()
 
ScrollView * MakeWindow (int x, int y, const char *window_name)
 
void DisplayBoxes (ScrollView *window)
 
void AssertNoDuplicates ()
 
virtual void HandleClick (int x, int y)
 
- Public Member Functions inherited from tesseract::GridBase
 GridBase ()=default
 
 GridBase (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~GridBase ()
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
int gridsize () const
 
int gridwidth () const
 
int gridheight () const
 
const ICOORD & bleft () const
 
const ICOORD & tright () const
 
void GridCoords (int x, int y, int *grid_x, int *grid_y) const
 
void ClipGridCoords (int *x, int *y) const
 

Additional Inherited Members

- Static Public Member Functions inherited from tesseract::TabFind
static bool DifferentSizes (int size1, int size2)
 
static bool VeryDifferentSizes (int size1, int size2)
 
- Static Public Member Functions inherited from tesseract::AlignedBlob
static bool WithinTestRegion (int detail_level, int x, int y)
 
- Protected Member Functions inherited from tesseract::TabFind
TabVector_LIST * vectors ()
 
TabVector_LIST * dead_vectors ()
 
bool FindTabVectors (TabVector_LIST *hlines, BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, int min_gutter_width, double tabfind_aligned_gap_fraction, ColPartitionGrid *part_grid, FCOORD *deskew, FCOORD *reskew)
 
void DontFindTabVectors (BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, FCOORD *deskew, FCOORD *reskew)
 
void TidyBlobs (TO_BLOCK *block)
 
void SetupTabSearch (int x, int y, int *min_key, int *max_key)
 
ScrollView * DisplayTabVectors (ScrollView *tab_win)
 
ScrollView * FindInitialTabVectors (BLOBNBOX_LIST *image_blobs, int min_gutter_width, double tabfind_aligned_gap_fraction, TO_BLOCK *block)
 
void ResetForVerticalText (const FCOORD &rotate, const FCOORD &rerotate, TabVector_LIST *horizontal_lines, int *min_gutter_width)
 
void Reset ()
 
void ReflectInYAxis ()
 
- Static Protected Member Functions inherited from tesseract::TabFind
static void RotateBlobList (const FCOORD &rotation, BLOBNBOX_LIST *blobs)
 
- Protected Attributes inherited from tesseract::TabFind
ICOORD vertical_skew_
 Estimate of true vertical in this image. More...
 
int resolution_
 Of source image in pixels per inch. More...
 
- Protected Attributes inherited from tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >
BLOBNBOX_CLIST * grid_
 
- Protected Attributes inherited from tesseract::GridBase
int gridsize_
 
int gridwidth_
 
int gridheight_
 
int gridbuckets_
 
ICOORD bleft_
 
ICOORD tright_
 

Detailed Description

Definition at line 50 of file colfind.h.

Constructor & Destructor Documentation

◆ ColumnFinder()

tesseract::ColumnFinder::ColumnFinder ( int  gridsize,
const ICOORD &  bleft,
const ICOORD &  tright,
int  resolution,
bool  cjk_script,
double  aligned_gap_fraction,
TabVector_LIST *  vlines,
TabVector_LIST *  hlines,
int  vertical_x,
int  vertical_y 
)

Definition at line 74 of file colfind.cpp.

80  : TabFind(gridsize, bleft, tright, vlines, vertical_x, vertical_y,
81  resolution),
82  cjk_script_(cjk_script),
83  min_gutter_width_(static_cast<int>(kMinGutterWidthGrid * gridsize)),
84  mean_column_gap_(tright.x() - bleft.x()),
85  tabfind_aligned_gap_fraction_(aligned_gap_fraction),
86  deskew_(0.0f, 0.0f),
87  reskew_(1.0f, 0.0f), rotation_(1.0f, 0.0f), rerotate_(1.0f, 0.0f),
88  text_rotation_(0.0f, 0.0f),
89  best_columns_(nullptr), stroke_width_(nullptr),
90  part_grid_(gridsize, bleft, tright), nontext_map_(nullptr),
91  projection_(resolution),
92  denorm_(nullptr), input_blobs_win_(nullptr), equation_detect_(nullptr) {
93  TabVector_IT h_it(&horizontal_lines_);
94  h_it.add_list_after(hlines);
95 }

◆ ~ColumnFinder()

tesseract::ColumnFinder::~ColumnFinder ( )
override

Definition at line 97 of file colfind.cpp.

97  {
98  column_sets_.delete_data_pointers();
99  delete [] best_columns_;
100  delete stroke_width_;
101  delete input_blobs_win_;
102  pixDestroy(&nontext_map_);
103  while (denorm_ != nullptr) {
104  DENORM* dead_denorm = denorm_;
105  denorm_ = const_cast<DENORM*>(denorm_->predecessor());
106  delete dead_denorm;
107  }
108 
109  // The ColPartitions are destroyed automatically, but any boxes in
110  // the noise_parts_ list are owned and need to be deleted explicitly.
111  ColPartition_IT part_it(&noise_parts_);
112  for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
113  ColPartition* part = part_it.data();
114  part->DeleteBoxes();
115  }
116  // Likewise any boxes in the good_parts_ list need to be deleted.
117  // These are just the image parts. Text parts have already given their
118  // boxes on to the TO_BLOCK, and have empty lists.
119  part_it.set_to_list(&good_parts_);
120  for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
121  ColPartition* part = part_it.data();
122  part->DeleteBoxes();
123  }
124  // Also, any blobs on the image_bblobs_ list need to have their cblobs
125  // deleted. This only happens if there has been an early return from
126  // FindColumns, as in a normal return, the blobs go into the grid and
127  // end up in noise_parts_, good_parts_ or the output blocks.
128  BLOBNBOX_IT bb_it(&image_bblobs_);
129  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
130  BLOBNBOX* bblob = bb_it.data();
131  delete bblob->cblob();
132  }
133 }

Member Function Documentation

◆ CorrectOrientation()

void tesseract::ColumnFinder::CorrectOrientation ( TO_BLOCK *  block,
bool  vertical_text_lines,
int  recognition_rotation 
)

Definition at line 197 of file colfind.cpp.

199  {
200  const FCOORD anticlockwise90(0.0f, 1.0f);
201  const FCOORD clockwise90(0.0f, -1.0f);
202  const FCOORD rotation180(-1.0f, 0.0f);
203  const FCOORD norotation(1.0f, 0.0f);
204 
205  text_rotation_ = norotation;
206  // Rotate the page to make the text upright, as implied by
207  // recognition_rotation.
208  rotation_ = norotation;
209  if (recognition_rotation == 1) {
210  rotation_ = anticlockwise90;
211  } else if (recognition_rotation == 2) {
212  rotation_ = rotation180;
213  } else if (recognition_rotation == 3) {
214  rotation_ = clockwise90;
215  }
216  // We infer text writing direction to be vertical if there are several
217  // vertical text lines detected, and horizontal if not. But if the page
218  // orientation was determined to be 90 or 270 degrees, the true writing
219  // direction is the opposite of what we inferred.
220  if (recognition_rotation & 1) {
221  vertical_text_lines = !vertical_text_lines;
222  }
223  // If we still believe the writing direction is vertical, we use the
224  // convention of rotating the page ccw 90 degrees to make the text lines
225  // horizontal, and mark the blobs for rotation cw 90 degrees for
226  // classification so that the text order is correct after recognition.
227  if (vertical_text_lines) {
228  rotation_.rotate(anticlockwise90);
229  text_rotation_.rotate(clockwise90);
230  }
231  // Set rerotate_ to the inverse of rotation_.
232  rerotate_ = FCOORD(rotation_.x(), -rotation_.y());
233  if (rotation_.x() != 1.0f || rotation_.y() != 0.0f) {
234  // Rotate all the blobs and tab vectors.
235  RotateBlobList(rotation_, &block->large_blobs);
236  RotateBlobList(rotation_, &block->blobs);
237  RotateBlobList(rotation_, &block->small_blobs);
238  RotateBlobList(rotation_, &block->noise_blobs);
239  TabFind::ResetForVerticalText(rotation_, rerotate_, &horizontal_lines_,
240  &min_gutter_width_);
241  part_grid_.Init(gridsize(), bleft(), tright());
242  // Reset all blobs to initial state and filter by size.
243  // Since they have rotated, the list they belong on could have changed.
244  block->ReSetAndReFilterBlobs();
245  SetBlockRuleEdges(block);
246  stroke_width_->CorrectForRotation(rerotate_, &part_grid_);
247  }
248  if (textord_debug_tabfind) {
249  tprintf("Vertical=%d, orientation=%d, final rotation=(%f, %f)+(%f,%f)\n",
250  vertical_text_lines, recognition_rotation,
251  rotation_.x(), rotation_.y(),
252  text_rotation_.x(), text_rotation_.y());
253  }
254  // Setup the denormalization.
255  ASSERT_HOST(denorm_ == nullptr);
256  denorm_ = new DENORM;
257  denorm_->SetupNormalization(nullptr, &rotation_, nullptr,
258  0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f);
259 }

◆ denorm()

const DENORM* tesseract::ColumnFinder::denorm ( ) const
inline

Definition at line 68 of file colfind.h.

68  {
69  return denorm_;
70  }

◆ FindBlocks()

int tesseract::ColumnFinder::FindBlocks ( PageSegMode  pageseg_mode,
Pix *  scaled_color,
int  scaled_factor,
TO_BLOCK *  block,
Pix *  photo_mask_pix,
Pix *  thresholds_pix,
Pix *  grey_pix,
DebugPixa *  pixa_debug,
BLOCK_LIST *  blocks,
BLOBNBOX_LIST *  diacritic_blobs,
TO_BLOCK_LIST *  to_blocks 
)

Definition at line 285 of file colfind.cpp.

290  {
291  pixOr(photo_mask_pix, photo_mask_pix, nontext_map_);
292  stroke_width_->FindLeaderPartitions(input_block, &part_grid_);
293  stroke_width_->RemoveLineResidue(&big_parts_);
294  FindInitialTabVectors(nullptr, min_gutter_width_, tabfind_aligned_gap_fraction_,
295  input_block);
296  SetBlockRuleEdges(input_block);
297  stroke_width_->GradeBlobsIntoPartitions(
298  pageseg_mode, rerotate_, input_block, nontext_map_, denorm_, cjk_script_,
299  &projection_, diacritic_blobs, &part_grid_, &big_parts_);
300  if (!PSM_SPARSE(pageseg_mode)) {
301  ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_,
302  input_block, this, pixa_debug, &part_grid_,
303  &big_parts_);
304  ImageFind::TransferImagePartsToImageMask(rerotate_, &part_grid_,
305  photo_mask_pix);
306  ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_,
307  input_block, this, pixa_debug, &part_grid_,
308  &big_parts_);
309  }
310  part_grid_.ReTypeBlobs(&image_bblobs_);
311  TidyBlobs(input_block);
312  Reset();
313  // TODO(rays) need to properly handle big_parts_.
314  ColPartition_IT p_it(&big_parts_);
315  for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward())
316  p_it.data()->DisownBoxesNoAssert();
317  big_parts_.clear();
318  delete stroke_width_;
319  stroke_width_ = nullptr;
320  // Compute the edge offsets whether or not there is a grey_pix. It is done
321  // here as the c_blobs haven't been touched by rotation or anything yet,
322  // so no denorm is required, yet the text has been separated from image, so
323  // no time is wasted running it on image blobs.
324  input_block->ComputeEdgeOffsets(thresholds_pix, grey_pix);
325 
326  // A note about handling right-to-left scripts (Hebrew/Arabic):
327  // The columns must be reversed and come out in right-to-left instead of
328  // the normal left-to-right order. Because the left-to-right ordering
329  // is implicit in many data structures, it is simpler to fool the algorithms
330  // into thinking they are dealing with left-to-right text.
331  // To do this, we reflect the needed data in the y-axis and then reflect
332  // the blocks back after they have been created. This is a temporary
333  // arrangement that is confined to this function only, so the reflection
334  // is completely invisible in the output blocks.
335  // The only objects reflected are:
336  // The vertical separator lines that have already been found;
337  // The bounding boxes of all BLOBNBOXES on all lists on the input_block
338  // plus the image_bblobs. The outlines are not touched, since they are
339  // not looked at.
340  bool input_is_rtl = input_block->block->right_to_left();
341  if (input_is_rtl) {
342  // Reflect the vertical separator lines (member of TabFind).
343  ReflectInYAxis();
344  // Reflect the blob boxes.
345  ReflectForRtl(input_block, &image_bblobs_);
346  part_grid_.ReflectInYAxis();
347  }
348 
349  if (!PSM_SPARSE(pageseg_mode)) {
350  if (!PSM_COL_FIND_ENABLED(pageseg_mode)) {
351  // No tab stops needed. Just the grid that FindTabVectors makes.
352  DontFindTabVectors(&image_bblobs_, input_block, &deskew_, &reskew_);
353  } else {
354  SetBlockRuleEdges(input_block);
355  // Find the tab stops, estimate skew, and deskew the tabs, blobs and
356  // part_grid_.
357  FindTabVectors(&horizontal_lines_, &image_bblobs_, input_block,
358  min_gutter_width_, tabfind_aligned_gap_fraction_,
359  &part_grid_, &deskew_, &reskew_);
360  // Add the deskew to the denorm_.
361  auto* new_denorm = new DENORM;
362  new_denorm->SetupNormalization(nullptr, &deskew_, denorm_,
363  0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f);
364  denorm_ = new_denorm;
365  }
366  SetBlockRuleEdges(input_block);
367  part_grid_.SetTabStops(this);
368 
369  // Make the column_sets_.
370  if (!MakeColumns(false)) {
371  tprintf("Empty page!!\n");
372  part_grid_.DeleteParts();
373  return 0; // This is an empty page.
374  }
375 
376  // Refill the grid using rectangular spreading, and get the benefit
377  // of the completed tab vectors marking the rule edges of each blob.
378  Clear();
379  #ifndef GRAPHICS_DISABLED
380  if (textord_tabfind_show_reject_blobs) {
381  ScrollView* rej_win = MakeWindow(500, 300, "Rejected blobs");
382  input_block->plot_graded_blobs(rej_win);
383  }
384  #endif // GRAPHICS_DISABLED
385  InsertBlobsToGrid(false, false, &image_bblobs_, this);
386  InsertBlobsToGrid(true, true, &input_block->blobs, this);
387 
388  part_grid_.GridFindMargins(best_columns_);
389  // Split and merge the partitions by looking at local neighbours.
390  GridSplitPartitions();
391  // Resolve unknown partitions by adding to an existing partition, fixing
392  // the type, or declaring them noise.
393  part_grid_.GridFindMargins(best_columns_);
394  GridMergePartitions();
395  // Insert any unused noise blobs that are close enough to an appropriate
396  // partition.
397  InsertRemainingNoise(input_block);
398  // Add horizontal line separators as partitions.
399  GridInsertHLinePartitions();
400  GridInsertVLinePartitions();
401  // Recompute margins based on a local neighbourhood search.
402  part_grid_.GridFindMargins(best_columns_);
403  SetPartitionTypes();
404  }
405  if (textord_tabfind_show_initial_partitions) {
406  ScrollView* part_win = MakeWindow(100, 300, "InitialPartitions");
407  part_grid_.DisplayBoxes(part_win);
408  DisplayTabVectors(part_win);
409  }
410 
411  if (!PSM_SPARSE(pageseg_mode)) {
412  if (equation_detect_) {
413  equation_detect_->FindEquationParts(&part_grid_, best_columns_);
414  }
415  if (textord_tabfind_find_tables) {
416  TableFinder table_finder;
417  table_finder.Init(gridsize(), bleft(), tright());
418  table_finder.set_resolution(resolution_);
419  table_finder.set_left_to_right_language(
420  !input_block->block->right_to_left());
421  // Copy cleaned partitions from part_grid_ to clean_part_grid_ and
422  // insert dot-like noise into period_grid_
423  table_finder.InsertCleanPartitions(&part_grid_, input_block);
424  // Get Table Regions
425  table_finder.LocateTables(&part_grid_, best_columns_, WidthCB(), reskew_);
426  }
427  GridRemoveUnderlinePartitions();
428  part_grid_.DeleteUnknownParts(input_block);
429 
430  // Build the partitions into chains that belong in the same block and
431  // refine into one-to-one links, then smooth the types within each chain.
432  part_grid_.FindPartitionPartners();
433  part_grid_.FindFigureCaptions();
434  part_grid_.RefinePartitionPartners(true);
435  SmoothPartnerRuns();
436 
437  #ifndef GRAPHICS_DISABLED
438  if (textord_tabfind_show_partitions) {
439  ScrollView* window = MakeWindow(400, 300, "Partitions");
440  if (window != nullptr) {
441  part_grid_.DisplayBoxes(window);
442  if (!textord_debug_printable)
443  DisplayTabVectors(window);
444  if (window != nullptr && textord_tabfind_show_partitions > 1) {
445  delete window->AwaitEvent(SVET_DESTROY);
446  }
447  }
448  }
449  #endif // GRAPHICS_DISABLED
450  part_grid_.AssertNoDuplicates();
451  }
452  // Ownership of the ColPartitions moves from part_sets_ to part_grid_ here,
453  // and ownership of the BLOBNBOXes moves to the ColPartitions.
454  // (They were previously owned by the block or the image_bblobs list.)
455  ReleaseBlobsAndCleanupUnused(input_block);
456  // Ownership of the ColPartitions moves from part_grid_ to good_parts_ and
457  // noise_parts_ here. In text blocks, ownership of the BLOBNBOXes moves
458  // from the ColPartitions to the output TO_BLOCK. In non-text, the
459  // BLOBNBOXes stay with the ColPartitions and get deleted in the destructor.
460  if (PSM_SPARSE(pageseg_mode))
461  part_grid_.ExtractPartitionsAsBlocks(blocks, to_blocks);
462  else
463  TransformToBlocks(blocks, to_blocks);
464  if (textord_debug_tabfind) {
465  tprintf("Found %d blocks, %d to_blocks\n",
466  blocks->length(), to_blocks->length());
467  }
468 
469  DisplayBlocks(blocks);
470  RotateAndReskewBlocks(input_is_rtl, to_blocks);
471  int result = 0;
472  #ifndef GRAPHICS_DISABLED
473  if (blocks_win_ != nullptr) {
474  bool waiting = false;
475  do {
476  waiting = false;
477  SVEvent* event = blocks_win_->AwaitEvent(SVET_ANY);
478  if (event->type == SVET_INPUT && event->parameter != nullptr) {
479  if (*event->parameter == 'd')
480  result = -1;
481  else
482  blocks->clear();
483  } else if (event->type == SVET_DESTROY) {
484  blocks_win_ = nullptr;
485  } else {
486  waiting = true;
487  }
488  delete event;
489  } while (waiting);
490  }
491  #endif // GRAPHICS_DISABLED
492  return result;
493 }

◆ GetDeskewVectors()

void tesseract::ColumnFinder::GetDeskewVectors ( FCOORD *  deskew,
FCOORD *  reskew 
)

Definition at line 496 of file colfind.cpp.

496  {
497  *reskew = reskew_;
498  *deskew = reskew_;
499  deskew->set_y(-deskew->y());
500 }

◆ IsVerticallyAlignedText()

bool tesseract::ColumnFinder::IsVerticallyAlignedText ( double  find_vertical_text_ratio,
TO_BLOCK *  block,
BLOBNBOX_CLIST *  osd_blobs 
)

Definition at line 179 of file colfind.cpp.

181  {
182  return stroke_width_->TestVerticalTextDirection(find_vertical_text_ratio,
183  block, osd_blobs);
184 }

◆ projection()

const TextlineProjection* tesseract::ColumnFinder::projection ( ) const
inline

Definition at line 71 of file colfind.h.

71  {
72  return &projection_;
73  }

◆ set_cjk_script()

void tesseract::ColumnFinder::set_cjk_script ( bool  is_cjk)
inline

Definition at line 74 of file colfind.h.

74  {
75  cjk_script_ = is_cjk;
76  }

◆ SetEquationDetect()

void tesseract::ColumnFinder::SetEquationDetect ( EquationDetectBase *  detect)

Definition at line 502 of file colfind.cpp.

502  {
503  equation_detect_ = detect;
504 }

◆ SetupAndFilterNoise()

void tesseract::ColumnFinder::SetupAndFilterNoise ( PageSegMode  pageseg_mode,
Pix *  photo_mask_pix,
TO_BLOCK *  input_block 
)

Definition at line 142 of file colfind.cpp.

144  {
145  part_grid_.Init(gridsize(), bleft(), tright());
146  delete stroke_width_;
147  stroke_width_ = new StrokeWidth(gridsize(), bleft(), tright());
148  min_gutter_width_ = static_cast<int>(kMinGutterWidthGrid * gridsize());
149  input_block->ReSetAndReFilterBlobs();
150  #ifndef GRAPHICS_DISABLED
151  if (textord_tabfind_show_blocks) {
152  input_blobs_win_ = MakeWindow(0, 0, "Filtered Input Blobs");
153  input_block->plot_graded_blobs(input_blobs_win_);
154  }
155  #endif // GRAPHICS_DISABLED
156  SetBlockRuleEdges(input_block);
157  pixDestroy(&nontext_map_);
158  // Run a preliminary strokewidth neighbour detection on the medium blobs.
159  stroke_width_->SetNeighboursOnMediumBlobs(input_block);
160  CCNonTextDetect nontext_detect(gridsize(), bleft(), tright());
161  // Remove obvious noise and make the initial non-text map.
162  nontext_map_ = nontext_detect.ComputeNonTextMask(textord_debug_tabfind,
163  photo_mask_pix, input_block);
164  stroke_width_->FindTextlineDirectionAndFixBrokenCJK(pageseg_mode, cjk_script_,
165  input_block);
166  // Clear the strokewidth grid ready for rotation or leader finding.
167  stroke_width_->Clear();
168 }

The documentation for this class was generated from the following files:
colfind.h
colfind.cpp
GenericVector::delete_data_pointers
void delete_data_pointers()
Definition: genericvector.h:872
TO_BLOCK::small_blobs
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:774
ScrollView
Definition: scrollview.h:97
TO_BLOCK::plot_graded_blobs
void plot_graded_blobs(ScrollView *to_win)
Definition: blobbox.cpp:1054
SVET_DESTROY
Definition: scrollview.h:45
tesseract::TabFind::Reset
void Reset()
Definition: tabfind.cpp:1345
tesseract::ColPartitionGrid::GridFindMargins
void GridFindMargins(ColPartitionSet **best_columns)
Definition: colpartitiongrid.cpp:960
tesseract::BBGrid::AssertNoDuplicates
void AssertNoDuplicates()
Definition: bbgrid.h:638
tesseract::TabFind::FindTabVectors
bool FindTabVectors(TabVector_LIST *hlines, BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, int min_gutter_width, double tabfind_aligned_gap_fraction, ColPartitionGrid *part_grid, FCOORD *deskew, FCOORD *reskew)
Definition: tabfind.cpp:422
tesseract::ColPartitionGrid::DeleteUnknownParts
void DeleteUnknownParts(TO_BLOCK *block)
Definition: colpartitiongrid.cpp:1017
SVET_INPUT
Definition: scrollview.h:49
tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >::MakeWindow
ScrollView * MakeWindow(int x, int y, const char *window_name)
Definition: bbgrid.h:589
TO_BLOCK::noise_blobs
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:773
tesseract::PSM_COL_FIND_ENABLED
bool PSM_COL_FIND_ENABLED(int pageseg_mode)
Definition: publictypes.h:194
tesseract::kMinGutterWidthGrid
const double kMinGutterWidthGrid
Definition: colfind.cpp:51
tesseract::StrokeWidth::GradeBlobsIntoPartitions
void GradeBlobsIntoPartitions(PageSegMode pageseg_mode, const FCOORD &rerotation, TO_BLOCK *block, Pix *nontext_pix, const DENORM *denorm, bool cjk_script, TextlineProjection *projection, BLOBNBOX_LIST *diacritic_blobs, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts)
Definition: strokewidth.cpp:343
ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:87
FCOORD::y
float y() const
Definition: points.h:209
tesseract::ColPartitionGrid::DeleteParts
void DeleteParts()
Definition: colpartitiongrid.cpp:1002
tesseract::ColPartitionGrid::FindFigureCaptions
void FindFigureCaptions()
Definition: colpartitiongrid.cpp:1054
FCOORD::x
float x() const
Definition: points.h:206
tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >::Clear
void Clear()
Definition: bbgrid.h:455
TO_BLOCK::blobs
BLOBNBOX_LIST blobs
Definition: blobbox.h:771
ICOORD::x
int16_t x() const
access function
Definition: points.h:51
FCOORD
Definition: points.h:187
DENORM::predecessor
const DENORM * predecessor() const
Definition: normalis.h:262
FCOORD::set_y
void set_y(float yin)
rewrite function
Definition: points.h:217
tesseract::TabFind::InsertBlobsToGrid
void InsertBlobsToGrid(bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
Definition: tabfind.cpp:91
BLOBNBOX
Definition: blobbox.h:142
tesseract::GridBase::tright
const ICOORD & tright() const
Definition: bbgrid.h:75
tesseract::TabFind::FindInitialTabVectors
ScrollView * FindInitialTabVectors(BLOBNBOX_LIST *image_blobs, int min_gutter_width, double tabfind_aligned_gap_fraction, TO_BLOCK *block)
Definition: tabfind.cpp:514
tesseract::TabFind::ReflectInYAxis
void ReflectInYAxis()
Definition: tabfind.cpp:1356
tesseract::PSM_SPARSE
bool PSM_SPARSE(int pageseg_mode)
Definition: publictypes.h:197
TO_BLOCK::ReSetAndReFilterBlobs
void ReSetAndReFilterBlobs()
Definition: blobbox.cpp:994
tesseract::ColPartitionGrid::RefinePartitionPartners
void RefinePartitionPartners(bool get_desperate)
Definition: colpartitiongrid.cpp:1287
DENORM::SetupNormalization
void SetupNormalization(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift)
Definition: normalis.cpp:95
tesseract::TabFind::resolution_
int resolution_
Of source image in pixels per inch.
Definition: tabfind.h:367
TO_BLOCK::large_blobs
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:775
textord_debug_printable
bool textord_debug_printable
Definition: alignedblob.cpp:33
tesseract::StrokeWidth::FindTextlineDirectionAndFixBrokenCJK
void FindTextlineDirectionAndFixBrokenCJK(PageSegMode pageseg_mode, bool cjk_merge, TO_BLOCK *input_block)
Definition: strokewidth.cpp:156
tesseract::TabFind::DontFindTabVectors
void DontFindTabVectors(BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, FCOORD *deskew, FCOORD *reskew)
Definition: tabfind.cpp:452
tesseract::ImageFind::FindImagePartitions
static void FindImagePartitions(Pix *image_pix, const FCOORD &rotation, const FCOORD &rerotation, TO_BLOCK *block, TabFind *tab_grid, DebugPixa *pixa_debug, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts)
Definition: imagefind.cpp:1298
tesseract::StrokeWidth::CorrectForRotation
void CorrectForRotation(const FCOORD &rerotation, ColPartitionGrid *part_grid)
Definition: strokewidth.cpp:250
tesseract::ColPartitionGrid::ReflectInYAxis
void ReflectInYAxis()
Definition: colpartitiongrid.cpp:640
FCOORD::rotate
void rotate(const FCOORD vec)
Definition: points.h:736
tesseract::StrokeWidth::TestVerticalTextDirection
bool TestVerticalTextDirection(double find_vertical_text_ratio, TO_BLOCK *block, BLOBNBOX_CLIST *osd_blobs)
Definition: strokewidth.cpp:208
tesseract::TabFind::TabFind
TabFind(int gridsize, const ICOORD &bleft, const ICOORD &tright, TabVector_LIST *vlines, int vertical_x, int vertical_y, int resolution)
Definition: tabfind.cpp:65
SVET_ANY
Definition: scrollview.h:55
tesseract::TabFind::SetBlockRuleEdges
void SetBlockRuleEdges(TO_BLOCK *block)
Definition: tabfind.cpp:133
ScrollView::AwaitEvent
SVEvent * AwaitEvent(SVEventType type)
Definition: scrollview.cpp:443
tesseract::GridBase::gridsize
int gridsize() const
Definition: bbgrid.h:63
tesseract::ImageFind::TransferImagePartsToImageMask
static void TransferImagePartsToImageMask(const FCOORD &rerotation, ColPartitionGrid *part_grid, Pix *image_mask)
Definition: imagefind.cpp:1245
tesseract::TabFind::DisplayTabVectors
ScrollView * DisplayTabVectors(ScrollView *tab_win)
Definition: tabfind.cpp:497
tesseract::BBGrid::DisplayBoxes
void DisplayBoxes(ScrollView *window)
Definition: bbgrid.h:613
tesseract::ColPartitionGrid::FindPartitionPartners
void FindPartitionPartners()
Definition: colpartitiongrid.cpp:1179
tesseract::BBGrid::Init
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
Definition: bbgrid.h:445
tesseract::TabFind::WidthCB
WidthCallback WidthCB()
Definition: tabfind.h:157
tesseract::TabFind::ResetForVerticalText
void ResetForVerticalText(const FCOORD &rotate, const FCOORD &rerotate, TabVector_LIST *horizontal_lines, int *min_gutter_width)
Definition: tabfind.cpp:1300
tesseract::StrokeWidth::FindLeaderPartitions
void FindLeaderPartitions(TO_BLOCK *block, ColPartitionGrid *part_grid)
Definition: strokewidth.cpp:259
tesseract::ColPartitionGrid::ReTypeBlobs
void ReTypeBlobs(BLOBNBOX_LIST *im_blobs)
Definition: colpartitiongrid.cpp:870
tesseract::StrokeWidth::SetNeighboursOnMediumBlobs
void SetNeighboursOnMediumBlobs(TO_BLOCK *block)
Definition: strokewidth.cpp:142
SVEvent
Definition: scrollview.h:60
tesseract::ColPartitionGrid::SetTabStops
void SetTabStops(TabFind *tabgrid)
Definition: colpartitiongrid.cpp:753
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
tesseract::StrokeWidth::RemoveLineResidue
void RemoveLineResidue(ColPartition_LIST *big_part_list)
Definition: strokewidth.cpp:279
BLOBNBOX::cblob
C_BLOB * cblob() const
Definition: blobbox.h:267
tesseract::TabFind::TidyBlobs
void TidyBlobs(TO_BLOCK *block)
Definition: tabfind.cpp:465
textord_debug_tabfind
int textord_debug_tabfind
Definition: alignedblob.cpp:27
tesseract::GridBase::bleft
const ICOORD & bleft() const
Definition: bbgrid.h:72
tesseract::ColPartitionGrid::ExtractPartitionsAsBlocks
void ExtractPartitionsAsBlocks(BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
Definition: colpartitiongrid.cpp:668
tesseract::EquationDetectBase::FindEquationParts
virtual int FindEquationParts(ColPartitionGrid *part_grid, ColPartitionSet **best_columns)=0
DENORM
Definition: normalis.h:49
tesseract::TabFind::RotateBlobList
static void RotateBlobList(const FCOORD &rotation, BLOBNBOX_LIST *blobs)
Definition: tabfind.cpp:1256