tesseract  4.0.0-1-g2a2b
tesseract::StrokeWidth Class Reference

#include <strokewidth.h>

Inheritance diagram for tesseract::StrokeWidth:
tesseract::BlobGrid tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > tesseract::GridBase

Public Member Functions

 StrokeWidth (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~StrokeWidth ()
 
void SetNeighboursOnMediumBlobs (TO_BLOCK *block)
 
void FindTextlineDirectionAndFixBrokenCJK (PageSegMode pageseg_mode, bool cjk_merge, TO_BLOCK *input_block)
 
bool TestVerticalTextDirection (double find_vertical_text_ratio, TO_BLOCK *block, BLOBNBOX_CLIST *osd_blobs)
 
void CorrectForRotation (const FCOORD &rerotation, ColPartitionGrid *part_grid)
 
void FindLeaderPartitions (TO_BLOCK *block, ColPartitionGrid *part_grid)
 
void RemoveLineResidue (ColPartition_LIST *big_part_list)
 
void GradeBlobsIntoPartitions (PageSegMode pageseg_mode, const FCOORD &rerotation, TO_BLOCK *block, Pix *nontext_pix, const DENORM *denorm, bool cjk_script, TextlineProjection *projection, BLOBNBOX_LIST *diacritic_blobs, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts)
 
virtual void HandleClick (int x, int y)
 
- Public Member Functions inherited from tesseract::BlobGrid
 BlobGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~BlobGrid ()
 
void InsertBlobList (BLOBNBOX_LIST *blobs)
 
- Public Member Functions inherited from tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >
 BBGrid ()
 
 BBGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~BBGrid ()
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
void Clear ()
 
void ClearGridData (void(*free_method)(BLOBNBOX *))
 
void InsertBBox (bool h_spread, bool v_spread, BLOBNBOX *bbox)
 
void InsertPixPtBBox (int left, int bottom, Pix *pix, BLOBNBOX *bbox)
 
void RemoveBBox (BLOBNBOX *bbox)
 
bool RectangleEmpty (const TBOX &rect)
 
IntGrid * CountCellElements ()
 
ScrollView * MakeWindow (int x, int y, const char *window_name)
 
void DisplayBoxes (ScrollView *window)
 
void AssertNoDuplicates ()
 
- Public Member Functions inherited from tesseract::GridBase
 GridBase ()=default
 
 GridBase (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~GridBase ()
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
int gridsize () const
 
int gridwidth () const
 
int gridheight () const
 
const ICOORD & bleft () const
 
const ICOORD & tright () const
 
void GridCoords (int x, int y, int *grid_x, int *grid_y) const
 
void ClipGridCoords (int *x, int *y) const
 

Additional Inherited Members

- Protected Attributes inherited from tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >
BLOBNBOX_CLIST * grid_
 
- Protected Attributes inherited from tesseract::GridBase
int gridsize_
 
int gridwidth_
 
int gridheight_
 
int gridbuckets_
 
ICOORD bleft_
 
ICOORD tright_
 

Detailed Description

The StrokeWidth class holds all the normal and large blobs. It is used to find good large blobs and move them to the normal blobs by virtue of having a reasonable strokewidth compatible neighbour.

Definition at line 57 of file strokewidth.h.

Constructor & Destructor Documentation

◆ StrokeWidth()

tesseract::StrokeWidth::StrokeWidth ( int  gridsize,
const ICOORD &  bleft,
const ICOORD &  tright 
)

Definition at line 110 of file strokewidth.cpp.

112  : BlobGrid(gridsize, bleft, tright), nontext_map_(nullptr), projection_(nullptr),
113  denorm_(nullptr), grid_box_(bleft, tright), rerotation_(1.0f, 0.0f) {
114  leaders_win_ = nullptr;
115  widths_win_ = nullptr;
116  initial_widths_win_ = nullptr;
117  chains_win_ = nullptr;
118  diacritics_win_ = nullptr;
119  textlines_win_ = nullptr;
120  smoothed_win_ = nullptr;
121 }
BlobGrid(int gridsize, const ICOORD &bleft, const ICOORD &tright)
Definition: blobgrid.cpp:24
int gridsize() const
Definition: bbgrid.h:64
const ICOORD & bleft() const
Definition: bbgrid.h:73
const ICOORD & tright() const
Definition: bbgrid.h:76

◆ ~StrokeWidth()

tesseract::StrokeWidth::~StrokeWidth ( )
virtual

Definition at line 123 of file strokewidth.cpp.

123  {
124  if (widths_win_ != nullptr) {
125  #ifndef GRAPHICS_DISABLED
126  delete widths_win_->AwaitEvent(SVET_DESTROY);
127  #endif // GRAPHICS_DISABLED
128  if (textord_tabfind_only_strokewidths)
129  exit(0);
130  delete widths_win_;
131  }
132  delete leaders_win_;
133  delete initial_widths_win_;
134  delete chains_win_;
135  delete textlines_win_;
136  delete smoothed_win_;
137  delete diacritics_win_;
138 }
SVEvent * AwaitEvent(SVEventType type)
Definition: scrollview.cpp:445
bool textord_tabfind_only_strokewidths
Definition: strokewidth.cpp:42

Member Function Documentation

◆ CorrectForRotation()

void tesseract::StrokeWidth::CorrectForRotation ( const FCOORD &  rerotation,
ColPartitionGrid *  part_grid 
)

Definition at line 251 of file strokewidth.cpp.

252  {
253  Init(part_grid->gridsize(), part_grid->bleft(), part_grid->tright());
254  grid_box_ = TBOX(bleft(), tright());
255  rerotation_.set_x(rotation.x());
256  rerotation_.set_y(-rotation.y());
257 }
const ICOORD & bleft() const
Definition: bbgrid.h:73
Definition: rect.h:34
void set_x(float xin)
rewrite function
Definition: points.h:215
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
Definition: bbgrid.h:447
void set_y(float yin)
rewrite function
Definition: points.h:219
const ICOORD & tright() const
Definition: bbgrid.h:76

◆ FindLeaderPartitions()

void tesseract::StrokeWidth::FindLeaderPartitions ( TO_BLOCK *  block,
ColPartitionGrid *  part_grid 
)

Definition at line 260 of file strokewidth.cpp.

261  {
262  Clear();
263  // Find and isolate leaders in the noise list.
264  ColPartition_LIST leader_parts;
265  FindLeadersAndMarkNoise(block, &leader_parts);
266  // Setup the strokewidth grid with the block's remaining (non-noise) blobs.
267  InsertBlobList(&block->blobs);
268  // Mark blobs that have leader neighbours.
269  for (ColPartition_IT it(&leader_parts); !it.empty(); it.forward()) {
270  ColPartition* part = it.extract();
271  part->ClaimBoxes();
272  MarkLeaderNeighbours(part, LR_LEFT);
273  MarkLeaderNeighbours(part, LR_RIGHT);
274  part_grid->InsertBBox(true, true, part);
275  }
276 }
void InsertBlobList(BLOBNBOX_LIST *blobs)
Definition: blobgrid.cpp:36
BLOBNBOX_LIST blobs
Definition: blobbox.h:785

◆ FindTextlineDirectionAndFixBrokenCJK()

void tesseract::StrokeWidth::FindTextlineDirectionAndFixBrokenCJK ( PageSegMode  pageseg_mode,
bool  cjk_merge,
TO_BLOCK *  input_block 
)

Definition at line 157 of file strokewidth.cpp.

159  {
160  // Setup the grid with the remaining (non-noise) blobs.
161  InsertBlobs(input_block);
162  // Repair broken CJK characters if needed.
163  while (cjk_merge && FixBrokenCJK(input_block));
164  // Grade blobs by inspection of neighbours.
165  FindTextlineFlowDirection(pageseg_mode, false);
166  // Clear the grid ready for rotation or leader finding.
167  Clear();
168 }

◆ GradeBlobsIntoPartitions()

void tesseract::StrokeWidth::GradeBlobsIntoPartitions ( PageSegMode  pageseg_mode,
const FCOORD &  rerotation,
TO_BLOCK *  block,
Pix *  nontext_pix,
const DENORM *  denorm,
bool  cjk_script,
TextlineProjection *  projection,
BLOBNBOX_LIST *  diacritic_blobs,
ColPartitionGrid *  part_grid,
ColPartition_LIST *  big_parts 
)

Definition at line 344 of file strokewidth.cpp.

348  {
349  nontext_map_ = nontext_pix;
350  projection_ = projection;
351  denorm_ = denorm;
352  // Clear and re Insert to take advantage of the tab stops in the blobs.
353  Clear();
354  // Setup the strokewidth grid with the remaining non-noise, non-leader blobs.
355  InsertBlobs(block);
356 
357  // Run FixBrokenCJK() again if the page is CJK.
358  if (cjk_script) {
359  FixBrokenCJK(block);
360  }
361  FindTextlineFlowDirection(pageseg_mode, false);
362  projection_->ConstructProjection(block, rerotation, nontext_map_);
363  if (textord_tabfind_show_strokewidths) {
364  ScrollView* line_blobs_win = MakeWindow(0, 0, "Initial textline Blobs");
365  projection_->PlotGradedBlobs(&block->blobs, line_blobs_win);
366  projection_->PlotGradedBlobs(&block->small_blobs, line_blobs_win);
367  }
368  projection_->MoveNonTextlineBlobs(&block->blobs, &block->noise_blobs);
369  projection_->MoveNonTextlineBlobs(&block->small_blobs, &block->noise_blobs);
370  // Clear and re Insert to take advantage of the removed diacritics.
371  Clear();
372  InsertBlobs(block);
373  FCOORD skew;
374  FindTextlineFlowDirection(pageseg_mode, true);
375  PartitionFindResult r =
376  FindInitialPartitions(pageseg_mode, rerotation, true, block,
377  diacritic_blobs, part_grid, big_parts, &skew);
378  if (r == PFR_NOISE) {
379  tprintf("Detected %d diacritics\n", diacritic_blobs->length());
380  // Noise was found, and removed.
381  Clear();
382  InsertBlobs(block);
383  FindTextlineFlowDirection(pageseg_mode, true);
384  r = FindInitialPartitions(pageseg_mode, rerotation, false, block,
385  diacritic_blobs, part_grid, big_parts, &skew);
386  }
387  nontext_map_ = nullptr;
388  projection_ = nullptr;
389  denorm_ = nullptr;
390 }
int textord_tabfind_show_strokewidths
Definition: strokewidth.cpp:41
ScrollView * MakeWindow(int x, int y, const char *window_name)
Definition: bbgrid.h:591
void MoveNonTextlineBlobs(BLOBNBOX_LIST *blobs, BLOBNBOX_LIST *small_blobs) const
void ConstructProjection(TO_BLOCK *input_block, const FCOORD &rotation, Pix *nontext_map)
PartitionFindResult
Definition: strokewidth.h:46
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
Definition: points.h:189
BLOBNBOX_LIST blobs
Definition: blobbox.h:785
void PlotGradedBlobs(BLOBNBOX_LIST *blobs, ScrollView *win)
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:788
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:787

◆ HandleClick()

void tesseract::StrokeWidth::HandleClick ( int  x,
int  y 
)
virtual

Handles a click event in a display window.

Reimplemented from tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >.

Definition at line 401 of file strokewidth.cpp.

401  {
402  BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>::HandleClick(x, y);
403  // Run a radial search for blobs that overlap.
404  BlobGridSearch radsearch(this);
405  radsearch.StartRadSearch(x, y, 1);
406  BLOBNBOX* neighbour;
407  FCOORD click(static_cast<float>(x), static_cast<float>(y));
408  while ((neighbour = radsearch.NextRadSearch()) != nullptr) {
409  TBOX nbox = neighbour->bounding_box();
410  if (nbox.contains(click) && neighbour->cblob() != nullptr) {
411  PrintBoxWidths(neighbour);
412  if (neighbour->neighbour(BND_LEFT) != nullptr)
413  PrintBoxWidths(neighbour->neighbour(BND_LEFT));
414  if (neighbour->neighbour(BND_RIGHT) != nullptr)
415  PrintBoxWidths(neighbour->neighbour(BND_RIGHT));
416  if (neighbour->neighbour(BND_ABOVE) != nullptr)
417  PrintBoxWidths(neighbour->neighbour(BND_ABOVE));
418  if (neighbour->neighbour(BND_BELOW) != nullptr)
419  PrintBoxWidths(neighbour->neighbour(BND_BELOW));
420  int gaps[BND_COUNT];
421  neighbour->NeighbourGaps(gaps);
422  tprintf("Left gap=%d, right=%d, above=%d, below=%d, horz=%d, vert=%d\n"
423  "Good= %d %d %d %d\n",
424  gaps[BND_LEFT], gaps[BND_RIGHT],
425  gaps[BND_ABOVE], gaps[BND_BELOW],
426  neighbour->horz_possible(),
427  neighbour->vert_possible(),
428  neighbour->good_stroke_neighbour(BND_LEFT),
429  neighbour->good_stroke_neighbour(BND_RIGHT),
430  neighbour->good_stroke_neighbour(BND_ABOVE),
431  neighbour->good_stroke_neighbour(BND_BELOW));
432  break;
433  }
434  }
435 }
Definition: rect.h:34
virtual void HandleClick(int x, int y)
Definition: bbgrid.h:657
bool good_stroke_neighbour(BlobNeighbourDir n) const
Definition: blobbox.h:374
bool horz_possible() const
Definition: blobbox.h:308
void NeighbourGaps(int gaps[BND_COUNT]) const
Definition: blobbox.cpp:182
GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > BlobGridSearch
Definition: blobgrid.h:31
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
bool vert_possible() const
Definition: blobbox.h:302
bool contains(const FCOORD pt) const
Definition: rect.h:333
Definition: points.h:189
const TBOX & bounding_box() const
Definition: blobbox.h:231
BLOBNBOX * neighbour(BlobNeighbourDir n) const
Definition: blobbox.h:371
C_BLOB * cblob() const
Definition: blobbox.h:269

◆ RemoveLineResidue()

void tesseract::StrokeWidth::RemoveLineResidue ( ColPartition_LIST *  big_part_list)

Definition at line 280 of file strokewidth.cpp.

280  {
281  BlobGridSearch gsearch(this);
282  BLOBNBOX* bbox;
283  // For every vertical line-like bbox in the grid, search its neighbours
284  // to find the tallest, and if the original box is taller by sufficient
285  // margin, then call it line residue and delete it.
286  gsearch.StartFullSearch();
287  while ((bbox = gsearch.NextFullSearch()) != nullptr) {
288  TBOX box = bbox->bounding_box();
289  if (box.height() < box.width() * kLineResidueAspectRatio)
290  continue;
291  // Set up a rectangle search around the blob to find the size of its
292  // neighbours.
293  int padding = box.height() * kLineResiduePadRatio;
294  TBOX search_box = box;
295  search_box.pad(padding, padding);
296  bool debug = AlignedBlob::WithinTestRegion(2, box.left(),
297  box.bottom());
298  // Find the largest object in the search box not equal to bbox.
299  BlobGridSearch rsearch(this);
300  int max_height = 0;
301  BLOBNBOX* n;
302  rsearch.StartRectSearch(search_box);
303  while ((n = rsearch.NextRectSearch()) != nullptr) {
304  if (n == bbox) continue;
305  TBOX nbox = n->bounding_box();
306  if (nbox.height() > max_height) {
307  max_height = nbox.height();
308  }
309  }
310  if (debug) {
311  tprintf("Max neighbour size=%d for candidate line box at:", max_height);
312  box.print();
313  }
314  if (max_height * kLineResidueSizeRatio < box.height()) {
315  #ifndef GRAPHICS_DISABLED
316  if (leaders_win_ != nullptr) {
317  // We are debugging, so display deleted in pink blobs in the same
318  // window that we use to display leader detection.
319  leaders_win_->Pen(ScrollView::PINK);
320  leaders_win_->Rectangle(box.left(), box.bottom(),
321  box.right(), box.top());
322  }
323  #endif // GRAPHICS_DISABLED
324  ColPartition::MakeBigPartition(bbox, big_part_list);
325  }
326  }
327 }
void print() const
Definition: rect.h:278
Definition: rect.h:34
static bool WithinTestRegion(int detail_level, int x, int y)
const double kLineResidueSizeRatio
Definition: strokewidth.cpp:99
int16_t width() const
Definition: rect.h:115
int16_t left() const
Definition: rect.h:72
static ColPartition * MakeBigPartition(BLOBNBOX *box, ColPartition_LIST *big_part_list)
int16_t top() const
Definition: rect.h:58
const double kLineResidueAspectRatio
Definition: strokewidth.cpp:95
GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > BlobGridSearch
Definition: blobgrid.h:31
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
const TBOX & bounding_box() const
Definition: blobbox.h:231
int16_t right() const
Definition: rect.h:79
void Rectangle(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:602
void Pen(Color color)
Definition: scrollview.cpp:722
int16_t bottom() const
Definition: rect.h:65
const int kLineResiduePadRatio
Definition: strokewidth.cpp:97
int16_t height() const
Definition: rect.h:108
void pad(int xpad, int ypad)
Definition: rect.h:131

◆ SetNeighboursOnMediumBlobs()

void tesseract::StrokeWidth::SetNeighboursOnMediumBlobs ( TO_BLOCK *  block)

Definition at line 143 of file strokewidth.cpp.

143  {
144  // Run a preliminary strokewidth neighbour detection on the medium blobs.
145  InsertBlobList(&block->blobs);
146  BLOBNBOX_IT blob_it(&block->blobs);
147  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
148  SetNeighbours(false, false, blob_it.data());
149  }
150  Clear();
151 }
void InsertBlobList(BLOBNBOX_LIST *blobs)
Definition: blobgrid.cpp:36
BLOBNBOX_LIST blobs
Definition: blobbox.h:785

◆ TestVerticalTextDirection()

bool tesseract::StrokeWidth::TestVerticalTextDirection ( double  find_vertical_text_ratio,
TO_BLOCK *  block,
BLOBNBOX_CLIST *  osd_blobs 
)

Definition at line 209 of file strokewidth.cpp.

211  {
212  int vertical_boxes = 0;
213  int horizontal_boxes = 0;
214  // Count vertical normal and large blobs.
215  BLOBNBOX_CLIST vertical_blobs;
216  BLOBNBOX_CLIST horizontal_blobs;
217  BLOBNBOX_CLIST nondescript_blobs;
218  CollectHorizVertBlobs(&block->blobs, &vertical_boxes, &horizontal_boxes,
219  &vertical_blobs, &horizontal_blobs, &nondescript_blobs);
220  CollectHorizVertBlobs(&block->large_blobs, &vertical_boxes, &horizontal_boxes,
221  &vertical_blobs, &horizontal_blobs, &nondescript_blobs);
222  if (textord_debug_tabfind)
223  tprintf("TextDir hbox=%d vs vbox=%d, %dH, %dV, %dN osd blobs\n",
224  horizontal_boxes, vertical_boxes,
225  horizontal_blobs.length(), vertical_blobs.length(),
226  nondescript_blobs.length());
227  if (osd_blobs != nullptr && vertical_boxes == 0 && horizontal_boxes == 0) {
228  // Only nondescript blobs available, so return those.
229  BLOBNBOX_C_IT osd_it(osd_blobs);
230  osd_it.add_list_after(&nondescript_blobs);
231  return false;
232  }
233  int min_vert_boxes = static_cast<int>((vertical_boxes + horizontal_boxes) *
234  find_vertical_text_ratio);
235  if (vertical_boxes >= min_vert_boxes) {
236  if (osd_blobs != nullptr) {
237  BLOBNBOX_C_IT osd_it(osd_blobs);
238  osd_it.add_list_after(&vertical_blobs);
239  }
240  return true;
241  } else {
242  if (osd_blobs != nullptr) {
243  BLOBNBOX_C_IT osd_it(osd_blobs);
244  osd_it.add_list_after(&horizontal_blobs);
245  }
246  return false;
247  }
248 }
int textord_debug_tabfind
Definition: alignedblob.cpp:28
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
BLOBNBOX_LIST blobs
Definition: blobbox.h:785
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:789

The documentation for this class was generated from the following files: