tesseract  5.0.0-alpha-619-ge9db
ocrblock.cpp File Reference
#include "ocrblock.h"
#include <cstdlib>
#include <memory>
#include "stepblob.h"
#include "tprintf.h"

Go to the source code of this file.

Macros

#define ROW_SPACING   5
 

Functions

void PrintSegmentationStats (BLOCK_LIST *block_list)
 
void ExtractBlobsFromSegmentation (BLOCK_LIST *blocks, C_BLOB_LIST *output_blob_list)
 
void RefreshWordBlobsFromNewBlobs (BLOCK_LIST *block_list, C_BLOB_LIST *new_blobs, C_BLOB_LIST *not_found_blobs)
 

Macro Definition Documentation

◆ ROW_SPACING

#define ROW_SPACING   5

Function Documentation

◆ ExtractBlobsFromSegmentation()

void ExtractBlobsFromSegmentation ( BLOCK_LIST *  blocks,
C_BLOB_LIST *  output_blob_list 
)

Definition at line 435 of file ocrblock.cpp.

439  {
440  C_BLOB_IT return_list_it(output_blob_list);
441  BLOCK_IT block_it(blocks);
442  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
443  BLOCK* block = block_it.data();
444  ROW_IT row_it(block->row_list());
445  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
446  ROW* row = row_it.data();
447  // Iterate over all werds in the row.
448  WERD_IT werd_it(row->word_list());
449  for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) {
450  WERD* werd = werd_it.data();
451  return_list_it.move_to_last();
452  return_list_it.add_list_after(werd->cblob_list());
453  return_list_it.move_to_last();
454  return_list_it.add_list_after(werd->rej_cblob_list());
455  }

◆ PrintSegmentationStats()

void PrintSegmentationStats ( BLOCK_LIST *  block_list)

Definition at line 403 of file ocrblock.cpp.

405  {
406  int num_blocks = 0;
407  int num_rows = 0;
408  int num_words = 0;
409  int num_blobs = 0;
410  BLOCK_IT block_it(block_list);
411  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
412  BLOCK* block = block_it.data();
413  ++num_blocks;
414  ROW_IT row_it(block->row_list());
415  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
416  ++num_rows;
417  ROW* row = row_it.data();
418  // Iterate over all werds in the row.
419  WERD_IT werd_it(row->word_list());
420  for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) {
421  WERD* werd = werd_it.data();
422  ++num_words;
423  num_blobs += werd->cblob_list()->length();
424  }
425  }
426  }
427  tprintf("Block list stats:\nBlocks = %d\nRows = %d\nWords = %d\nBlobs = %d\n",

◆ RefreshWordBlobsFromNewBlobs()

void RefreshWordBlobsFromNewBlobs ( BLOCK_LIST *  block_list,
C_BLOB_LIST *  new_blobs,
C_BLOB_LIST *  not_found_blobs 
)

Definition at line 469 of file ocrblock.cpp.

475  {
476  // Now iterate over all the blobs in the segmentation_block_list_, and just
477  // replace the corresponding c-blobs inside the werds.
478  BLOCK_IT block_it(block_list);
479  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
480  BLOCK* block = block_it.data();
481  if (block->pdblk.poly_block() != nullptr && !block->pdblk.poly_block()->IsText())
482  continue; // Don't touch non-text blocks.
483  // Iterate over all rows in the block.
484  ROW_IT row_it(block->row_list());
485  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
486  ROW* row = row_it.data();
487  // Iterate over all werds in the row.
488  WERD_IT werd_it(row->word_list());
489  WERD_LIST new_words;
490  WERD_IT new_words_it(&new_words);
491  for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) {
492  WERD* werd = werd_it.extract();
493  WERD* new_werd = werd->ConstructWerdWithNewBlobs(new_blobs,
494  not_found_blobs);
495  if (new_werd) {
496  // Insert this new werd into the actual row's werd-list. Remove the
497  // existing one.
498  new_words_it.add_after_then_move(new_werd);
499  delete werd;
500  } else {
501  // Reinsert the older word back, for lack of better options.
502  // This is critical since dropping the words messes up segmentation:
503  // eg. 1st word in the row might otherwise have W_FUZZY_NON turned on.
504  new_words_it.add_after_then_move(werd);
505  }
506  }
507  // Get rid of the old word list & replace it with the new one.
508  row->word_list()->clear();
509  werd_it.move_to_first();
POLY_BLOCK::IsText
bool IsText() const
Definition: polyblk.h:62
BLOCK::row_list
ROW_LIST * row_list()
Returns the block's list of rows.
Definition: ocrblock.h:115
WERD::ConstructWerdWithNewBlobs
WERD * ConstructWerdWithNewBlobs(C_BLOB_LIST *all_blobs, C_BLOB_LIST *orphan_blobs)
Definition: werd.cpp:387
WERD::cblob_list
C_BLOB_LIST * cblob_list()
Definition: werd.h:94
BLOCK
Definition: ocrblock.h:28
BLOCK::pdblk
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:189
PDBLK::poly_block
POLY_BLOCK * poly_block() const
Definition: pdblock.h:54
WERD
Definition: werd.h:55
ROW
Definition: ocrrow.h:35
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
ROW::word_list
WERD_LIST * word_list()
Definition: ocrrow.h:54
WERD::rej_cblob_list
C_BLOB_LIST * rej_cblob_list()
Definition: werd.h:89