tesseract  5.0.0-alpha-619-ge9db
tesseract::BaselineBlock Class Reference

#include <baselinedetect.h>

Public Member Functions

 BaselineBlock (int debug_level, bool non_text, TO_BLOCK *block)
 
TO_BLOCK * block () const
 
double skew_angle () const
 
bool FitBaselinesAndFindSkew (bool use_box_bottoms)
 
void ParallelizeBaselines (double default_block_skew)
 
void SetupBlockParameters () const
 
void PrepareForSplineFitting (ICOORD page_tr, bool remove_noise)
 
void FitBaselineSplines (bool enable_splines, bool show_final_rows, Textord *textord)
 
void DrawFinalRows (const ICOORD &page_tr)
 
void DrawPixSpline (Pix *pix_in)
 

Static Public Member Functions

static double SpacingModelError (double perp_disp, double line_spacing, double line_offset)
 

Detailed Description

Definition at line 129 of file baselinedetect.h.

Constructor & Destructor Documentation

◆ BaselineBlock()

tesseract::BaselineBlock::BaselineBlock ( int  debug_level,
bool  non_text,
TO_BLOCK *  block 
)

Definition at line 408 of file baselinedetect.cpp.

409  : block_(block), debug_level_(debug_level), non_text_block_(non_text),
410  good_skew_angle_(false), skew_angle_(0.0),
411  line_spacing_(block->line_spacing), line_offset_(0.0), model_error_(0.0) {
412  TO_ROW_IT row_it(block_->get_rows());
413  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
414  // Sort the blobs on the rows.
415  row_it.data()->blob_list()->sort(blob_x_order);
416  rows_.push_back(new BaselineRow(block->line_spacing, row_it.data()));
417  }
418 }

Member Function Documentation

◆ block()

TO_BLOCK* tesseract::BaselineBlock::block ( ) const
inline

Definition at line 133 of file baselinedetect.h.

133  {
134  return block_;
135  }

◆ DrawFinalRows()

void tesseract::BaselineBlock::DrawFinalRows ( const ICOORD &  page_tr)

Definition at line 577 of file baselinedetect.cpp.

577  {
578 #ifndef GRAPHICS_DISABLED
579  if (non_text_block_) return;
580  double gradient = tan(skew_angle_);
581  FCOORD rotation(1.0f, 0.0f);
582  int left_edge = block_->block->pdblk.bounding_box().left();
583  ScrollView* win = create_to_win(page_tr);
584  ScrollView::Color colour = ScrollView::RED;
585  TO_ROW_IT row_it = block_->get_rows();
586  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
587  plot_parallel_row(row_it.data(), gradient, left_edge, colour, rotation);
588  colour = static_cast<ScrollView::Color>(colour + 1);
589  if (colour > ScrollView::MAGENTA)
590  colour = ScrollView::RED;
591  }
592  plot_blob_list(win, &block_->blobs, ScrollView::MAGENTA, ScrollView::WHITE);
593  // Show discarded blobs.
594  plot_blob_list(win, &block_->underlines,
595  ScrollView::YELLOW, ScrollView::CORAL);
596  if (block_->blobs.length() > 0)
597  tprintf("%d blobs discarded as noise\n", block_->blobs.length());
598  draw_meanlines(block_, gradient, left_edge, ScrollView::WHITE, rotation);
599 #endif
600 }

◆ DrawPixSpline()

void tesseract::BaselineBlock::DrawPixSpline ( Pix *  pix_in)

Definition at line 602 of file baselinedetect.cpp.

602  {
603  if (non_text_block_) return;
604  TO_ROW_IT row_it = block_->get_rows();
605  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
606  row_it.data()->baseline.plot(pix_in);
607  }
608 }

◆ FitBaselinesAndFindSkew()

bool tesseract::BaselineBlock::FitBaselinesAndFindSkew ( bool  use_box_bottoms)

Definition at line 434 of file baselinedetect.cpp.

434  {
435  if (non_text_block_) return false;
436  GenericVector<double> angles;
437  for (int r = 0; r < rows_.size(); ++r) {
438  BaselineRow* row = rows_[r];
439  if (row->FitBaseline(use_box_bottoms)) {
440  double angle = row->BaselineAngle();
441  angles.push_back(angle);
442  }
443  if (debug_level_ > 1)
444  row->Print();
445  }
446 
447  if (!angles.empty()) {
448  skew_angle_ = MedianOfCircularValues(M_PI, &angles);
449  good_skew_angle_ = true;
450  } else {
451  skew_angle_ = 0.0f;
452  good_skew_angle_ = false;
453  }
454  if (debug_level_ > 0) {
455  tprintf("Initial block skew angle = %g, good = %d\n",
456  skew_angle_, good_skew_angle_);
457  }
458  return good_skew_angle_;
459 }

◆ FitBaselineSplines()

void tesseract::BaselineBlock::FitBaselineSplines ( bool  enable_splines,
bool  show_final_rows,
Textord *  textord 
)

Definition at line 548 of file baselinedetect.cpp.

550  {
551  double gradient = tan(skew_angle_);
552  FCOORD rotation(1.0f, 0.0f);
553 
554  if (enable_splines) {
555  textord->make_spline_rows(block_, gradient, show_final_rows);
556  } else {
557  // Make a fake spline from the existing line.
558  TBOX block_box= block_->block->pdblk.bounding_box();
559  TO_ROW_IT row_it = block_->get_rows();
560  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
561  TO_ROW* row = row_it.data();
562  int32_t xstarts[2] = { block_box.left(), block_box.right() };
563  double coeffs[3] = { 0.0, row->line_m(), row->line_c() };
564  row->baseline = QSPLINE(1, xstarts, coeffs);
565  textord->compute_row_xheight(row, block_->block->classify_rotation(),
566  row->line_m(), block_->line_size);
567  }
568  }
569  textord->compute_block_xheight(block_, gradient);
570  block_->block->set_xheight(block_->xheight);
571  if (textord_restore_underlines) // fix underlines
572  restore_underlined_blobs(block_);
573 }

◆ ParallelizeBaselines()

void tesseract::BaselineBlock::ParallelizeBaselines ( double  default_block_skew)

Definition at line 463 of file baselinedetect.cpp.

463  {
464  if (non_text_block_) return;
465  if (!good_skew_angle_) skew_angle_ = default_block_skew;
466  if (debug_level_ > 0)
467  tprintf("Adjusting block to skew angle %g\n", skew_angle_);
468  FCOORD direction(cos(skew_angle_), sin(skew_angle_));
469  for (int r = 0; r < rows_.size(); ++r) {
470  BaselineRow* row = rows_[r];
471  row->AdjustBaselineToParallel(debug_level_, direction);
472  if (debug_level_ > 1)
473  row->Print();
474  }
475  if (rows_.size() < 3 || !ComputeLineSpacing())
476  return;
477  // Enforce the line spacing model on all lines that don't yet have a good
478  // baseline.
479  // Start by finding the row that is best fitted to the model.
480  int best_row = 0;
481  double best_error = SpacingModelError(rows_[0]->PerpDisp(direction),
482  line_spacing_, line_offset_);
483  for (int r = 1; r < rows_.size(); ++r) {
484  double error = SpacingModelError(rows_[r]->PerpDisp(direction),
485  line_spacing_, line_offset_);
486  if (error < best_error) {
487  best_error = error;
488  best_row = r;
489  }
490  }
491  // Starting at the best fitting row, work outwards, syncing the offset.
492  double offset = line_offset_;
493  for (int r = best_row + 1; r < rows_.size(); ++r) {
494  offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
495  line_spacing_, offset);
496  }
497  offset = line_offset_;
498  for (int r = best_row - 1; r >= 0; --r) {
499  offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
500  line_spacing_, offset);
501  }
502 }

◆ PrepareForSplineFitting()

void tesseract::BaselineBlock::PrepareForSplineFitting ( ICOORD  page_tr,
bool  remove_noise 
)

Definition at line 532 of file baselinedetect.cpp.

532  {
533  if (non_text_block_) return;
534  if (remove_noise) {
535  vigorous_noise_removal(block_);
536  }
537  FCOORD rotation(1.0f, 0.0f);
538  double gradient = tan(skew_angle_);
539  separate_underlines(block_, gradient, rotation, true);
540  pre_associate_blobs(page_tr, block_, rotation, true);
541 }

◆ SetupBlockParameters()

void tesseract::BaselineBlock::SetupBlockParameters ( ) const

Definition at line 505 of file baselinedetect.cpp.

505  {
506  if (line_spacing_ > 0.0) {
507  // Where was block_line_spacing set before?
508  float min_spacing = std::min(block_->line_spacing, static_cast<float>(line_spacing_));
509  if (min_spacing < block_->line_size)
510  block_->line_size = min_spacing;
511  block_->line_spacing = line_spacing_;
512  block_->baseline_offset = line_offset_;
513  block_->max_blob_size = line_spacing_ * kMaxBlobSizeMultiple;
514  }
515  // Setup the parameters on all the rows.
516  TO_ROW_IT row_it(block_->get_rows());
517  for (int r = 0; r < rows_.size(); ++r, row_it.forward()) {
518  BaselineRow* row = rows_[r];
519  TO_ROW* to_row = row_it.data();
520  row->SetupOldLineParameters(to_row);
521  }
522 }

◆ skew_angle()

double tesseract::BaselineBlock::skew_angle ( ) const
inline

Definition at line 136 of file baselinedetect.h.

136  {
137  return skew_angle_;
138  }

◆ SpacingModelError()

double tesseract::BaselineBlock::SpacingModelError ( double  perp_disp,
double  line_spacing,
double  line_offset 
)
static

Definition at line 422 of file baselinedetect.cpp.

423  {
424  // Round to the nearest multiple of line_spacing + line offset.
425  int multiple = IntCastRounded((perp_disp - line_offset) / line_spacing);
426  double model_y = line_spacing * multiple + line_offset;
427  return fabs(perp_disp - model_y);
428 }

The documentation for this class was generated from the following files:

baselinedetect.h
baselinedetect.cpp

ScrollView
Definition: scrollview.h:97
TO_BLOCK::baseline_offset
float baseline_offset
Definition: blobbox.h:786
PDBLK::bounding_box
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:58
BLOCK::set_xheight
void set_xheight(int32_t height)
set char size
Definition: ocrblock.h:67
create_to_win
ScrollView * create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:42
separate_underlines
void separate_underlines(TO_BLOCK *block, float gradient, FCOORD rotation, bool testing_on)
Definition: makerow.cpp:1772
TO_ROW::line_m
float line_m() const
Definition: blobbox.h:570
QSPLINE
Definition: quspline.h:31
TO_BLOCK::blobs
BLOBNBOX_LIST blobs
Definition: blobbox.h:771
plot_parallel_row
void plot_parallel_row(TO_ROW *row, float gradient, int32_t left, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:117
IntCastRounded
int IntCastRounded(double x)
Definition: helpers.h:173
FCOORD
Definition: points.h:187
TO_BLOCK::underlines
BLOBNBOX_LIST underlines
Definition: blobbox.h:772
blob_x_order
int blob_x_order(const void *item1, const void *item2)
Definition: makerow.cpp:2573
GenericVector::push_back
int push_back(T object)
Definition: genericvector.h:799
BLOCK::pdblk
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:189
TO_BLOCK::block
BLOCK * block
Definition: blobbox.h:776
ScrollView::MAGENTA
Definition: scrollview.h:109
TO_BLOCK::xheight
float xheight
Definition: blobbox.h:787
GenericVector::empty
bool empty() const
Definition: genericvector.h:86
ScrollView::YELLOW
Definition: scrollview.h:105
ScrollView::WHITE
Definition: scrollview.h:103
tesseract::BaselineBlock::SpacingModelError
static double SpacingModelError(double perp_disp, double line_spacing, double line_offset)
Definition: baselinedetect.cpp:422
textord_restore_underlines
bool textord_restore_underlines
Definition: underlin.cpp:21
draw_meanlines
void draw_meanlines(TO_BLOCK *block, float gradient, int32_t left, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:200
TO_BLOCK::line_spacing
float line_spacing
Definition: blobbox.h:778
TO_ROW::line_c
float line_c() const
Definition: blobbox.h:573
ScrollView::RED
Definition: scrollview.h:104
GenericVector< double >
TO_BLOCK::get_rows
TO_ROW_LIST * get_rows()
Definition: blobbox.h:703
tesseract::BaselineBlock::block
TO_BLOCK * block() const
Definition: baselinedetect.h:133
kMaxBlobSizeMultiple
const double kMaxBlobSizeMultiple
Definition: baselinedetect.cpp:55
TBOX::left
int16_t left() const
Definition: rect.h:71
TBOX::right
int16_t right() const
Definition: rect.h:78
restore_underlined_blobs
void restore_underlined_blobs(TO_BLOCK *block)
Definition: underlin.cpp:28
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
BLOCK::classify_rotation
FCOORD classify_rotation() const
Definition: ocrblock.h:139
TO_ROW
Definition: blobbox.h:543
ScrollView::CORAL
Definition: scrollview.h:119
ScrollView::Color
Color
Definition: scrollview.h:100
MedianOfCircularValues
T MedianOfCircularValues(T modulus, GenericVector< T > *v)
Definition: linlsq.h:112
TO_ROW::baseline
QSPLINE baseline
Definition: blobbox.h:669
TO_BLOCK::max_blob_size
float max_blob_size
Definition: blobbox.h:785
vigorous_noise_removal
void vigorous_noise_removal(TO_BLOCK *block)
Definition: makerow.cpp:466
pre_associate_blobs
void pre_associate_blobs(ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, bool testing_on)
Definition: makerow.cpp:1845
plot_blob_list
void plot_blob_list(ScrollView *win, BLOBNBOX_LIST *list, ScrollView::Color body_colour, ScrollView::Color child_colour)
Definition: blobbox.cpp:1068
TBOX
Definition: rect.h:33
TO_BLOCK::line_size
float line_size
Definition: blobbox.h:784