All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
tesseract::BaselineBlock Class Reference

#include <baselinedetect.h>

Public Member Functions

 BaselineBlock (int debug_level, bool non_text, TO_BLOCK *block)
 
TO_BLOCK * block () const
 
double skew_angle () const
 
bool FitBaselinesAndFindSkew (bool use_box_bottoms)
 
void ParallelizeBaselines (double default_block_skew)
 
void SetupBlockParameters () const
 
void PrepareForSplineFitting (ICOORD page_tr, bool remove_noise)
 
void FitBaselineSplines (bool enable_splines, bool show_final_rows, Textord *textord)
 
void DrawFinalRows (const ICOORD &page_tr)
 
void DrawPixSpline (Pix *pix_in)
 

Static Public Member Functions

static double SpacingModelError (double perp_disp, double line_spacing, double line_offset)
 

Detailed Description

Definition at line 129 of file baselinedetect.h.

Constructor & Destructor Documentation

tesseract::BaselineBlock::BaselineBlock ( int  debug_level,
bool  non_text,
TO_BLOCK *  block 
)

Definition at line 403 of file baselinedetect.cpp.

404  : block_(block), debug_level_(debug_level), non_text_block_(non_text),
405  good_skew_angle_(false), skew_angle_(0.0),
406  line_spacing_(block->line_spacing), line_offset_(0.0), model_error_(0.0) {
407  TO_ROW_IT row_it(block_->get_rows());
408  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
409  // Sort the blobs on the rows.
410  row_it.data()->blob_list()->sort(blob_x_order);
411  rows_.push_back(new BaselineRow(block->line_spacing, row_it.data()));
412  }
413 }
int blob_x_order(const void *item1, const void *item2)
Definition: makerow.cpp:2605
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
float line_spacing
Definition: blobbox.h:775

Member Function Documentation

TO_BLOCK* tesseract::BaselineBlock::block ( ) const
inline

Definition at line 133 of file baselinedetect.h.

133  {
134  return block_;
135  }
void tesseract::BaselineBlock::DrawFinalRows ( const ICOORD &  page_tr)

Definition at line 572 of file baselinedetect.cpp.

572  {
573 #ifndef GRAPHICS_DISABLED
574  if (non_text_block_) return;
575  double gradient = tan(skew_angle_);
576  FCOORD rotation(1.0f, 0.0f);
577  int left_edge = block_->block->bounding_box().left();
578  ScrollView* win = create_to_win(page_tr);
579  ScrollView::Color colour = ScrollView::RED;
580  TO_ROW_IT row_it = block_->get_rows();
581  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
582  plot_parallel_row(row_it.data(), gradient, left_edge, colour, rotation);
583  colour = static_cast<ScrollView::Color>(colour + 1);
584  if (colour > ScrollView::MAGENTA)
585  colour = ScrollView::RED;
586  }
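587  plot_blob_list(win, &block_->blobs, ScrollView::MAGENTA, ScrollView::WHITE);
588  // Show discarded blobs.
589  plot_blob_list(win, &block_->underlines,
590  ScrollView::YELLOW, ScrollView::CORAL);
591  if (block_->blobs.length() > 0)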
592  tprintf("%d blobs discarded as noise\n", block_->blobs.length());
593  draw_meanlines(block_, gradient, left_edge, ScrollView::WHITE, rotation);
594 #endif
595 }
ScrollView * create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:47
#define tprintf(...)
Definition: tprintf.h:31
BLOBNBOX_LIST underlines
Definition: blobbox.h:769
void plot_parallel_row(TO_ROW *row, float gradient, inT32 left, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:125
void draw_meanlines(TO_BLOCK *block, float gradient, inT32 left, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:210
void plot_blob_list(ScrollView *win, BLOBNBOX_LIST *list, ScrollView::Color body_colour, ScrollView::Color child_colour)
Definition: blobbox.cpp:1080
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:67
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
Definition: points.h:189
BLOCK * block
Definition: blobbox.h:773
BLOBNBOX_LIST blobs
Definition: blobbox.h:768
void tesseract::BaselineBlock::DrawPixSpline ( Pix *  pix_in)

Definition at line 597 of file baselinedetect.cpp.

597  {
598  if (non_text_block_) return;
599  TO_ROW_IT row_it = block_->get_rows();
600  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
601  row_it.data()->baseline.plot(pix_in);
602  }
603 }
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
bool tesseract::BaselineBlock::FitBaselinesAndFindSkew ( bool  use_box_bottoms)

Definition at line 429 of file baselinedetect.cpp.

429  {
430  if (non_text_block_) return false;
431  GenericVector<double> angles;
432  for (int r = 0; r < rows_.size(); ++r) {
433  BaselineRow* row = rows_[r];
434  if (row->FitBaseline(use_box_bottoms)) {
435  double angle = row->BaselineAngle();
436  angles.push_back(angle);
437  }
438  if (debug_level_ > 1)
439  row->Print();
440  }
441 
442  if (!angles.empty()) {
443  skew_angle_ = MedianOfCircularValues(M_PI, &angles);
444  good_skew_angle_ = true;
445  } else {
446  skew_angle_ = 0.0f;
447  good_skew_angle_ = false;
448  }
449  if (debug_level_ > 0) {
450  tprintf("Initial block skew angle = %g, good = %d\n",
451  skew_angle_, good_skew_angle_);
452  }
453  return good_skew_angle_;
454 }
T MedianOfCircularValues(T modulus, GenericVector< T > *v)
Definition: linlsq.h:111
int push_back(T object)
#define tprintf(...)
Definition: tprintf.h:31
bool empty() const
Definition: genericvector.h:84
void tesseract::BaselineBlock::FitBaselineSplines ( bool  enable_splines,
bool  show_final_rows,
Textord *  textord 
)

Definition at line 543 of file baselinedetect.cpp.

545  {
546  double gradient = tan(skew_angle_);
547  FCOORD rotation(1.0f, 0.0f);
548 
549  if (enable_splines) {
550  textord->make_spline_rows(block_, gradient, show_final_rows);
551  } else {
552  // Make a fake spline from the existing line.
553  TBOX block_box= block_->block->bounding_box();
554  TO_ROW_IT row_it = block_->get_rows();
555  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
556  TO_ROW* row = row_it.data();
557  inT32 xstarts[2] = { block_box.left(), block_box.right() };
558  double coeffs[3] = { 0.0, row->line_m(), row->line_c() };
559  row->baseline = QSPLINE(1, xstarts, coeffs);
560  textord->compute_row_xheight(row, block_->block->classify_rotation(),
561  row->line_m(), block_->line_size);
562  }
563  }
564  textord->compute_block_xheight(block_, gradient);
565  block_->block->set_xheight(block_->xheight);
566  if (textord_restore_underlines) // fix underlines
567  restore_underlined_blobs(block_);
568 }
void restore_underlined_blobs(TO_BLOCK *block)
Definition: underlin.cpp:38
float line_c() const
Definition: blobbox.h:569
void set_xheight(inT32 height)
set char size
Definition: ocrblock.h:72
QSPLINE baseline
Definition: blobbox.h:666
inT16 right() const
Definition: rect.h:75
float xheight
Definition: blobbox.h:784
float line_m() const
Definition: blobbox.h:566
FCOORD classify_rotation() const
Definition: ocrblock.h:144
inT16 left() const
Definition: rect.h:68
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:67
EXTERN bool textord_restore_underlines
Definition: underlin.cpp:30
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
Definition: rect.h:30
Definition: points.h:189
BLOCK * block
Definition: blobbox.h:773
float line_size
Definition: blobbox.h:781
int inT32
Definition: host.h:102
void tesseract::BaselineBlock::ParallelizeBaselines ( double  default_block_skew)

Definition at line 458 of file baselinedetect.cpp.

458  {
459  if (non_text_block_) return;
460  if (!good_skew_angle_) skew_angle_ = default_block_skew;
461  if (debug_level_ > 0)
462  tprintf("Adjusting block to skew angle %g\n", skew_angle_);
463  FCOORD direction(cos(skew_angle_), sin(skew_angle_));
464  for (int r = 0; r < rows_.size(); ++r) {
465  BaselineRow* row = rows_[r];
466  row->AdjustBaselineToParallel(debug_level_, direction);
467  if (debug_level_ > 1)
468  row->Print();
469  }
470  if (rows_.size() < 3 || !ComputeLineSpacing())
471  return;
472  // Enforce the line spacing model on all lines that don't yet have a good
473  // baseline.
474  // Start by finding the row that is best fitted to the model.
475  int best_row = 0;
476  double best_error = SpacingModelError(rows_[0]->PerpDisp(direction),
477  line_spacing_, line_offset_);
478  for (int r = 1; r < rows_.size(); ++r) {
479  double error = SpacingModelError(rows_[r]->PerpDisp(direction),
480  line_spacing_, line_offset_);
481  if (error < best_error) {
482  best_error = error;
483  best_row = r;
484  }
485  }
486  // Starting at the best fitting row, work outwards, syncing the offset.
487  double offset = line_offset_;
488  for (int r = best_row + 1; r < rows_.size(); ++r) {
489  offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
490  line_spacing_, offset);
491  }
492  offset = line_offset_;
493  for (int r = best_row - 1; r >= 0; --r) {
494  offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
495  line_spacing_, offset);
496  }
497 }
#define tprintf(...)
Definition: tprintf.h:31
int direction(EDGEPT *point)
Definition: vecfuncs.cpp:43
static double SpacingModelError(double perp_disp, double line_spacing, double line_offset)
Definition: points.h:189
void tesseract::BaselineBlock::PrepareForSplineFitting ( ICOORD  page_tr,
bool  remove_noise 
)

Definition at line 527 of file baselinedetect.cpp.

527  {
528  if (non_text_block_) return;
529  if (remove_noise) {
530  vigorous_noise_removal(block_);
531  }
532  FCOORD rotation(1.0f, 0.0f);
533  double gradient = tan(skew_angle_);
534  separate_underlines(block_, gradient, rotation, true);
535  pre_associate_blobs(page_tr, block_, rotation, true);
536 }
void separate_underlines(TO_BLOCK *block, float gradient, FCOORD rotation, BOOL8 testing_on)
Definition: makerow.cpp:1803
void pre_associate_blobs(ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, BOOL8 testing_on)
Definition: makerow.cpp:1876
void vigorous_noise_removal(TO_BLOCK *block)
Definition: makerow.cpp:473
Definition: points.h:189
void tesseract::BaselineBlock::SetupBlockParameters ( ) const

Definition at line 500 of file baselinedetect.cpp.

500  {
501  if (line_spacing_ > 0.0) {
502  // Where was block_line_spacing set before?
503  float min_spacing = MIN(block_->line_spacing, line_spacing_);
504  if (min_spacing < block_->line_size)
505  block_->line_size = min_spacing;
506  block_->line_spacing = line_spacing_;
507  block_->baseline_offset = line_offset_;
508  block_->max_blob_size = line_spacing_ * kMaxBlobSizeMultiple;
509  }
510  // Setup the parameters on all the rows.
511  TO_ROW_IT row_it(block_->get_rows());
512  for (int r = 0; r < rows_.size(); ++r, row_it.forward()) {
513  BaselineRow* row = rows_[r];
514  TO_ROW* to_row = row_it.data();
515  row->SetupOldLineParameters(to_row);
516  }
517 }
#define MIN(x, y)
Definition: ndminx.h:28
float baseline_offset
Definition: blobbox.h:783
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
const double kMaxBlobSizeMultiple
float line_spacing
Definition: blobbox.h:775
float max_blob_size
Definition: blobbox.h:782
float line_size
Definition: blobbox.h:781
double tesseract::BaselineBlock::skew_angle ( ) const
inline

Definition at line 136 of file baselinedetect.h.

136  {
137  return skew_angle_;
138  }
double tesseract::BaselineBlock::SpacingModelError ( double  perp_disp,
double  line_spacing,
double  line_offset 
)
static

Definition at line 417 of file baselinedetect.cpp.

418  {
419  // Round to the nearest multiple of line_spacing + line offset.
420  int multiple = IntCastRounded((perp_disp - line_offset) / line_spacing);
421  double model_y = line_spacing * multiple + line_offset;
422  return fabs(perp_disp - model_y);
423 }
int IntCastRounded(double x)
Definition: helpers.h:172

The documentation for this class was generated from the following files: