All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
tesseract::BaselineRow Class Reference

#include <baselinedetect.h>

Public Member Functions

 BaselineRow (double line_size, TO_ROW *to_row)
 
const TBOX & bounding_box () const
 
void SetupOldLineParameters (TO_ROW *row) const
 
void Print () const
 
double BaselineAngle () const
 
double SpaceBetween (const BaselineRow &other) const
 
double PerpDisp (const FCOORD &direction) const
 
double StraightYAtX (double x) const
 
bool FitBaseline (bool use_box_bottoms)
 
void AdjustBaselineToParallel (int debug, const FCOORD &direction)
 
double AdjustBaselineToGrid (int debug, const FCOORD &direction, double line_spacing, double line_offset)
 

Detailed Description

Definition at line 40 of file baselinedetect.h.

Constructor & Destructor Documentation

tesseract::BaselineRow::BaselineRow ( double  line_size,
TO_ROW * to_row 
)

Definition at line 65 of file baselinedetect.cpp.

66  : blobs_(to_row->blob_list()),
67  baseline_pt1_(0.0f, 0.0f), baseline_pt2_(0.0f, 0.0f),
68  baseline_error_(0.0), good_baseline_(false) {
69  ComputeBoundingBox();
70  // Compute a scale factor for rounding to ints.
71  disp_quant_factor_ = kOffsetQuantizationFactor * line_spacing;
72  fit_halfrange_ = kFitHalfrangeFactor * line_spacing;
73  max_baseline_error_ = kMaxBaselineError * line_spacing;
74 }
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
const double kOffsetQuantizationFactor
const double kMaxBaselineError
const double kFitHalfrangeFactor

Member Function Documentation

double tesseract::BaselineRow::AdjustBaselineToGrid ( int  debug,
const FCOORD & direction,
double  line_spacing,
double  line_offset 
)

Definition at line 229 of file baselinedetect.cpp.

232  {
233  if (blobs_->empty()) {
234  if (debug > 1) {
235  tprintf("Row empty at:");
236  bounding_box_.print();
237  }
238  return line_offset;
239  }
240  // Find the displacement_modes_ entry nearest to the grid.
241  double best_error = 0.0;
242  int best_index = -1;
243  for (int i = 0; i < displacement_modes_.size(); ++i) {
244  double blob_y = displacement_modes_[i];
245  double error = BaselineBlock::SpacingModelError(blob_y, line_spacing,
246  line_offset);
247  if (debug > 1) {
248  tprintf("Mode at %g has error %g from model \n", blob_y, error);
249  }
250  if (best_index < 0 || error < best_error) {
251  best_error = error;
252  best_index = i;
253  }
254  }
255  // We will move the baseline only if the chosen mode is close enough to the
256  // model.
257  double model_margin = max_baseline_error_ - best_error;
258  if (best_index >= 0 && model_margin > 0.0) {
259  // But if the current baseline is already close to the mode there is no
260  // point, and only the potential to damage accuracy by changing its angle.
261  double perp_disp = PerpDisp(direction);
262  double shift = displacement_modes_[best_index] - perp_disp;
263  if (fabs(shift) > max_baseline_error_) {
264  if (debug > 1) {
265  tprintf("Attempting linespacing model fit with mode %g to row at:",
266  displacement_modes_[best_index]);
267  bounding_box_.print();
268  }
269  FitConstrainedIfBetter(debug, direction, model_margin,
270  displacement_modes_[best_index]);
271  } else if (debug > 1) {
272  tprintf("Linespacing model only moves current line by %g for row at:",
273  shift);
274  bounding_box_.print();
275  }
276  } else if (debug > 1) {
277  tprintf("Linespacing model not close enough to any mode for row at:");
278  bounding_box_.print();
279  }
280  return fmod(PerpDisp(direction), line_spacing);
281 }
int size() const
Definition: genericvector.h:72
#define tprintf(...)
Definition: tprintf.h:31
void print() const
Definition: rect.h:270
static double SpacingModelError(double perp_disp, double line_spacing, double line_offset)
double PerpDisp(const FCOORD &direction) const
void tesseract::BaselineRow::AdjustBaselineToParallel ( int  debug,
const FCOORD & direction 
)

Definition at line 214 of file baselinedetect.cpp.

215  {
216  SetupBlobDisplacements(direction);
217  if (displacement_modes_.empty())
218  return;
219 #ifdef kDebugYCoord
220  if (bounding_box_.bottom() < kDebugYCoord &&
221  bounding_box_.top() > kDebugYCoord && debug < 3)
222  debug = 3;
223 #endif
224  FitConstrainedIfBetter(debug, direction, 0.0, displacement_modes_[0]);
225 }
inT16 bottom() const
Definition: rect.h:61
bool empty() const
Definition: genericvector.h:84
inT16 top() const
Definition: rect.h:54
double tesseract::BaselineRow::BaselineAngle ( ) const

Definition at line 98 of file baselinedetect.cpp.

98  {
99  FCOORD baseline_dir(baseline_pt2_ - baseline_pt1_);
100  double angle = baseline_dir.angle();
101  // Baseline directions are only unique in a range of pi so constrain to
102  // [-pi/2, pi/2].
103  return fmod(angle + M_PI * 1.5, M_PI) - M_PI * 0.5;
104 }
Definition: points.h:189
const TBOX& tesseract::BaselineRow::bounding_box ( ) const
inline

Definition at line 44 of file baselinedetect.h.

44  {
45  return bounding_box_;
46  }
bool tesseract::BaselineRow::FitBaseline ( bool  use_box_bottoms)

Definition at line 142 of file baselinedetect.cpp.

142  {
143  // Deterministic fitting is used wherever possible.
144  fitter_.Clear();
145  // Linear least squares is a backup if the DetLineFit produces a bad line.
146  LLSQ llsq;
147  BLOBNBOX_IT blob_it(blobs_);
148 
149  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
150  BLOBNBOX* blob = blob_it.data();
151  if (!use_box_bottoms) blob->EstimateBaselinePosition();
152  const TBOX& box = blob->bounding_box();
153  int x_middle = (box.left() + box.right()) / 2;
154 #ifdef kDebugYCoord
155  if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) {
156  tprintf("Box bottom = %d, baseline pos=%d for box at:",
157  box.bottom(), blob->baseline_position());
158  box.print();
159  }
160 #endif
161  fitter_.Add(ICOORD(x_middle, blob->baseline_position()), box.width() / 2);
162  llsq.add(x_middle, blob->baseline_position());
163  }
164  // Fit the line.
165  ICOORD pt1, pt2;
166  baseline_error_ = fitter_.Fit(&pt1, &pt2);
167  baseline_pt1_ = pt1;
168  baseline_pt2_ = pt2;
169  if (baseline_error_ > max_baseline_error_ &&
170  fitter_.SufficientPointsForIndependentFit()) {
171  // The fit was bad but there were plenty of points, so try skipping
172  // the first and last few, and use the new line if it dramatically improves
173  // the error of fit.
174  double error = fitter_.Fit(kNumSkipPoints, kNumSkipPoints, &pt1, &pt2);
175  if (error < baseline_error_ / 2.0) {
176  baseline_error_ = error;
177  baseline_pt1_ = pt1;
178  baseline_pt2_ = pt2;
179  }
180  }
181  int debug = 0;
182 #ifdef kDebugYCoord
183  Print();
184  debug = bounding_box_.bottom() < kDebugYCoord &&
185  bounding_box_.top() > kDebugYCoord
186  ? 3 : 2;
187 #endif
188  // Now we obtained a direction from that fit, see if we can improve the
189  // fit using the same direction and some other start point.
190  FCOORD direction(pt2 - pt1);
191  double target_offset = direction * pt1;
192  good_baseline_ = false;
193  FitConstrainedIfBetter(debug, direction, 0.0, target_offset);
194  // Wild lines can be produced because DetLineFit allows vertical lines, but
195  // vertical text has been rotated so angles over pi/4 should be disallowed.
196  // Near vertical lines can still be produced by vertically aligned components
197  // on very short lines.
198  double angle = BaselineAngle();
199  if (fabs(angle) > M_PI * 0.25) {
200  // Use the llsq fit as a backup.
201  baseline_pt1_ = llsq.mean_point();
202  baseline_pt2_ = baseline_pt1_ + FCOORD(1.0f, llsq.m());
203  // TODO(rays) get rid of this when m and c are no longer used.
204  double m = llsq.m();
205  double c = llsq.c(m);
206  baseline_error_ = llsq.rms(m, c);
207  good_baseline_ = false;
208  }
209  return good_baseline_;
210 }
double BaselineAngle() const
#define tprintf(...)
Definition: tprintf.h:31
int direction(EDGEPT *point)
Definition: vecfuncs.cpp:43
void print() const
Definition: rect.h:270
bool SufficientPointsForIndependentFit() const
Definition: detlinefit.cpp:163
double rms(double m, double c) const
Definition: linlsq.cpp:131
inT16 right() const
Definition: rect.h:75
Definition: linlsq.h:26
double m() const
Definition: linlsq.cpp:101
void EstimateBaselinePosition()
Definition: blobbox.cpp:350
inT16 left() const
Definition: rect.h:68
double c(double m) const
Definition: linlsq.cpp:117
int baseline_position() const
Definition: blobbox.h:374
void add(double x, double y)
Definition: linlsq.cpp:49
const int kNumSkipPoints
integer coordinate
Definition: points.h:30
inT16 bottom() const
Definition: rect.h:61
inT16 width() const
Definition: rect.h:111
void Add(const ICOORD &pt)
Definition: detlinefit.cpp:52
FCOORD mean_point() const
Definition: linlsq.cpp:167
double Fit(ICOORD *pt1, ICOORD *pt2)
Definition: detlinefit.h:75
Definition: rect.h:30
const TBOX & bounding_box() const
Definition: blobbox.h:215
inT16 top() const
Definition: rect.h:54
Definition: points.h:189
double tesseract::BaselineRow::PerpDisp ( const FCOORD & direction) const

Definition at line 121 of file baselinedetect.cpp.

121  {
122  float middle_x = (bounding_box_.left() + bounding_box_.right()) / 2.0f;
123  FCOORD middle_pos(middle_x, StraightYAtX(middle_x));
124  return direction * middle_pos / direction.length();
125 }
inT16 right() const
Definition: rect.h:75
inT16 left() const
Definition: rect.h:68
float length() const
find length
Definition: points.h:230
double StraightYAtX(double x) const
Definition: points.h:189
void tesseract::BaselineRow::Print ( ) const

Definition at line 87 of file baselinedetect.cpp.

87  {
88  tprintf("Baseline (%g,%g)->(%g,%g), angle=%g, intercept=%g\n",
89  baseline_pt1_.x(), baseline_pt1_.y(),
90  baseline_pt2_.x(), baseline_pt2_.y(),
91  BaselineAngle(), StraightYAtX(0.0));
92  tprintf("Quant factor=%g, error=%g, good=%d, box:",
93  disp_quant_factor_, baseline_error_, good_baseline_);
94  bounding_box_.print();
95 }
float x() const
Definition: points.h:209
double BaselineAngle() const
#define tprintf(...)
Definition: tprintf.h:31
void print() const
Definition: rect.h:270
float y() const
Definition: points.h:212
double StraightYAtX(double x) const
void tesseract::BaselineRow::SetupOldLineParameters ( TO_ROW * row) const

Definition at line 77 of file baselinedetect.cpp.

77  {
78  // TODO(rays) get rid of this when m and c are no longer used.
79  double gradient = tan(BaselineAngle());
80  // para_c is the actual intercept of the baseline on the y-axis.
81  float para_c = StraightYAtX(0.0);
82  row->set_line(gradient, para_c, baseline_error_);
83  row->set_parallel_line(gradient, para_c, baseline_error_);
84 }
double BaselineAngle() const
void set_parallel_line(float gradient, float new_c, float new_error)
Definition: blobbox.h:607
void set_line(float new_m, float new_c, float new_error)
Definition: blobbox.h:599
double StraightYAtX(double x) const
double tesseract::BaselineRow::SpaceBetween ( const BaselineRow & other) const

Definition at line 108 of file baselinedetect.cpp.

108  {
109  // Find the x-centre of overlap of the lines.
110  float x = (MAX(bounding_box_.left(), other.bounding_box_.left()) +
111  MIN(bounding_box_.right(), other.bounding_box_.right())) / 2.0f;
112  // Find the vertical centre between them.
113  float y = (StraightYAtX(x) + other.StraightYAtX(x)) / 2.0f;
114  // Find the perpendicular distance of (x,y) from each line.
115  FCOORD pt(x, y);
116  return PerpDistanceFromBaseline(pt) + other.PerpDistanceFromBaseline(pt);
117 }
#define MAX(x, y)
Definition: ndminx.h:24
#define MIN(x, y)
Definition: ndminx.h:28
inT16 right() const
Definition: rect.h:75
inT16 left() const
Definition: rect.h:68
double StraightYAtX(double x) const
Definition: points.h:189
double tesseract::BaselineRow::StraightYAtX ( double  x) const

Definition at line 129 of file baselinedetect.cpp.

129  {
130  double denominator = baseline_pt2_.x() - baseline_pt1_.x();
131  if (denominator == 0.0)
132  return (baseline_pt1_.y() + baseline_pt2_.y()) / 2.0;
133  return baseline_pt1_.y() +
134  (x - baseline_pt1_.x()) * (baseline_pt2_.y() - baseline_pt1_.y()) /
135  denominator;
136 }
float x() const
Definition: points.h:209
float y() const
Definition: points.h:212

The documentation for this class was generated from the following files: