tesseract  5.0.0-alpha-619-ge9db
baselinedetect.cpp
Go to the documentation of this file.
1 // File: baselinedetect.cpp
3 // Description: Initial Baseline Determination.
4 // Copyright 2012 Google Inc. All Rights Reserved.
5 // Author: rays@google.com (Ray Smith)
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
18 
19 #define _USE_MATH_DEFINES // for M_PI
20 
21 #ifdef HAVE_CONFIG_H
22 #include "config_auto.h"
23 #endif
24 
25 #include "baselinedetect.h"
26 
27 #include <algorithm>
28 #include <cfloat> // for FLT_MAX
29 #include <cmath> // for M_PI
30 #include "allheaders.h"
31 #include "blobbox.h"
32 #include "detlinefit.h"
33 #include "drawtord.h"
34 #include <tesseract/helpers.h>
35 #include "linlsq.h"
36 #include "makerow.h"
37 #include "textord.h"
38 #include "tprintf.h"
39 #include "underlin.h"
40 
41 // Number of displacement modes kept in displacement_modes_;
42 const int kMaxDisplacementsModes = 3;
43 // Number of points to skip when retrying initial fit.
44 const int kNumSkipPoints = 3;
45 // Max angle deviation (in radians) allowed to keep the independent baseline.
46 const double kMaxSkewDeviation = 1.0 / 64;
47 // Fraction of line spacing estimate for quantization of blob displacements.
48 const double kOffsetQuantizationFactor = 3.0 / 64;
49 // Fraction of line spacing estimate for computing blob fit error.
50 const double kFitHalfrangeFactor = 6.0 / 64;
51 // Max fraction of line spacing allowed before a baseline counts as badly fitting.
52 const double kMaxBaselineError = 3.0 / 64;
53 // Multiple of linespacing that sets max_blob_size in TO_BLOCK.
54 // Copied from textord_excess_blobsize.
55 const double kMaxBlobSizeMultiple = 1.3;
56 // Min fraction of linespacing gaps that should be close to the model before
57 // we will force the linespacing model on all the lines.
58 const double kMinFittingLinespacings = 0.25;
59 // A y-coordinate within a textline that is to be debugged.
60 //#define kDebugYCoord 1525
61 
62 namespace tesseract {
63 
64 BaselineRow::BaselineRow(double line_spacing, TO_ROW* to_row)
65  : blobs_(to_row->blob_list()),
66  baseline_pt1_(0.0f, 0.0f), baseline_pt2_(0.0f, 0.0f),
67  baseline_error_(0.0), good_baseline_(false) {
68  ComputeBoundingBox();
69  // Compute a scale factor for rounding to ints.
70  disp_quant_factor_ = kOffsetQuantizationFactor * line_spacing;
71  fit_halfrange_ = kFitHalfrangeFactor * line_spacing;
72  max_baseline_error_ = kMaxBaselineError * line_spacing;
73 }
74 
75 // Sets the TO_ROW with the output straight line.
77  // TODO(rays) get rid of this when m and c are no longer used.
78  double gradient = tan(BaselineAngle());
79  // para_c is the actual intercept of the baseline on the y-axis.
80  float para_c = StraightYAtX(0.0);
81  row->set_line(gradient, para_c, baseline_error_);
82  row->set_parallel_line(gradient, para_c, baseline_error_);
83 }
84 
85 // Outputs diagnostic information.
86 void BaselineRow::Print() const {
87  tprintf("Baseline (%g,%g)->(%g,%g), angle=%g, intercept=%g\n",
88  baseline_pt1_.x(), baseline_pt1_.y(),
89  baseline_pt2_.x(), baseline_pt2_.y(),
90  BaselineAngle(), StraightYAtX(0.0));
91  tprintf("Quant factor=%g, error=%g, good=%d, box:",
92  disp_quant_factor_, baseline_error_, good_baseline_);
93  bounding_box_.print();
94 }
95 
96 // Returns the skew angle (in radians) of the current baseline in [-pi,pi].
98  FCOORD baseline_dir(baseline_pt2_ - baseline_pt1_);
99  double angle = baseline_dir.angle();
100  // Baseline directions are only unique in a range of pi so constrain to
101  // [-pi/2, pi/2].
102  return fmod(angle + M_PI * 1.5, M_PI) - M_PI * 0.5;
103 }
104 
105 // Computes and returns the linespacing at the middle of the overlap
106 // between this and other.
107 double BaselineRow::SpaceBetween(const BaselineRow& other) const {
108  // Find the x-centre of overlap of the lines.
109  float x = (std::max(bounding_box_.left(), other.bounding_box_.left()) +
110  std::min(bounding_box_.right(), other.bounding_box_.right())) / 2.0f;
111  // Find the vertical centre between them.
112  float y = (StraightYAtX(x) + other.StraightYAtX(x)) / 2.0f;
113  // Find the perpendicular distance of (x,y) from each line.
114  FCOORD pt(x, y);
115  return PerpDistanceFromBaseline(pt) + other.PerpDistanceFromBaseline(pt);
116 }
117 
118 // Computes and returns the displacement of the center of the line
119 // perpendicular to the given direction.
120 double BaselineRow::PerpDisp(const FCOORD& direction) const {
121  float middle_x = (bounding_box_.left() + bounding_box_.right()) / 2.0f;
122  FCOORD middle_pos(middle_x, StraightYAtX(middle_x));
123  return direction * middle_pos / direction.length();
124 }
125 
126 // Computes the y coordinate at the given x using the straight baseline
127 // defined by baseline_pt1_ and baseline_pt2__.
128 double BaselineRow::StraightYAtX(double x) const {
129  double denominator = baseline_pt2_.x() - baseline_pt1_.x();
130  if (denominator == 0.0)
131  return (baseline_pt1_.y() + baseline_pt2_.y()) / 2.0;
132  return baseline_pt1_.y() +
133  (x - baseline_pt1_.x()) * (baseline_pt2_.y() - baseline_pt1_.y()) /
134  denominator;
135 }
136 
137 // Fits a straight baseline to the points. Returns true if it had enough
138 // points to be reasonably sure of the fitted baseline.
139 // If use_box_bottoms is false, baselines positions are formed by
140 // considering the outlines of the blobs.
141 bool BaselineRow::FitBaseline(bool use_box_bottoms) {
142  // Deterministic fitting is used wherever possible.
143  fitter_.Clear();
144  // Linear least squares is a backup if the DetLineFit produces a bad line.
145  LLSQ llsq;
146  BLOBNBOX_IT blob_it(blobs_);
147 
148  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
149  BLOBNBOX* blob = blob_it.data();
150  if (!use_box_bottoms) blob->EstimateBaselinePosition();
151  const TBOX& box = blob->bounding_box();
152  int x_middle = (box.left() + box.right()) / 2;
153 #ifdef kDebugYCoord
154  if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) {
155  tprintf("Box bottom = %d, baseline pos=%d for box at:",
156  box.bottom(), blob->baseline_position());
157  box.print();
158  }
159 #endif
160  fitter_.Add(ICOORD(x_middle, blob->baseline_position()), box.width() / 2);
161  llsq.add(x_middle, blob->baseline_position());
162  }
163  // Fit the line.
164  ICOORD pt1, pt2;
165  baseline_error_ = fitter_.Fit(&pt1, &pt2);
166  baseline_pt1_ = pt1;
167  baseline_pt2_ = pt2;
168  if (baseline_error_ > max_baseline_error_ &&
170  // The fit was bad but there were plenty of points, so try skipping
171  // the first and last few, and use the new line if it dramatically improves
172  // the error of fit.
173  double error = fitter_.Fit(kNumSkipPoints, kNumSkipPoints, &pt1, &pt2);
174  if (error < baseline_error_ / 2.0) {
175  baseline_error_ = error;
176  baseline_pt1_ = pt1;
177  baseline_pt2_ = pt2;
178  }
179  }
180  int debug = 0;
181 #ifdef kDebugYCoord
182  Print();
183  debug = bounding_box_.bottom() < kDebugYCoord &&
184  bounding_box_.top() > kDebugYCoord
185  ? 3 : 2;
186 #endif
187  // Now we obtained a direction from that fit, see if we can improve the
188  // fit using the same direction and some other start point.
189  FCOORD direction(pt2 - pt1);
190  double target_offset = direction * pt1;
191  good_baseline_ = false;
192  FitConstrainedIfBetter(debug, direction, 0.0, target_offset);
193  // Wild lines can be produced because DetLineFit allows vertical lines, but
194  // vertical text has been rotated so angles over pi/4 should be disallowed.
195  // Near vertical lines can still be produced by vertically aligned components
196  // on very short lines.
197  double angle = BaselineAngle();
198  if (fabs(angle) > M_PI * 0.25) {
199  // Use the llsq fit as a backup.
200  baseline_pt1_ = llsq.mean_point();
201  baseline_pt2_ = baseline_pt1_ + FCOORD(1.0f, llsq.m());
202  // TODO(rays) get rid of this when m and c are no longer used.
203  double m = llsq.m();
204  double c = llsq.c(m);
205  baseline_error_ = llsq.rms(m, c);
206  good_baseline_ = false;
207  }
208  return good_baseline_;
209 }
210 
211 // Modifies an existing result of FitBaseline to be parallel to the given
212 // direction vector if that produces a better result.
214  const FCOORD& direction) {
215  SetupBlobDisplacements(direction);
216  if (displacement_modes_.empty())
217  return;
218 #ifdef kDebugYCoord
219  if (bounding_box_.bottom() < kDebugYCoord &&
220  bounding_box_.top() > kDebugYCoord && debug < 3)
221  debug = 3;
222 #endif
223  FitConstrainedIfBetter(debug, direction, 0.0, displacement_modes_[0]);
224 }
225 
226 // Modifies the baseline to snap to the textline grid if the existing
227 // result is not good enough.
229  const FCOORD& direction,
230  double line_spacing,
231  double line_offset) {
232  if (blobs_->empty()) {
233  if (debug > 1) {
234  tprintf("Row empty at:");
235  bounding_box_.print();
236  }
237  return line_offset;
238  }
239  // Find the displacement_modes_ entry nearest to the grid.
240  double best_error = 0.0;
241  int best_index = -1;
242  for (int i = 0; i < displacement_modes_.size(); ++i) {
243  double blob_y = displacement_modes_[i];
244  double error = BaselineBlock::SpacingModelError(blob_y, line_spacing,
245  line_offset);
246  if (debug > 1) {
247  tprintf("Mode at %g has error %g from model \n", blob_y, error);
248  }
249  if (best_index < 0 || error < best_error) {
250  best_error = error;
251  best_index = i;
252  }
253  }
254  // We will move the baseline only if the chosen mode is close enough to the
255  // model.
256  double model_margin = max_baseline_error_ - best_error;
257  if (best_index >= 0 && model_margin > 0.0) {
258  // But if the current baseline is already close to the mode there is no
259  // point, and only the potential to damage accuracy by changing its angle.
260  double perp_disp = PerpDisp(direction);
261  double shift = displacement_modes_[best_index] - perp_disp;
262  if (fabs(shift) > max_baseline_error_) {
263  if (debug > 1) {
264  tprintf("Attempting linespacing model fit with mode %g to row at:",
265  displacement_modes_[best_index]);
266  bounding_box_.print();
267  }
268  FitConstrainedIfBetter(debug, direction, model_margin,
269  displacement_modes_[best_index]);
270  } else if (debug > 1) {
271  tprintf("Linespacing model only moves current line by %g for row at:",
272  shift);
273  bounding_box_.print();
274  }
275  } else if (debug > 1) {
276  tprintf("Linespacing model not close enough to any mode for row at:");
277  bounding_box_.print();
278  }
279  return fmod(PerpDisp(direction), line_spacing);
280 }
281 
282 // Sets up displacement_modes_ with the top few modes of the perpendicular
283 // distance of each blob from the given direction vector, after rounding.
284 void BaselineRow::SetupBlobDisplacements(const FCOORD& direction) {
285  // Set of perpendicular displacements of the blob bottoms from the required
286  // baseline direction.
287  GenericVector<double> perp_blob_dists;
288  displacement_modes_.truncate(0);
289  // Gather the skew-corrected position of every blob.
290  double min_dist = FLT_MAX;
291  double max_dist = -FLT_MAX;
292  BLOBNBOX_IT blob_it(blobs_);
293 #ifdef kDebugYCoord
294  bool debug = false;
295 #endif
296  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
297  BLOBNBOX* blob = blob_it.data();
298  const TBOX& box = blob->bounding_box();
299 #ifdef kDebugYCoord
300  if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) debug = true;
301 #endif
302  FCOORD blob_pos((box.left() + box.right()) / 2.0f,
303  blob->baseline_position());
304  double offset = direction * blob_pos;
305  perp_blob_dists.push_back(offset);
306 #ifdef kDebugYCoord
307  if (debug) {
308  tprintf("Displacement %g for blob at:", offset);
309  box.print();
310  }
311 #endif
312  UpdateRange(offset, &min_dist, &max_dist);
313  }
314  // Set up a histogram using disp_quant_factor_ as the bucket size.
315  STATS dist_stats(IntCastRounded(min_dist / disp_quant_factor_),
316  IntCastRounded(max_dist / disp_quant_factor_) + 1);
317  for (int i = 0; i < perp_blob_dists.size(); ++i) {
318  dist_stats.add(IntCastRounded(perp_blob_dists[i] / disp_quant_factor_), 1);
319  }
321  dist_stats.top_n_modes(kMaxDisplacementsModes, &scaled_modes);
322 #ifdef kDebugYCoord
323  if (debug) {
324  for (int i = 0; i < scaled_modes.size(); ++i) {
325  tprintf("Top mode = %g * %d\n",
326  scaled_modes[i].key * disp_quant_factor_, scaled_modes[i].data);
327  }
328  }
329 #endif
330  for (int i = 0; i < scaled_modes.size(); ++i)
331  displacement_modes_.push_back(disp_quant_factor_ * scaled_modes[i].key);
332 }
333 
334 // Fits a line in the given direction to blobs that are close to the given
335 // target_offset perpendicular displacement from the direction. The fit
336 // error is allowed to be cheat_allowance worse than the existing fit, and
337 // will still be used.
338 // If cheat_allowance > 0, the new fit will be good and replace the current
339 // fit if it has better fit (with cheat) OR its error is below
340 // max_baseline_error_ and the old fit is marked bad.
341 // Otherwise the new fit will only replace the old if it is really better,
342 // or the old fit is marked bad and the new fit has sufficient points, as
343 // well as being within the max_baseline_error_.
344 void BaselineRow::FitConstrainedIfBetter(int debug,
345  const FCOORD& direction,
346  double cheat_allowance,
347  double target_offset) {
348  double halfrange = fit_halfrange_ * direction.length();
349  double min_dist = target_offset - halfrange;
350  double max_dist = target_offset + halfrange;
351  ICOORD line_pt;
352  double new_error = fitter_.ConstrainedFit(direction, min_dist, max_dist,
353  debug > 2, &line_pt);
354  // Allow cheat_allowance off the new error
355  new_error -= cheat_allowance;
356  double old_angle = BaselineAngle();
357  double new_angle = direction.angle();
358  if (debug > 1) {
359  tprintf("Constrained error = %g, original = %g",
360  new_error, baseline_error_);
361  tprintf(" angles = %g, %g, delta=%g vs threshold %g\n",
362  old_angle, new_angle,
363  new_angle - old_angle, kMaxSkewDeviation);
364  }
365  bool new_good_baseline = new_error <= max_baseline_error_ &&
366  (cheat_allowance > 0.0 || fitter_.SufficientPointsForIndependentFit());
367  // The new will replace the old if any are true:
368  // 1. the new error is better
369  // 2. the old is NOT good, but the new is
370  // 3. there is a wild angular difference between them (assuming that the new
371  // is a better guess at the angle.)
372  if (new_error <= baseline_error_ ||
373  (!good_baseline_ && new_good_baseline) ||
374  fabs(new_angle - old_angle) > kMaxSkewDeviation) {
375  baseline_error_ = new_error;
376  baseline_pt1_ = line_pt;
377  baseline_pt2_ = baseline_pt1_ + direction;
378  good_baseline_ = new_good_baseline;
379  if (debug > 1) {
380  tprintf("Replacing with constrained baseline, good = %d\n",
381  good_baseline_);
382  }
383  } else if (debug > 1) {
384  tprintf("Keeping old baseline\n");
385  }
386 }
387 
388 // Returns the perpendicular distance of the point from the straight
389 // baseline.
390 double BaselineRow::PerpDistanceFromBaseline(const FCOORD& pt) const {
391  FCOORD baseline_vector(baseline_pt2_ - baseline_pt1_);
392  FCOORD offset_vector(pt - baseline_pt1_);
393  double distance = baseline_vector * offset_vector;
394  return sqrt(distance * distance / baseline_vector.sqlength());
395 }
396 
397 // Computes the bounding box of the row.
398 void BaselineRow::ComputeBoundingBox() {
399  BLOBNBOX_IT it(blobs_);
400  TBOX box;
401  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
402  box += it.data()->bounding_box();
403  }
404  bounding_box_ = box;
405 }
406 
407 
408 BaselineBlock::BaselineBlock(int debug_level, bool non_text, TO_BLOCK* block)
409  : block_(block), debug_level_(debug_level), non_text_block_(non_text),
410  good_skew_angle_(false), skew_angle_(0.0),
411  line_spacing_(block->line_spacing), line_offset_(0.0), model_error_(0.0) {
412  TO_ROW_IT row_it(block_->get_rows());
413  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
414  // Sort the blobs on the rows.
415  row_it.data()->blob_list()->sort(blob_x_order);
416  rows_.push_back(new BaselineRow(block->line_spacing, row_it.data()));
417  }
418 }
419 
420 // Computes and returns the absolute error of the given perp_disp from the
421 // given linespacing model.
422 double BaselineBlock::SpacingModelError(double perp_disp, double line_spacing,
423  double line_offset) {
424  // Round to the nearest multiple of line_spacing + line offset.
425  int multiple = IntCastRounded((perp_disp - line_offset) / line_spacing);
426  double model_y = line_spacing * multiple + line_offset;
427  return fabs(perp_disp - model_y);
428 }
429 
430 // Fits straight line baselines and computes the skew angle from the
431 // median angle. Returns true if a good angle is found.
432 // If use_box_bottoms is false, baseline positions are formed by
433 // considering the outlines of the blobs.
434 bool BaselineBlock::FitBaselinesAndFindSkew(bool use_box_bottoms) {
435  if (non_text_block_) return false;
436  GenericVector<double> angles;
437  for (int r = 0; r < rows_.size(); ++r) {
438  BaselineRow* row = rows_[r];
439  if (row->FitBaseline(use_box_bottoms)) {
440  double angle = row->BaselineAngle();
441  angles.push_back(angle);
442  }
443  if (debug_level_ > 1)
444  row->Print();
445  }
446 
447  if (!angles.empty()) {
448  skew_angle_ = MedianOfCircularValues(M_PI, &angles);
449  good_skew_angle_ = true;
450  } else {
451  skew_angle_ = 0.0f;
452  good_skew_angle_ = false;
453  }
454  if (debug_level_ > 0) {
455  tprintf("Initial block skew angle = %g, good = %d\n",
456  skew_angle_, good_skew_angle_);
457  }
458  return good_skew_angle_;
459 }
460 
461 // Refits the baseline to a constrained angle, using the stored block
462 // skew if good enough, otherwise the supplied default skew.
463 void BaselineBlock::ParallelizeBaselines(double default_block_skew) {
464  if (non_text_block_) return;
465  if (!good_skew_angle_) skew_angle_ = default_block_skew;
466  if (debug_level_ > 0)
467  tprintf("Adjusting block to skew angle %g\n", skew_angle_);
468  FCOORD direction(cos(skew_angle_), sin(skew_angle_));
469  for (int r = 0; r < rows_.size(); ++r) {
470  BaselineRow* row = rows_[r];
471  row->AdjustBaselineToParallel(debug_level_, direction);
472  if (debug_level_ > 1)
473  row->Print();
474  }
475  if (rows_.size() < 3 || !ComputeLineSpacing())
476  return;
477  // Enforce the line spacing model on all lines that don't yet have a good
478  // baseline.
479  // Start by finding the row that is best fitted to the model.
480  int best_row = 0;
481  double best_error = SpacingModelError(rows_[0]->PerpDisp(direction),
482  line_spacing_, line_offset_);
483  for (int r = 1; r < rows_.size(); ++r) {
484  double error = SpacingModelError(rows_[r]->PerpDisp(direction),
485  line_spacing_, line_offset_);
486  if (error < best_error) {
487  best_error = error;
488  best_row = r;
489  }
490  }
491  // Starting at the best fitting row, work outwards, syncing the offset.
492  double offset = line_offset_;
493  for (int r = best_row + 1; r < rows_.size(); ++r) {
494  offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
495  line_spacing_, offset);
496  }
497  offset = line_offset_;
498  for (int r = best_row - 1; r >= 0; --r) {
499  offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
500  line_spacing_, offset);
501  }
502 }
503 
504 // Sets the parameters in TO_BLOCK that are needed by subsequent processes.
506  if (line_spacing_ > 0.0) {
507  // Where was block_line_spacing set before?
508  float min_spacing = std::min(block_->line_spacing, static_cast<float>(line_spacing_));
509  if (min_spacing < block_->line_size)
510  block_->line_size = min_spacing;
511  block_->line_spacing = line_spacing_;
512  block_->baseline_offset = line_offset_;
513  block_->max_blob_size = line_spacing_ * kMaxBlobSizeMultiple;
514  }
515  // Setup the parameters on all the rows.
516  TO_ROW_IT row_it(block_->get_rows());
517  for (int r = 0; r < rows_.size(); ++r, row_it.forward()) {
518  BaselineRow* row = rows_[r];
519  TO_ROW* to_row = row_it.data();
520  row->SetupOldLineParameters(to_row);
521  }
522 }
523 
524 // Processing that is required before fitting baseline splines, but requires
525 // linear baselines in order to be successful:
526 // Removes noise if required
527 // Separates out underlines
528 // Pre-associates blob fragments.
529 // TODO(rays/joeliu) This entire section of code is inherited from the past
530 // and could be improved/eliminated.
531 // page_tr is used to size a debug window.
532 void BaselineBlock::PrepareForSplineFitting(ICOORD page_tr, bool remove_noise) {
533  if (non_text_block_) return;
534  if (remove_noise) {
535  vigorous_noise_removal(block_);
536  }
537  FCOORD rotation(1.0f, 0.0f);
538  double gradient = tan(skew_angle_);
539  separate_underlines(block_, gradient, rotation, true);
540  pre_associate_blobs(page_tr, block_, rotation, true);
541 }
542 
543 // Fits splines to the textlines, or creates fake QSPLINES from the straight
544 // baselines that are already on the TO_ROWs.
545 // As a side-effect, computes the xheights of the rows and the block.
546 // Although x-height estimation is conceptually separate, it is part of
547 // detecting perspective distortion and therefore baseline fitting.
548 void BaselineBlock::FitBaselineSplines(bool enable_splines,
549  bool show_final_rows,
550  Textord* textord) {
551  double gradient = tan(skew_angle_);
552  FCOORD rotation(1.0f, 0.0f);
553 
554  if (enable_splines) {
555  textord->make_spline_rows(block_, gradient, show_final_rows);
556  } else {
557  // Make a fake spline from the existing line.
558  TBOX block_box= block_->block->pdblk.bounding_box();
559  TO_ROW_IT row_it = block_->get_rows();
560  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
561  TO_ROW* row = row_it.data();
562  int32_t xstarts[2] = { block_box.left(), block_box.right() };
563  double coeffs[3] = { 0.0, row->line_m(), row->line_c() };
564  row->baseline = QSPLINE(1, xstarts, coeffs);
565  textord->compute_row_xheight(row, block_->block->classify_rotation(),
566  row->line_m(), block_->line_size);
567  }
568  }
569  textord->compute_block_xheight(block_, gradient);
570  block_->block->set_xheight(block_->xheight);
571  if (textord_restore_underlines) // fix underlines
572  restore_underlined_blobs(block_);
573 }
574 
575 // Draws the (straight) baselines and final blobs colored according to
576 // what was discarded as noise and what is associated with each row.
577 void BaselineBlock::DrawFinalRows(const ICOORD& page_tr) {
578 #ifndef GRAPHICS_DISABLED
579  if (non_text_block_) return;
580  double gradient = tan(skew_angle_);
581  FCOORD rotation(1.0f, 0.0f);
582  int left_edge = block_->block->pdblk.bounding_box().left();
583  ScrollView* win = create_to_win(page_tr);
585  TO_ROW_IT row_it = block_->get_rows();
586  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
587  plot_parallel_row(row_it.data(), gradient, left_edge, colour, rotation);
588  colour = static_cast<ScrollView::Color>(colour + 1);
589  if (colour > ScrollView::MAGENTA)
590  colour = ScrollView::RED;
591  }
593  // Show discarded blobs.
594  plot_blob_list(win, &block_->underlines,
596  if (block_->blobs.length() > 0)
597  tprintf("%d blobs discarded as noise\n", block_->blobs.length());
598  draw_meanlines(block_, gradient, left_edge, ScrollView::WHITE, rotation);
599 #endif
600 }
601 
602 void BaselineBlock::DrawPixSpline(Pix* pix_in) {
603  if (non_text_block_) return;
604  TO_ROW_IT row_it = block_->get_rows();
605  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
606  row_it.data()->baseline.plot(pix_in);
607  }
608 }
609 
610 // Top-level line-spacing calculation. Computes an estimate of the line-
611 // spacing, using the current baselines in the TO_ROWS of the block, and
612 // then refines it by fitting a regression line to the baseline positions
613 // as a function of their integer index.
614 // Returns true if it seems that the model is a reasonable fit to the
615 // observations.
616 bool BaselineBlock::ComputeLineSpacing() {
617  FCOORD direction(cos(skew_angle_), sin(skew_angle_));
618  GenericVector<double> row_positions;
619  ComputeBaselinePositions(direction, &row_positions);
620  if (row_positions.size() < 2) return false;
621  EstimateLineSpacing();
622  RefineLineSpacing(row_positions);
623  // Verify that the model is reasonable.
624  double max_baseline_error = kMaxBaselineError * line_spacing_;
625  int non_trivial_gaps = 0;
626  int fitting_gaps = 0;
627  for (int i = 1; i < row_positions.size(); ++i) {
628  double row_gap = fabs(row_positions[i - 1] - row_positions[i]);
629  if (row_gap > max_baseline_error) {
630  ++non_trivial_gaps;
631  if (fabs(row_gap - line_spacing_) <= max_baseline_error)
632  ++fitting_gaps;
633  }
634  }
635  if (debug_level_ > 0) {
636  tprintf("Spacing %g, in %d rows, %d gaps fitted out of %d non-trivial\n",
637  line_spacing_, row_positions.size(), fitting_gaps,
638  non_trivial_gaps);
639  }
640  return fitting_gaps > non_trivial_gaps * kMinFittingLinespacings;
641 }
642 
643 // Computes the deskewed vertical position of each baseline in the block and
644 // stores them in the given vector.
645 // This is calculated as the perpendicular distance of the middle of each
646 // baseline (in case it has a different skew angle) from the line passing
647 // through the origin parallel to the block baseline angle.
648 // NOTE that "distance" above is a signed quantity so we can tell which side
649 // of the block baseline a line sits, hence the function and argument name
650 // positions not distances.
651 void BaselineBlock::ComputeBaselinePositions(const FCOORD& direction,
652  GenericVector<double>* positions) {
653  positions->clear();
654  for (int r = 0; r < rows_.size(); ++r) {
655  BaselineRow* row = rows_[r];
656  const TBOX& row_box = row->bounding_box();
657  float x_middle = (row_box.left() + row_box.right()) / 2.0f;
658  FCOORD row_pos(x_middle, static_cast<float>(row->StraightYAtX(x_middle)));
659  float offset = direction * row_pos;
660  positions->push_back(offset);
661  }
662 }
663 
664 // Computes an estimate of the line spacing of the block from the median
665 // of the spacings between adjacent overlapping textlines.
666 void BaselineBlock::EstimateLineSpacing() {
667  GenericVector<float> spacings;
668  for (int r = 0; r < rows_.size(); ++r) {
669  BaselineRow* row = rows_[r];
670  // Exclude silly lines.
671  if (fabs(row->BaselineAngle()) > M_PI * 0.25) continue;
672  // Find the first row after row that overlaps it significantly.
673  const TBOX& row_box = row->bounding_box();
674  int r2;
675  for (r2 = r + 1; r2 < rows_.size() &&
676  !row_box.major_x_overlap(rows_[r2]->bounding_box());
677  ++r2);
678  if (r2 < rows_.size()) {
679  BaselineRow* row2 = rows_[r2];
680  // Exclude silly lines.
681  if (fabs(row2->BaselineAngle()) > M_PI * 0.25) continue;
682  float spacing = row->SpaceBetween(*row2);
683  spacings.push_back(spacing);
684  }
685  }
686  // If we have at least one value, use it, otherwise leave the previous
687  // value unchanged.
688  if (!spacings.empty()) {
689  line_spacing_ = spacings[spacings.choose_nth_item(spacings.size() / 2)];
690  if (debug_level_ > 1)
691  tprintf("Estimate of linespacing = %g\n", line_spacing_);
692  }
693 }
694 
695 // Refines the line spacing of the block by fitting a regression
696 // line to the deskewed y-position of each baseline as a function of its
697 // estimated line index, allowing for a small error in the initial linespacing
698 // and choosing the best available model.
699 void BaselineBlock::RefineLineSpacing(const GenericVector<double>& positions) {
700  double spacings[3], offsets[3], errors[3];
701  int index_range;
702  errors[0] = FitLineSpacingModel(positions, line_spacing_,
703  &spacings[0], &offsets[0], &index_range);
704  if (index_range > 1) {
705  double spacing_plus = line_spacing_ / (1.0 + 1.0 / index_range);
706  // Try the hypotheses that there might be index_range +/- 1 line spaces.
707  errors[1] = FitLineSpacingModel(positions, spacing_plus,
708  &spacings[1], &offsets[1], nullptr);
709  double spacing_minus = line_spacing_ / (1.0 - 1.0 / index_range);
710  errors[2] = FitLineSpacingModel(positions, spacing_minus,
711  &spacings[2], &offsets[2], nullptr);
712  for (int i = 1; i <= 2; ++i) {
713  if (errors[i] < errors[0]) {
714  spacings[0] = spacings[i];
715  offsets[0] = offsets[i];
716  errors[0] = errors[i];
717  }
718  }
719  }
720  if (spacings[0] > 0.0) {
721  line_spacing_ = spacings[0];
722  line_offset_ = offsets[0];
723  model_error_ = errors[0];
724  if (debug_level_ > 0) {
725  tprintf("Final linespacing model = %g + offset %g, error %g\n",
726  line_spacing_, line_offset_, model_error_);
727  }
728  }
729 }
730 
731 // Given an initial estimate of line spacing (m_in) and the positions of each
732 // baseline, computes the line spacing of the block more accurately in m_out,
733 // and the corresponding intercept in c_out, and the number of spacings seen
734 // in index_delta. Returns the error of fit to the line spacing model.
735 // Uses a simple linear regression, but optimized the offset using the median.
736 double BaselineBlock::FitLineSpacingModel(
737  const GenericVector<double>& positions, double m_in,
738  double* m_out, double* c_out, int* index_delta) {
739  if (m_in == 0.0f || positions.size() < 2) {
740  *m_out = m_in;
741  *c_out = 0.0;
742  if (index_delta != nullptr) *index_delta = 0;
743  return 0.0;
744  }
745  GenericVector<double> offsets;
746  // Get the offset (remainder) linespacing for each line and choose the median.
747  for (int i = 0; i < positions.size(); ++i)
748  offsets.push_back(fmod(positions[i], m_in));
749  // Get the median offset.
750  double median_offset = MedianOfCircularValues(m_in, &offsets);
751  // Now fit a line to quantized line number and offset.
752  LLSQ llsq;
753  int min_index = INT32_MAX;
754  int max_index = -INT32_MAX;
755  for (int i = 0; i < positions.size(); ++i) {
756  double y_pos = positions[i];
757  int row_index = IntCastRounded((y_pos - median_offset) / m_in);
758  UpdateRange(row_index, &min_index, &max_index);
759  llsq.add(row_index, y_pos);
760  }
761  // Get the refined line spacing.
762  *m_out = llsq.m();
763  // Use the median offset rather than the mean.
764  offsets.truncate(0);
765  for (int i = 0; i < positions.size(); ++i)
766  offsets.push_back(fmod(positions[i], *m_out));
767  // Get the median offset.
768  if (debug_level_ > 2) {
769  for (int i = 0; i < offsets.size(); ++i)
770  tprintf("%d: %g\n", i, offsets[i]);
771  }
772  *c_out = MedianOfCircularValues(*m_out, &offsets);
773  if (debug_level_ > 1) {
774  tprintf("Median offset = %g, compared to mean of %g.\n",
775  *c_out, llsq.c(*m_out));
776  }
777  // Index_delta is the number of hypothesized line gaps present.
778  if (index_delta != nullptr)
779  *index_delta = max_index - min_index;
780  // Use the regression model's intercept to compute the error, as it may be
781  // a full line-spacing in disagreement with the median.
782  double rms_error = llsq.rms(*m_out, llsq.c(*m_out));
783  if (debug_level_ > 1) {
784  tprintf("Linespacing of y=%g x + %g improved to %g x + %g, rms=%g\n",
785  m_in, median_offset, *m_out, *c_out, rms_error);
786  }
787  return rms_error;
788 }
789 
790 BaselineDetect::BaselineDetect(int debug_level, const FCOORD& page_skew,
791  TO_BLOCK_LIST* blocks)
792  : page_skew_(page_skew), debug_level_(debug_level) {
793  TO_BLOCK_IT it(blocks);
794  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
795  TO_BLOCK* to_block = it.data();
796  BLOCK* block = to_block->block;
797  POLY_BLOCK* pb = block->pdblk.poly_block();
798  // A note about non-text blocks.
799  // On output, non-text blocks are supposed to contain a single empty word
800  // in each incoming text line. These mark out the polygonal bounds of the
801  // block. Ideally no baselines should be required, but currently
802  // make_words crashes if a baseline and xheight are not provided, so we
803  // include non-text blocks here, but flag them for special treatment.
804  bool non_text = pb != nullptr && !pb->IsText();
805  blocks_.push_back(new BaselineBlock(debug_level_, non_text, to_block));
806  }
807 }
808 
809 // Finds the initial baselines for each TO_ROW in each TO_BLOCK, gathers
810 // block-wise and page-wise data to smooth small blocks/rows, and applies
811 // smoothing based on block/page-level skew and block-level linespacing.
812 void BaselineDetect::ComputeStraightBaselines(bool use_box_bottoms) {
813  GenericVector<double> block_skew_angles;
814  for (int i = 0; i < blocks_.size(); ++i) {
815  BaselineBlock* bl_block = blocks_[i];
816  if (debug_level_ > 0)
817  tprintf("Fitting initial baselines...\n");
818  if (bl_block->FitBaselinesAndFindSkew(use_box_bottoms)) {
819  block_skew_angles.push_back(bl_block->skew_angle());
820  }
821  }
822  // Compute a page-wide default skew for blocks with too little information.
823  double default_block_skew = page_skew_.angle();
824  if (!block_skew_angles.empty()) {
825  default_block_skew = MedianOfCircularValues(M_PI, &block_skew_angles);
826  }
827  if (debug_level_ > 0) {
828  tprintf("Page skew angle = %g\n", default_block_skew);
829  }
830  // Set bad lines in each block to the default block skew and then force fit
831  // a linespacing model where it makes sense to do so.
832  for (int i = 0; i < blocks_.size(); ++i) {
833  BaselineBlock* bl_block = blocks_[i];
834  bl_block->ParallelizeBaselines(default_block_skew);
835  bl_block->SetupBlockParameters(); // This replaced compute_row_stats.
836  }
837 }
838 
839 // Computes the baseline splines for each TO_ROW in each TO_BLOCK and
840 // other associated side-effects, including pre-associating blobs, computing
841 // x-heights and displaying debug information.
842 // NOTE that ComputeStraightBaselines must have been called first as this
843 // sets up data in the TO_ROWs upon which this function depends.
845  bool enable_splines,
846  bool remove_noise,
847  bool show_final_rows,
848  Textord* textord) {
849  for (int i = 0; i < blocks_.size(); ++i) {
850  BaselineBlock* bl_block = blocks_[i];
851  if (enable_splines)
852  bl_block->PrepareForSplineFitting(page_tr, remove_noise);
853  bl_block->FitBaselineSplines(enable_splines, show_final_rows, textord);
854  if (show_final_rows) {
855  bl_block->DrawFinalRows(page_tr);
856  }
857  }
858 }
859 
860 } // namespace tesseract.
kMaxBaselineError
const double kMaxBaselineError
Definition: baselinedetect.cpp:52
ScrollView
Definition: scrollview.h:97
tesseract::BaselineBlock::FitBaselinesAndFindSkew
bool FitBaselinesAndFindSkew(bool use_box_bottoms)
Definition: baselinedetect.cpp:434
kNumSkipPoints
const int kNumSkipPoints
Definition: baselinedetect.cpp:44
FCOORD::angle
float angle() const
find angle
Definition: points.h:246
TO_BLOCK::baseline_offset
float baseline_offset
Definition: blobbox.h:786
LLSQ::add
void add(double x, double y)
Definition: linlsq.cpp:45
PDBLK::bounding_box
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:58
LLSQ
Definition: linlsq.h:27
tesseract::BaselineBlock::skew_angle
double skew_angle() const
Definition: baselinedetect.h:136
BLOCK::set_xheight
void set_xheight(int32_t height)
set char size
Definition: ocrblock.h:67
create_to_win
ScrollView * create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:42
POLY_BLOCK::IsText
bool IsText() const
Definition: polyblk.h:62
separate_underlines
void separate_underlines(TO_BLOCK *block, float gradient, FCOORD rotation, bool testing_on)
Definition: makerow.cpp:1772
LLSQ::mean_point
FCOORD mean_point() const
Definition: linlsq.cpp:158
kMinFittingLinespacings
const double kMinFittingLinespacings
Definition: baselinedetect.cpp:58
TO_ROW::set_parallel_line
void set_parallel_line(float gradient, float new_c, float new_error)
Definition: blobbox.h:611
tesseract::BaselineDetect::BaselineDetect
BaselineDetect(int debug_level, const FCOORD &page_skew, TO_BLOCK_LIST *blocks)
Definition: baselinedetect.cpp:790
blobbox.h
FCOORD::y
float y() const
Definition: points.h:209
LLSQ::c
double c(double m) const
Definition: linlsq.cpp:110
ICOORD
integer coordinate
Definition: points.h:30
tesseract::BaselineBlock::DrawFinalRows
void DrawFinalRows(const ICOORD &page_tr)
Definition: baselinedetect.cpp:577
TO_ROW::line_m
float line_m() const
Definition: blobbox.h:570
TBOX::print
void print() const
Definition: rect.h:277
baselinedetect.h
QSPLINE
Definition: quspline.h:31
FCOORD::x
float x() const
Definition: points.h:206
TBOX::top
int16_t top() const
Definition: rect.h:57
GenericVector::choose_nth_item
int choose_nth_item(int target_index)
Definition: genericvector.h:289
underlin.h
TO_BLOCK::blobs
BLOBNBOX_LIST blobs
Definition: blobbox.h:771
TO_BLOCK
Definition: blobbox.h:691
tesseract::BaselineRow
Definition: baselinedetect.h:40
kMaxSkewDeviation
const double kMaxSkewDeviation
Definition: baselinedetect.cpp:46
plot_parallel_row
void plot_parallel_row(TO_ROW *row, float gradient, int32_t left, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:117
detlinefit.h
IntCastRounded
int IntCastRounded(double x)
Definition: helpers.h:173
tesseract::BaselineRow::AdjustBaselineToGrid
double AdjustBaselineToGrid(int debug, const FCOORD &direction, double line_spacing, double line_offset)
Definition: baselinedetect.cpp:228
tesseract::BaselineBlock::SetupBlockParameters
void SetupBlockParameters() const
Definition: baselinedetect.cpp:505
LLSQ::m
double m() const
Definition: linlsq.cpp:95
FCOORD
Definition: points.h:187
BLOBNBOX
Definition: blobbox.h:142
tesseract::BaselineBlock::ParallelizeBaselines
void ParallelizeBaselines(double default_block_skew)
Definition: baselinedetect.cpp:463
TO_BLOCK::underlines
BLOBNBOX_LIST underlines
Definition: blobbox.h:772
tesseract::Textord::compute_row_xheight
void compute_row_xheight(TO_ROW *row, const FCOORD &rotation, float gradient, int block_line_size)
Definition: makerow.cpp:1366
tesseract::BaselineRow::SetupOldLineParameters
void SetupOldLineParameters(TO_ROW *row) const
Definition: baselinedetect.cpp:76
tesseract::DetLineFit::Clear
void Clear()
Definition: detlinefit.cpp:45
LLSQ::rms
double rms(double m, double c) const
Definition: linlsq.cpp:123
blob_x_order
int blob_x_order(const void *item1, const void *item2)
Definition: makerow.cpp:2573
tesseract::DetLineFit::SufficientPointsForIndependentFit
bool SufficientPointsForIndependentFit() const
Definition: detlinefit.cpp:162
FCOORD::length
float length() const
find length
Definition: points.h:227
GenericVector::push_back
int push_back(T object)
Definition: genericvector.h:799
BLOCK
Definition: ocrblock.h:28
tesseract::BaselineRow::FitBaseline
bool FitBaseline(bool use_box_bottoms)
Definition: baselinedetect.cpp:141
BLOCK::pdblk
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:189
makerow.h
TBOX::major_x_overlap
bool major_x_overlap(const TBOX &box) const
Definition: rect.h:403
kOffsetQuantizationFactor
const double kOffsetQuantizationFactor
Definition: baselinedetect.cpp:48
PDBLK::poly_block
POLY_BLOCK * poly_block() const
Definition: pdblock.h:54
tesseract::BaselineBlock::DrawPixSpline
void DrawPixSpline(Pix *pix_in)
Definition: baselinedetect.cpp:602
tesseract::BaselineBlock::BaselineBlock
BaselineBlock(int debug_level, bool non_text, TO_BLOCK *block)
Definition: baselinedetect.cpp:408
TO_BLOCK::block
BLOCK * block
Definition: blobbox.h:776
ScrollView::MAGENTA
Definition: scrollview.h:109
tesseract::DetLineFit::ConstrainedFit
double ConstrainedFit(const FCOORD &direction, double min_dist, double max_dist, bool debug, ICOORD *line_pt)
Definition: detlinefit.cpp:130
TO_BLOCK::xheight
float xheight
Definition: blobbox.h:787
GenericVector::empty
bool empty() const
Definition: genericvector.h:86
TBOX::width
int16_t width() const
Definition: rect.h:114
ScrollView::YELLOW
Definition: scrollview.h:105
TBOX::bottom
int16_t bottom() const
Definition: rect.h:64
textord.h
ScrollView::WHITE
Definition: scrollview.h:103
linlsq.h
tesseract::BaselineBlock::SpacingModelError
static double SpacingModelError(double perp_disp, double line_spacing, double line_offset)
Definition: baselinedetect.cpp:422
helpers.h
tesseract::BaselineRow::BaselineRow
BaselineRow(double line_size, TO_ROW *to_row)
Definition: baselinedetect.cpp:64
tesseract
Definition: baseapi.h:65
distance
UnicodeText::const_iterator::difference_type distance(const UnicodeText::const_iterator &first, const UnicodeText::const_iterator &last)
Definition: unicodetext.cc:44
textord_restore_underlines
bool textord_restore_underlines
Definition: underlin.cpp:21
draw_meanlines
void draw_meanlines(TO_BLOCK *block, float gradient, int32_t left, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:200
kFitHalfrangeFactor
const double kFitHalfrangeFactor
Definition: baselinedetect.cpp:50
TO_BLOCK::line_spacing
float line_spacing
Definition: blobbox.h:778
TO_ROW::line_c
float line_c() const
Definition: blobbox.h:573
ScrollView::RED
Definition: scrollview.h:104
STATS
Definition: statistc.h:30
BLOBNBOX::bounding_box
const TBOX & bounding_box() const
Definition: blobbox.h:229
tprintf.h
tesseract::BaselineRow::SpaceBetween
double SpaceBetween(const BaselineRow &other) const
Definition: baselinedetect.cpp:107
drawtord.h
GenericVector< double >
tesseract::BaselineBlock
Definition: baselinedetect.h:129
tesseract::BaselineRow::StraightYAtX
double StraightYAtX(double x) const
Definition: baselinedetect.cpp:128
tesseract::BaselineDetect::ComputeStraightBaselines
void ComputeStraightBaselines(bool use_box_bottoms)
Definition: baselinedetect.cpp:812
TO_BLOCK::get_rows
TO_ROW_LIST * get_rows()
Definition: blobbox.h:703
tesseract::BaselineBlock::block
TO_BLOCK * block() const
Definition: baselinedetect.h:133
BLOBNBOX::baseline_position
int baseline_position() const
Definition: blobbox.h:388
tesseract::BaselineRow::Print
void Print() const
Definition: baselinedetect.cpp:86
BLOBNBOX::EstimateBaselinePosition
void EstimateBaselinePosition()
Definition: blobbox.cpp:355
kMaxBlobSizeMultiple
const double kMaxBlobSizeMultiple
Definition: baselinedetect.cpp:55
GenericVector::truncate
void truncate(int size)
Definition: genericvector.h:132
TBOX::left
int16_t left() const
Definition: rect.h:71
tesseract::Textord
Definition: textord.h:68
tesseract::BaselineBlock::FitBaselineSplines
void FitBaselineSplines(bool enable_splines, bool show_final_rows, Textord *textord)
Definition: baselinedetect.cpp:548
GenericVector::clear
void clear()
Definition: genericvector.h:857
tesseract::DetLineFit::Fit
double Fit(ICOORD *pt1, ICOORD *pt2)
Definition: detlinefit.h:75
tesseract::Textord::make_spline_rows
void make_spline_rows(TO_BLOCK *block, float gradient, bool testing_on)
Definition: makerow.cpp:2003
TBOX::right
int16_t right() const
Definition: rect.h:78
tesseract::BaselineDetect::ComputeBaselineSplinesAndXheights
void ComputeBaselineSplinesAndXheights(const ICOORD &page_tr, bool enable_splines, bool remove_noise, bool show_final_rows, Textord *textord)
Definition: baselinedetect.cpp:844
tesseract::DetLineFit::Add
void Add(const ICOORD &pt)
Definition: detlinefit.cpp:51
restore_underlined_blobs
void restore_underlined_blobs(TO_BLOCK *block)
Definition: underlin.cpp:28
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
BLOCK::classify_rotation
FCOORD classify_rotation() const
Definition: ocrblock.h:139
POLY_BLOCK
Definition: polyblk.h:26
TO_ROW
Definition: blobbox.h:543
kMaxDisplacementsModes
const int kMaxDisplacementsModes
Definition: baselinedetect.cpp:42
TO_ROW::set_line
void set_line(float new_m, float new_c, float new_error)
Definition: blobbox.h:603
tesseract::BaselineRow::BaselineAngle
double BaselineAngle() const
Definition: baselinedetect.cpp:97
tesseract::BaselineBlock::PrepareForSplineFitting
void PrepareForSplineFitting(ICOORD page_tr, bool remove_noise)
Definition: baselinedetect.cpp:532
tesseract::BaselineRow::PerpDisp
double PerpDisp(const FCOORD &direction) const
Definition: baselinedetect.cpp:120
ScrollView::CORAL
Definition: scrollview.h:119
ScrollView::Color
Color
Definition: scrollview.h:100
UpdateRange
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
Definition: helpers.h:118
MedianOfCircularValues
T MedianOfCircularValues(T modulus, GenericVector< T > *v)
Definition: linlsq.h:112
tesseract::kMaxBaselineError
const double kMaxBaselineError
Definition: colpartition.cpp:70
TO_ROW::baseline
QSPLINE baseline
Definition: blobbox.h:669
GenericVector::size
int size() const
Definition: genericvector.h:71
TO_BLOCK::max_blob_size
float max_blob_size
Definition: blobbox.h:785
vigorous_noise_removal
void vigorous_noise_removal(TO_BLOCK *block)
Definition: makerow.cpp:466
tesseract::BaselineRow::AdjustBaselineToParallel
void AdjustBaselineToParallel(int debug, const FCOORD &direction)
Definition: baselinedetect.cpp:213
pre_associate_blobs
void pre_associate_blobs(ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, bool testing_on)
Definition: makerow.cpp:1845
plot_blob_list
void plot_blob_list(ScrollView *win, BLOBNBOX_LIST *list, ScrollView::Color body_colour, ScrollView::Color child_colour)
Definition: blobbox.cpp:1068
tesseract::Textord::compute_block_xheight
void compute_block_xheight(TO_BLOCK *block, float gradient)
Definition: makerow.cpp:1254
TBOX
Definition: rect.h:33
TO_BLOCK::line_size
float line_size
Definition: blobbox.h:784