tesseract  4.0.0-1-g2a2b
baselinedetect.cpp
Go to the documentation of this file.
1 // File: baselinedetect.cpp
3 // Description: Initial Baseline Determination.
4 // Copyright 2012 Google Inc. All Rights Reserved.
5 // Author: rays@google.com (Ray Smith)
6 // Created: Mon Apr 30 10:15:31 PDT 2012
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #ifdef _MSC_VER
21 #define _USE_MATH_DEFINES
22 #endif // _MSC_VER
23 
24 #ifdef HAVE_CONFIG_H
25 #include "config_auto.h"
26 #endif
27 
28 #include "baselinedetect.h"
29 
30 #include <algorithm>
31 #include <cfloat> // for FLT_MAX
32 #include <cmath>
33 #include "allheaders.h"
34 #include "blobbox.h"
35 #include "detlinefit.h"
36 #include "drawtord.h"
37 #include "helpers.h"
38 #include "linlsq.h"
39 #include "makerow.h"
40 #include "textord.h"
41 #include "tprintf.h"
42 #include "underlin.h"
43 
44 // Number of displacement modes kept in displacement_modes_;
45 const int kMaxDisplacementsModes = 3;
46 // Number of points to skip when retrying initial fit.
47 const int kNumSkipPoints = 3;
48 // Max angle deviation (in radians) allowed to keep the independent baseline.
49 const double kMaxSkewDeviation = 1.0 / 64;
50 // Fraction of line spacing estimate for quantization of blob displacements.
51 const double kOffsetQuantizationFactor = 3.0 / 64;
52 // Fraction of line spacing estimate for computing blob fit error.
53 const double kFitHalfrangeFactor = 6.0 / 64;
54 // Max fraction of line spacing allowed before a baseline counts as badly fitting.
55 const double kMaxBaselineError = 3.0 / 64;
56 // Multiple of linespacing that sets max_blob_size in TO_BLOCK.
57 // Copied from textord_excess_blobsize.
58 const double kMaxBlobSizeMultiple = 1.3;
59 // Min fraction of linespacing gaps that should be close to the model before
60 // we will force the linespacing model on all the lines.
61 const double kMinFittingLinespacings = 0.25;
62 // A y-coordinate within a textline that is to be debugged.
63 //#define kDebugYCoord 1525
64 
65 namespace tesseract {
66 
67 BaselineRow::BaselineRow(double line_spacing, TO_ROW* to_row)
68  : blobs_(to_row->blob_list()),
69  baseline_pt1_(0.0f, 0.0f), baseline_pt2_(0.0f, 0.0f),
70  baseline_error_(0.0), good_baseline_(false) {
71  ComputeBoundingBox();
72  // Compute a scale factor for rounding to ints.
73  disp_quant_factor_ = kOffsetQuantizationFactor * line_spacing;
74  fit_halfrange_ = kFitHalfrangeFactor * line_spacing;
75  max_baseline_error_ = kMaxBaselineError * line_spacing;
76 }
77 
78 // Sets the TO_ROW with the output straight line.
80  // TODO(rays) get rid of this when m and c are no longer used.
81  double gradient = tan(BaselineAngle());
82  // para_c is the actual intercept of the baseline on the y-axis.
83  float para_c = StraightYAtX(0.0);
84  row->set_line(gradient, para_c, baseline_error_);
85  row->set_parallel_line(gradient, para_c, baseline_error_);
86 }
87 
88 // Outputs diagnostic information.
89 void BaselineRow::Print() const {
90  tprintf("Baseline (%g,%g)->(%g,%g), angle=%g, intercept=%g\n",
91  baseline_pt1_.x(), baseline_pt1_.y(),
92  baseline_pt2_.x(), baseline_pt2_.y(),
93  BaselineAngle(), StraightYAtX(0.0));
94  tprintf("Quant factor=%g, error=%g, good=%d, box:",
95  disp_quant_factor_, baseline_error_, good_baseline_);
96  bounding_box_.print();
97 }
98 
99 // Returns the skew angle (in radians) of the current baseline in [-pi,pi].
101  FCOORD baseline_dir(baseline_pt2_ - baseline_pt1_);
102  double angle = baseline_dir.angle();
103  // Baseline directions are only unique in a range of pi so constrain to
104  // [-pi/2, pi/2].
105  return fmod(angle + M_PI * 1.5, M_PI) - M_PI * 0.5;
106 }
107 
108 // Computes and returns the linespacing at the middle of the overlap
109 // between this and other.
110 double BaselineRow::SpaceBetween(const BaselineRow& other) const {
111  // Find the x-centre of overlap of the lines.
112  float x = (std::max(bounding_box_.left(), other.bounding_box_.left()) +
113  std::min(bounding_box_.right(), other.bounding_box_.right())) / 2.0f;
114  // Find the vertical centre between them.
115  float y = (StraightYAtX(x) + other.StraightYAtX(x)) / 2.0f;
116  // Find the perpendicular distance of (x,y) from each line.
117  FCOORD pt(x, y);
118  return PerpDistanceFromBaseline(pt) + other.PerpDistanceFromBaseline(pt);
119 }
120 
121 // Computes and returns the displacement of the center of the line
122 // perpendicular to the given direction.
123 double BaselineRow::PerpDisp(const FCOORD& direction) const {
124  float middle_x = (bounding_box_.left() + bounding_box_.right()) / 2.0f;
125  FCOORD middle_pos(middle_x, StraightYAtX(middle_x));
126  return direction * middle_pos / direction.length();
127 }
128 
129 // Computes the y coordinate at the given x using the straight baseline
130 // defined by baseline_pt1_ and baseline_pt2__.
131 double BaselineRow::StraightYAtX(double x) const {
132  double denominator = baseline_pt2_.x() - baseline_pt1_.x();
133  if (denominator == 0.0)
134  return (baseline_pt1_.y() + baseline_pt2_.y()) / 2.0;
135  return baseline_pt1_.y() +
136  (x - baseline_pt1_.x()) * (baseline_pt2_.y() - baseline_pt1_.y()) /
137  denominator;
138 }
139 
140 // Fits a straight baseline to the points. Returns true if it had enough
141 // points to be reasonably sure of the fitted baseline.
142 // If use_box_bottoms is false, baselines positions are formed by
143 // considering the outlines of the blobs.
144 bool BaselineRow::FitBaseline(bool use_box_bottoms) {
145  // Deterministic fitting is used wherever possible.
146  fitter_.Clear();
147  // Linear least squares is a backup if the DetLineFit produces a bad line.
148  LLSQ llsq;
149  BLOBNBOX_IT blob_it(blobs_);
150 
151  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
152  BLOBNBOX* blob = blob_it.data();
153  if (!use_box_bottoms) blob->EstimateBaselinePosition();
154  const TBOX& box = blob->bounding_box();
155  int x_middle = (box.left() + box.right()) / 2;
156 #ifdef kDebugYCoord
157  if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) {
158  tprintf("Box bottom = %d, baseline pos=%d for box at:",
159  box.bottom(), blob->baseline_position());
160  box.print();
161  }
162 #endif
163  fitter_.Add(ICOORD(x_middle, blob->baseline_position()), box.width() / 2);
164  llsq.add(x_middle, blob->baseline_position());
165  }
166  // Fit the line.
167  ICOORD pt1, pt2;
168  baseline_error_ = fitter_.Fit(&pt1, &pt2);
169  baseline_pt1_ = pt1;
170  baseline_pt2_ = pt2;
171  if (baseline_error_ > max_baseline_error_ &&
173  // The fit was bad but there were plenty of points, so try skipping
174  // the first and last few, and use the new line if it dramatically improves
175  // the error of fit.
176  double error = fitter_.Fit(kNumSkipPoints, kNumSkipPoints, &pt1, &pt2);
177  if (error < baseline_error_ / 2.0) {
178  baseline_error_ = error;
179  baseline_pt1_ = pt1;
180  baseline_pt2_ = pt2;
181  }
182  }
183  int debug = 0;
184 #ifdef kDebugYCoord
185  Print();
186  debug = bounding_box_.bottom() < kDebugYCoord &&
187  bounding_box_.top() > kDebugYCoord
188  ? 3 : 2;
189 #endif
190  // Now we obtained a direction from that fit, see if we can improve the
191  // fit using the same direction and some other start point.
192  FCOORD direction(pt2 - pt1);
193  double target_offset = direction * pt1;
194  good_baseline_ = false;
195  FitConstrainedIfBetter(debug, direction, 0.0, target_offset);
196  // Wild lines can be produced because DetLineFit allows vertical lines, but
197  // vertical text has been rotated so angles over pi/4 should be disallowed.
198  // Near vertical lines can still be produced by vertically aligned components
199  // on very short lines.
200  double angle = BaselineAngle();
201  if (fabs(angle) > M_PI * 0.25) {
202  // Use the llsq fit as a backup.
203  baseline_pt1_ = llsq.mean_point();
204  baseline_pt2_ = baseline_pt1_ + FCOORD(1.0f, llsq.m());
205  // TODO(rays) get rid of this when m and c are no longer used.
206  double m = llsq.m();
207  double c = llsq.c(m);
208  baseline_error_ = llsq.rms(m, c);
209  good_baseline_ = false;
210  }
211  return good_baseline_;
212 }
213 
214 // Modifies an existing result of FitBaseline to be parallel to the given
215 // direction vector if that produces a better result.
217  const FCOORD& direction) {
218  SetupBlobDisplacements(direction);
219  if (displacement_modes_.empty())
220  return;
221 #ifdef kDebugYCoord
222  if (bounding_box_.bottom() < kDebugYCoord &&
223  bounding_box_.top() > kDebugYCoord && debug < 3)
224  debug = 3;
225 #endif
226  FitConstrainedIfBetter(debug, direction, 0.0, displacement_modes_[0]);
227 }
228 
229 // Modifies the baseline to snap to the textline grid if the existing
230 // result is not good enough.
232  const FCOORD& direction,
233  double line_spacing,
234  double line_offset) {
235  if (blobs_->empty()) {
236  if (debug > 1) {
237  tprintf("Row empty at:");
238  bounding_box_.print();
239  }
240  return line_offset;
241  }
242  // Find the displacement_modes_ entry nearest to the grid.
243  double best_error = 0.0;
244  int best_index = -1;
245  for (int i = 0; i < displacement_modes_.size(); ++i) {
246  double blob_y = displacement_modes_[i];
247  double error = BaselineBlock::SpacingModelError(blob_y, line_spacing,
248  line_offset);
249  if (debug > 1) {
250  tprintf("Mode at %g has error %g from model \n", blob_y, error);
251  }
252  if (best_index < 0 || error < best_error) {
253  best_error = error;
254  best_index = i;
255  }
256  }
257  // We will move the baseline only if the chosen mode is close enough to the
258  // model.
259  double model_margin = max_baseline_error_ - best_error;
260  if (best_index >= 0 && model_margin > 0.0) {
261  // But if the current baseline is already close to the mode there is no
262  // point, and only the potential to damage accuracy by changing its angle.
263  double perp_disp = PerpDisp(direction);
264  double shift = displacement_modes_[best_index] - perp_disp;
265  if (fabs(shift) > max_baseline_error_) {
266  if (debug > 1) {
267  tprintf("Attempting linespacing model fit with mode %g to row at:",
268  displacement_modes_[best_index]);
269  bounding_box_.print();
270  }
271  FitConstrainedIfBetter(debug, direction, model_margin,
272  displacement_modes_[best_index]);
273  } else if (debug > 1) {
274  tprintf("Linespacing model only moves current line by %g for row at:",
275  shift);
276  bounding_box_.print();
277  }
278  } else if (debug > 1) {
279  tprintf("Linespacing model not close enough to any mode for row at:");
280  bounding_box_.print();
281  }
282  return fmod(PerpDisp(direction), line_spacing);
283 }
284 
285 // Sets up displacement_modes_ with the top few modes of the perpendicular
286 // distance of each blob from the given direction vector, after rounding.
287 void BaselineRow::SetupBlobDisplacements(const FCOORD& direction) {
288  // Set of perpendicular displacements of the blob bottoms from the required
289  // baseline direction.
290  GenericVector<double> perp_blob_dists;
291  displacement_modes_.truncate(0);
292  // Gather the skew-corrected position of every blob.
293  double min_dist = FLT_MAX;
294  double max_dist = -FLT_MAX;
295  BLOBNBOX_IT blob_it(blobs_);
296 #ifdef kDebugYCoord
297  bool debug = false;
298 #endif
299  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
300  BLOBNBOX* blob = blob_it.data();
301  const TBOX& box = blob->bounding_box();
302 #ifdef kDebugYCoord
303  if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) debug = true;
304 #endif
305  FCOORD blob_pos((box.left() + box.right()) / 2.0f,
306  blob->baseline_position());
307  double offset = direction * blob_pos;
308  perp_blob_dists.push_back(offset);
309 #ifdef kDebugYCoord
310  if (debug) {
311  tprintf("Displacement %g for blob at:", offset);
312  box.print();
313  }
314 #endif
315  UpdateRange(offset, &min_dist, &max_dist);
316  }
317  // Set up a histogram using disp_quant_factor_ as the bucket size.
318  STATS dist_stats(IntCastRounded(min_dist / disp_quant_factor_),
319  IntCastRounded(max_dist / disp_quant_factor_) + 1);
320  for (int i = 0; i < perp_blob_dists.size(); ++i) {
321  dist_stats.add(IntCastRounded(perp_blob_dists[i] / disp_quant_factor_), 1);
322  }
324  dist_stats.top_n_modes(kMaxDisplacementsModes, &scaled_modes);
325 #ifdef kDebugYCoord
326  if (debug) {
327  for (int i = 0; i < scaled_modes.size(); ++i) {
328  tprintf("Top mode = %g * %d\n",
329  scaled_modes[i].key * disp_quant_factor_, scaled_modes[i].data);
330  }
331  }
332 #endif
333  for (int i = 0; i < scaled_modes.size(); ++i)
334  displacement_modes_.push_back(disp_quant_factor_ * scaled_modes[i].key);
335 }
336 
337 // Fits a line in the given direction to blobs that are close to the given
338 // target_offset perpendicular displacement from the direction. The fit
339 // error is allowed to be cheat_allowance worse than the existing fit, and
340 // will still be used.
341 // If cheat_allowance > 0, the new fit will be good and replace the current
342 // fit if it has better fit (with cheat) OR its error is below
343 // max_baseline_error_ and the old fit is marked bad.
344 // Otherwise the new fit will only replace the old if it is really better,
345 // or the old fit is marked bad and the new fit has sufficient points, as
346 // well as being within the max_baseline_error_.
347 void BaselineRow::FitConstrainedIfBetter(int debug,
348  const FCOORD& direction,
349  double cheat_allowance,
350  double target_offset) {
351  double halfrange = fit_halfrange_ * direction.length();
352  double min_dist = target_offset - halfrange;
353  double max_dist = target_offset + halfrange;
354  ICOORD line_pt;
355  double new_error = fitter_.ConstrainedFit(direction, min_dist, max_dist,
356  debug > 2, &line_pt);
357  // Allow cheat_allowance off the new error
358  new_error -= cheat_allowance;
359  double old_angle = BaselineAngle();
360  double new_angle = direction.angle();
361  if (debug > 1) {
362  tprintf("Constrained error = %g, original = %g",
363  new_error, baseline_error_);
364  tprintf(" angles = %g, %g, delta=%g vs threshold %g\n",
365  old_angle, new_angle,
366  new_angle - old_angle, kMaxSkewDeviation);
367  }
368  bool new_good_baseline = new_error <= max_baseline_error_ &&
369  (cheat_allowance > 0.0 || fitter_.SufficientPointsForIndependentFit());
370  // The new will replace the old if any are true:
371  // 1. the new error is better
372  // 2. the old is NOT good, but the new is
373  // 3. there is a wild angular difference between them (assuming that the new
374  // is a better guess at the angle.)
375  if (new_error <= baseline_error_ ||
376  (!good_baseline_ && new_good_baseline) ||
377  fabs(new_angle - old_angle) > kMaxSkewDeviation) {
378  baseline_error_ = new_error;
379  baseline_pt1_ = line_pt;
380  baseline_pt2_ = baseline_pt1_ + direction;
381  good_baseline_ = new_good_baseline;
382  if (debug > 1) {
383  tprintf("Replacing with constrained baseline, good = %d\n",
384  good_baseline_);
385  }
386  } else if (debug > 1) {
387  tprintf("Keeping old baseline\n");
388  }
389 }
390 
391 // Returns the perpendicular distance of the point from the straight
392 // baseline.
393 double BaselineRow::PerpDistanceFromBaseline(const FCOORD& pt) const {
394  FCOORD baseline_vector(baseline_pt2_ - baseline_pt1_);
395  FCOORD offset_vector(pt - baseline_pt1_);
396  double distance = baseline_vector * offset_vector;
397  return sqrt(distance * distance / baseline_vector.sqlength());
398 }
399 
400 // Computes the bounding box of the row.
401 void BaselineRow::ComputeBoundingBox() {
402  BLOBNBOX_IT it(blobs_);
403  TBOX box;
404  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
405  box += it.data()->bounding_box();
406  }
407  bounding_box_ = box;
408 }
409 
410 
411 BaselineBlock::BaselineBlock(int debug_level, bool non_text, TO_BLOCK* block)
412  : block_(block), debug_level_(debug_level), non_text_block_(non_text),
413  good_skew_angle_(false), skew_angle_(0.0),
414  line_spacing_(block->line_spacing), line_offset_(0.0), model_error_(0.0) {
415  TO_ROW_IT row_it(block_->get_rows());
416  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
417  // Sort the blobs on the rows.
418  row_it.data()->blob_list()->sort(blob_x_order);
419  rows_.push_back(new BaselineRow(block->line_spacing, row_it.data()));
420  }
421 }
422 
423 // Computes and returns the absolute error of the given perp_disp from the
424 // given linespacing model.
425 double BaselineBlock::SpacingModelError(double perp_disp, double line_spacing,
426  double line_offset) {
427  // Round to the nearest multiple of line_spacing + line offset.
428  int multiple = IntCastRounded((perp_disp - line_offset) / line_spacing);
429  double model_y = line_spacing * multiple + line_offset;
430  return fabs(perp_disp - model_y);
431 }
432 
433 // Fits straight line baselines and computes the skew angle from the
434 // median angle. Returns true if a good angle is found.
435 // If use_box_bottoms is false, baseline positions are formed by
436 // considering the outlines of the blobs.
437 bool BaselineBlock::FitBaselinesAndFindSkew(bool use_box_bottoms) {
438  if (non_text_block_) return false;
439  GenericVector<double> angles;
440  for (int r = 0; r < rows_.size(); ++r) {
441  BaselineRow* row = rows_[r];
442  if (row->FitBaseline(use_box_bottoms)) {
443  double angle = row->BaselineAngle();
444  angles.push_back(angle);
445  }
446  if (debug_level_ > 1)
447  row->Print();
448  }
449 
450  if (!angles.empty()) {
451  skew_angle_ = MedianOfCircularValues(M_PI, &angles);
452  good_skew_angle_ = true;
453  } else {
454  skew_angle_ = 0.0f;
455  good_skew_angle_ = false;
456  }
457  if (debug_level_ > 0) {
458  tprintf("Initial block skew angle = %g, good = %d\n",
459  skew_angle_, good_skew_angle_);
460  }
461  return good_skew_angle_;
462 }
463 
464 // Refits the baseline to a constrained angle, using the stored block
465 // skew if good enough, otherwise the supplied default skew.
466 void BaselineBlock::ParallelizeBaselines(double default_block_skew) {
467  if (non_text_block_) return;
468  if (!good_skew_angle_) skew_angle_ = default_block_skew;
469  if (debug_level_ > 0)
470  tprintf("Adjusting block to skew angle %g\n", skew_angle_);
471  FCOORD direction(cos(skew_angle_), sin(skew_angle_));
472  for (int r = 0; r < rows_.size(); ++r) {
473  BaselineRow* row = rows_[r];
474  row->AdjustBaselineToParallel(debug_level_, direction);
475  if (debug_level_ > 1)
476  row->Print();
477  }
478  if (rows_.size() < 3 || !ComputeLineSpacing())
479  return;
480  // Enforce the line spacing model on all lines that don't yet have a good
481  // baseline.
482  // Start by finding the row that is best fitted to the model.
483  int best_row = 0;
484  double best_error = SpacingModelError(rows_[0]->PerpDisp(direction),
485  line_spacing_, line_offset_);
486  for (int r = 1; r < rows_.size(); ++r) {
487  double error = SpacingModelError(rows_[r]->PerpDisp(direction),
488  line_spacing_, line_offset_);
489  if (error < best_error) {
490  best_error = error;
491  best_row = r;
492  }
493  }
494  // Starting at the best fitting row, work outwards, syncing the offset.
495  double offset = line_offset_;
496  for (int r = best_row + 1; r < rows_.size(); ++r) {
497  offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
498  line_spacing_, offset);
499  }
500  offset = line_offset_;
501  for (int r = best_row - 1; r >= 0; --r) {
502  offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
503  line_spacing_, offset);
504  }
505 }
506 
507 // Sets the parameters in TO_BLOCK that are needed by subsequent processes.
509  if (line_spacing_ > 0.0) {
510  // Where was block_line_spacing set before?
511  float min_spacing = std::min(block_->line_spacing, static_cast<float>(line_spacing_));
512  if (min_spacing < block_->line_size)
513  block_->line_size = min_spacing;
514  block_->line_spacing = line_spacing_;
515  block_->baseline_offset = line_offset_;
516  block_->max_blob_size = line_spacing_ * kMaxBlobSizeMultiple;
517  }
518  // Setup the parameters on all the rows.
519  TO_ROW_IT row_it(block_->get_rows());
520  for (int r = 0; r < rows_.size(); ++r, row_it.forward()) {
521  BaselineRow* row = rows_[r];
522  TO_ROW* to_row = row_it.data();
523  row->SetupOldLineParameters(to_row);
524  }
525 }
526 
527 // Processing that is required before fitting baseline splines, but requires
528 // linear baselines in order to be successful:
529 // Removes noise if required
530 // Separates out underlines
531 // Pre-associates blob fragments.
532 // TODO(rays/joeliu) This entire section of code is inherited from the past
533 // and could be improved/eliminated.
534 // page_tr is used to size a debug window.
535 void BaselineBlock::PrepareForSplineFitting(ICOORD page_tr, bool remove_noise) {
536  if (non_text_block_) return;
537  if (remove_noise) {
538  vigorous_noise_removal(block_);
539  }
540  FCOORD rotation(1.0f, 0.0f);
541  double gradient = tan(skew_angle_);
542  separate_underlines(block_, gradient, rotation, true);
543  pre_associate_blobs(page_tr, block_, rotation, true);
544 }
545 
546 // Fits splines to the textlines, or creates fake QSPLINES from the straight
547 // baselines that are already on the TO_ROWs.
548 // As a side-effect, computes the xheights of the rows and the block.
549 // Although x-height estimation is conceptually separate, it is part of
550 // detecting perspective distortion and therefore baseline fitting.
551 void BaselineBlock::FitBaselineSplines(bool enable_splines,
552  bool show_final_rows,
553  Textord* textord) {
554  double gradient = tan(skew_angle_);
555  FCOORD rotation(1.0f, 0.0f);
556 
557  if (enable_splines) {
558  textord->make_spline_rows(block_, gradient, show_final_rows);
559  } else {
560  // Make a fake spline from the existing line.
561  TBOX block_box= block_->block->pdblk.bounding_box();
562  TO_ROW_IT row_it = block_->get_rows();
563  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
564  TO_ROW* row = row_it.data();
565  int32_t xstarts[2] = { block_box.left(), block_box.right() };
566  double coeffs[3] = { 0.0, row->line_m(), row->line_c() };
567  row->baseline = QSPLINE(1, xstarts, coeffs);
568  textord->compute_row_xheight(row, block_->block->classify_rotation(),
569  row->line_m(), block_->line_size);
570  }
571  }
572  textord->compute_block_xheight(block_, gradient);
573  block_->block->set_xheight(block_->xheight);
574  if (textord_restore_underlines) // fix underlines
575  restore_underlined_blobs(block_);
576 }
577 
578 // Draws the (straight) baselines and final blobs colored according to
579 // what was discarded as noise and what is associated with each row.
580 void BaselineBlock::DrawFinalRows(const ICOORD& page_tr) {
581 #ifndef GRAPHICS_DISABLED
582  if (non_text_block_) return;
583  double gradient = tan(skew_angle_);
584  FCOORD rotation(1.0f, 0.0f);
585  int left_edge = block_->block->pdblk.bounding_box().left();
586  ScrollView* win = create_to_win(page_tr);
588  TO_ROW_IT row_it = block_->get_rows();
589  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
590  plot_parallel_row(row_it.data(), gradient, left_edge, colour, rotation);
591  colour = static_cast<ScrollView::Color>(colour + 1);
592  if (colour > ScrollView::MAGENTA)
593  colour = ScrollView::RED;
594  }
596  // Show discarded blobs.
597  plot_blob_list(win, &block_->underlines,
599  if (block_->blobs.length() > 0)
600  tprintf("%d blobs discarded as noise\n", block_->blobs.length());
601  draw_meanlines(block_, gradient, left_edge, ScrollView::WHITE, rotation);
602 #endif
603 }
604 
605 void BaselineBlock::DrawPixSpline(Pix* pix_in) {
606  if (non_text_block_) return;
607  TO_ROW_IT row_it = block_->get_rows();
608  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
609  row_it.data()->baseline.plot(pix_in);
610  }
611 }
612 
613 // Top-level line-spacing calculation. Computes an estimate of the line-
614 // spacing, using the current baselines in the TO_ROWS of the block, and
615 // then refines it by fitting a regression line to the baseline positions
616 // as a function of their integer index.
617 // Returns true if it seems that the model is a reasonable fit to the
618 // observations.
619 bool BaselineBlock::ComputeLineSpacing() {
620  FCOORD direction(cos(skew_angle_), sin(skew_angle_));
621  GenericVector<double> row_positions;
622  ComputeBaselinePositions(direction, &row_positions);
623  if (row_positions.size() < 2) return false;
624  EstimateLineSpacing();
625  RefineLineSpacing(row_positions);
626  // Verify that the model is reasonable.
627  double max_baseline_error = kMaxBaselineError * line_spacing_;
628  int non_trivial_gaps = 0;
629  int fitting_gaps = 0;
630  for (int i = 1; i < row_positions.size(); ++i) {
631  double row_gap = fabs(row_positions[i - 1] - row_positions[i]);
632  if (row_gap > max_baseline_error) {
633  ++non_trivial_gaps;
634  if (fabs(row_gap - line_spacing_) <= max_baseline_error)
635  ++fitting_gaps;
636  }
637  }
638  if (debug_level_ > 0) {
639  tprintf("Spacing %g, in %d rows, %d gaps fitted out of %d non-trivial\n",
640  line_spacing_, row_positions.size(), fitting_gaps,
641  non_trivial_gaps);
642  }
643  return fitting_gaps > non_trivial_gaps * kMinFittingLinespacings;
644 }
645 
646 // Computes the deskewed vertical position of each baseline in the block and
647 // stores them in the given vector.
648 // This is calculated as the perpendicular distance of the middle of each
649 // baseline (in case it has a different skew angle) from the line passing
650 // through the origin parallel to the block baseline angle.
651 // NOTE that "distance" above is a signed quantity so we can tell which side
652 // of the block baseline a line sits, hence the function and argument name
653 // positions not distances.
654 void BaselineBlock::ComputeBaselinePositions(const FCOORD& direction,
655  GenericVector<double>* positions) {
656  positions->clear();
657  for (int r = 0; r < rows_.size(); ++r) {
658  BaselineRow* row = rows_[r];
659  const TBOX& row_box = row->bounding_box();
660  float x_middle = (row_box.left() + row_box.right()) / 2.0f;
661  FCOORD row_pos(x_middle, static_cast<float>(row->StraightYAtX(x_middle)));
662  float offset = direction * row_pos;
663  positions->push_back(offset);
664  }
665 }
666 
667 // Computes an estimate of the line spacing of the block from the median
668 // of the spacings between adjacent overlapping textlines.
669 void BaselineBlock::EstimateLineSpacing() {
670  GenericVector<float> spacings;
671  for (int r = 0; r < rows_.size(); ++r) {
672  BaselineRow* row = rows_[r];
673  // Exclude silly lines.
674  if (fabs(row->BaselineAngle()) > M_PI * 0.25) continue;
675  // Find the first row after row that overlaps it significantly.
676  const TBOX& row_box = row->bounding_box();
677  int r2;
678  for (r2 = r + 1; r2 < rows_.size() &&
679  !row_box.major_x_overlap(rows_[r2]->bounding_box());
680  ++r2);
681  if (r2 < rows_.size()) {
682  BaselineRow* row2 = rows_[r2];
683  // Exclude silly lines.
684  if (fabs(row2->BaselineAngle()) > M_PI * 0.25) continue;
685  float spacing = row->SpaceBetween(*row2);
686  spacings.push_back(spacing);
687  }
688  }
689  // If we have at least one value, use it, otherwise leave the previous
690  // value unchanged.
691  if (!spacings.empty()) {
692  line_spacing_ = spacings[spacings.choose_nth_item(spacings.size() / 2)];
693  if (debug_level_ > 1)
694  tprintf("Estimate of linespacing = %g\n", line_spacing_);
695  }
696 }
697 
698 // Refines the line spacing of the block by fitting a regression
699 // line to the deskewed y-position of each baseline as a function of its
700 // estimated line index, allowing for a small error in the initial linespacing
701 // and choosing the best available model.
702 void BaselineBlock::RefineLineSpacing(const GenericVector<double>& positions) {
703  double spacings[3], offsets[3], errors[3];
704  int index_range;
705  errors[0] = FitLineSpacingModel(positions, line_spacing_,
706  &spacings[0], &offsets[0], &index_range);
707  if (index_range > 1) {
708  double spacing_plus = line_spacing_ / (1.0 + 1.0 / index_range);
709  // Try the hypotheses that there might be index_range +/- 1 line spaces.
710  errors[1] = FitLineSpacingModel(positions, spacing_plus,
711  &spacings[1], &offsets[1], nullptr);
712  double spacing_minus = line_spacing_ / (1.0 - 1.0 / index_range);
713  errors[2] = FitLineSpacingModel(positions, spacing_minus,
714  &spacings[2], &offsets[2], nullptr);
715  for (int i = 1; i <= 2; ++i) {
716  if (errors[i] < errors[0]) {
717  spacings[0] = spacings[i];
718  offsets[0] = offsets[i];
719  errors[0] = errors[i];
720  }
721  }
722  }
723  if (spacings[0] > 0.0) {
724  line_spacing_ = spacings[0];
725  line_offset_ = offsets[0];
726  model_error_ = errors[0];
727  if (debug_level_ > 0) {
728  tprintf("Final linespacing model = %g + offset %g, error %g\n",
729  line_spacing_, line_offset_, model_error_);
730  }
731  }
732 }
733 
734 // Given an initial estimate of line spacing (m_in) and the positions of each
735 // baseline, computes the line spacing of the block more accurately in m_out,
736 // and the corresponding intercept in c_out, and the number of spacings seen
737 // in index_delta. Returns the error of fit to the line spacing model.
738 // Uses a simple linear regression, but optimized the offset using the median.
739 double BaselineBlock::FitLineSpacingModel(
740  const GenericVector<double>& positions, double m_in,
741  double* m_out, double* c_out, int* index_delta) {
742  if (m_in == 0.0f || positions.size() < 2) {
743  *m_out = m_in;
744  *c_out = 0.0;
745  if (index_delta != nullptr) *index_delta = 0;
746  return 0.0;
747  }
748  GenericVector<double> offsets;
749  // Get the offset (remainder) linespacing for each line and choose the median.
750  for (int i = 0; i < positions.size(); ++i)
751  offsets.push_back(fmod(positions[i], m_in));
752  // Get the median offset.
753  double median_offset = MedianOfCircularValues(m_in, &offsets);
754  // Now fit a line to quantized line number and offset.
755  LLSQ llsq;
756  int min_index = INT32_MAX;
757  int max_index = -INT32_MAX;
758  for (int i = 0; i < positions.size(); ++i) {
759  double y_pos = positions[i];
760  int row_index = IntCastRounded((y_pos - median_offset) / m_in);
761  UpdateRange(row_index, &min_index, &max_index);
762  llsq.add(row_index, y_pos);
763  }
764  // Get the refined line spacing.
765  *m_out = llsq.m();
766  // Use the median offset rather than the mean.
767  offsets.truncate(0);
768  for (int i = 0; i < positions.size(); ++i)
769  offsets.push_back(fmod(positions[i], *m_out));
770  // Get the median offset.
771  if (debug_level_ > 2) {
772  for (int i = 0; i < offsets.size(); ++i)
773  tprintf("%d: %g\n", i, offsets[i]);
774  }
775  *c_out = MedianOfCircularValues(*m_out, &offsets);
776  if (debug_level_ > 1) {
777  tprintf("Median offset = %g, compared to mean of %g.\n",
778  *c_out, llsq.c(*m_out));
779  }
780  // Index_delta is the number of hypothesized line gaps present.
781  if (index_delta != nullptr)
782  *index_delta = max_index - min_index;
783  // Use the regression model's intercept to compute the error, as it may be
784  // a full line-spacing in disagreement with the median.
785  double rms_error = llsq.rms(*m_out, llsq.c(*m_out));
786  if (debug_level_ > 1) {
787  tprintf("Linespacing of y=%g x + %g improved to %g x + %g, rms=%g\n",
788  m_in, median_offset, *m_out, *c_out, rms_error);
789  }
790  return rms_error;
791 }
792 
793 BaselineDetect::BaselineDetect(int debug_level, const FCOORD& page_skew,
794  TO_BLOCK_LIST* blocks)
795  : page_skew_(page_skew), debug_level_(debug_level) {
796  TO_BLOCK_IT it(blocks);
797  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
798  TO_BLOCK* to_block = it.data();
799  BLOCK* block = to_block->block;
800  POLY_BLOCK* pb = block->pdblk.poly_block();
801  // A note about non-text blocks.
802  // On output, non-text blocks are supposed to contain a single empty word
803  // in each incoming text line. These mark out the polygonal bounds of the
804  // block. Ideally no baselines should be required, but currently
805  // make_words crashes if a baseline and xheight are not provided, so we
806  // include non-text blocks here, but flag them for special treatment.
807  bool non_text = pb != nullptr && !pb->IsText();
808  blocks_.push_back(new BaselineBlock(debug_level_, non_text, to_block));
809  }
810 }
811 
812 // Finds the initial baselines for each TO_ROW in each TO_BLOCK, gathers
813 // block-wise and page-wise data to smooth small blocks/rows, and applies
814 // smoothing based on block/page-level skew and block-level linespacing.
815 void BaselineDetect::ComputeStraightBaselines(bool use_box_bottoms) {
816  GenericVector<double> block_skew_angles;
817  for (int i = 0; i < blocks_.size(); ++i) {
818  BaselineBlock* bl_block = blocks_[i];
819  if (debug_level_ > 0)
820  tprintf("Fitting initial baselines...\n");
821  if (bl_block->FitBaselinesAndFindSkew(use_box_bottoms)) {
822  block_skew_angles.push_back(bl_block->skew_angle());
823  }
824  }
825  // Compute a page-wide default skew for blocks with too little information.
826  double default_block_skew = page_skew_.angle();
827  if (!block_skew_angles.empty()) {
828  default_block_skew = MedianOfCircularValues(M_PI, &block_skew_angles);
829  }
830  if (debug_level_ > 0) {
831  tprintf("Page skew angle = %g\n", default_block_skew);
832  }
833  // Set bad lines in each block to the default block skew and then force fit
834  // a linespacing model where it makes sense to do so.
835  for (int i = 0; i < blocks_.size(); ++i) {
836  BaselineBlock* bl_block = blocks_[i];
837  bl_block->ParallelizeBaselines(default_block_skew);
838  bl_block->SetupBlockParameters(); // This replaced compute_row_stats.
839  }
840 }
841 
842 // Computes the baseline splines for each TO_ROW in each TO_BLOCK and
843 // other associated side-effects, including pre-associating blobs, computing
844 // x-heights and displaying debug information.
845 // NOTE that ComputeStraightBaselines must have been called first as this
846 // sets up data in the TO_ROWs upon which this function depends.
848  bool enable_splines,
849  bool remove_noise,
850  bool show_final_rows,
851  Textord* textord) {
852  for (int i = 0; i < blocks_.size(); ++i) {
853  BaselineBlock* bl_block = blocks_[i];
854  if (enable_splines)
855  bl_block->PrepareForSplineFitting(page_tr, remove_noise);
856  bl_block->FitBaselineSplines(enable_splines, show_final_rows, textord);
857  if (show_final_rows) {
858  bl_block->DrawFinalRows(page_tr);
859  }
860  }
861 }
862 
863 } // namespace tesseract.
QSPLINE baseline
Definition: blobbox.h:683
BaselineBlock(int debug_level, bool non_text, TO_BLOCK *block)
double c(double m) const
Definition: linlsq.cpp:116
const double kMaxBaselineError
bool FitBaselinesAndFindSkew(bool use_box_bottoms)
void PrepareForSplineFitting(ICOORD page_tr, bool remove_noise)
int size() const
Definition: genericvector.h:71
float line_m() const
Definition: blobbox.h:583
void pre_associate_blobs(ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, bool testing_on)
Definition: makerow.cpp:1847
void print() const
Definition: rect.h:278
void DrawPixSpline(Pix *pix_in)
Definition: rect.h:34
bool FitBaseline(bool use_box_bottoms)
const int kMaxDisplacementsModes
void compute_row_xheight(TO_ROW *row, const FCOORD &rotation, float gradient, int block_line_size)
Definition: makerow.cpp:1368
void restore_underlined_blobs(TO_BLOCK *block)
Definition: underlin.cpp:34
void SetupOldLineParameters(TO_ROW *row) const
const double kFitHalfrangeFactor
float angle() const
find angle
Definition: points.h:248
int direction(EDGEPT *point)
Definition: vecfuncs.cpp:43
double PerpDisp(const FCOORD &direction) const
const double kOffsetQuantizationFactor
const double kMinFittingLinespacings
void plot_blob_list(ScrollView *win, BLOBNBOX_LIST *list, ScrollView::Color body_colour, ScrollView::Color child_colour)
Definition: blobbox.cpp:1087
double BaselineAngle() const
void draw_meanlines(TO_BLOCK *block, float gradient, int32_t left, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:209
Definition: statistc.h:33
const double kMaxBaselineError
TO_ROW_LIST * get_rows()
Definition: blobbox.h:717
void compute_block_xheight(TO_BLOCK *block, float gradient)
Definition: makerow.cpp:1256
int blob_x_order(const void *item1, const void *item2)
Definition: makerow.cpp:2575
float line_spacing
Definition: blobbox.h:792
double SpaceBetween(const BaselineRow &other) const
int16_t width() const
Definition: rect.h:115
void set_line(float new_m, float new_c, float new_error)
Definition: blobbox.h:616
void add(double x, double y)
Definition: linlsq.cpp:48
bool SufficientPointsForIndependentFit() const
Definition: detlinefit.cpp:162
float max_blob_size
Definition: blobbox.h:799
FCOORD mean_point() const
Definition: linlsq.cpp:166
int16_t left() const
Definition: rect.h:72
float baseline_offset
Definition: blobbox.h:800
int16_t top() const
Definition: rect.h:58
float line_c() const
Definition: blobbox.h:586
void Add(const ICOORD &pt)
Definition: detlinefit.cpp:51
void set_parallel_line(float gradient, float new_c, float new_error)
Definition: blobbox.h:624
void AdjustBaselineToParallel(int debug, const FCOORD &direction)
integer coordinate
Definition: points.h:32
BaselineRow(double line_size, TO_ROW *to_row)
void EstimateBaselinePosition()
Definition: blobbox.cpp:358
bool major_x_overlap(const TBOX &box) const
Definition: rect.h:412
static double SpacingModelError(double perp_disp, double line_spacing, double line_offset)
int IntCastRounded(double x)
Definition: helpers.h:168
double m() const
Definition: linlsq.cpp:100
FCOORD classify_rotation() const
Definition: ocrblock.h:142
float xheight
Definition: blobbox.h:801
double AdjustBaselineToGrid(int debug, const FCOORD &direction, double line_spacing, double line_offset)
POLY_BLOCK * poly_block() const
Definition: pdblock.h:56
void separate_underlines(TO_BLOCK *block, float gradient, FCOORD rotation, bool testing_on)
Definition: makerow.cpp:1774
Definition: linlsq.h:28
int choose_nth_item(int target_index)
double Fit(ICOORD *pt1, ICOORD *pt2)
Definition: detlinefit.h:75
bool empty() const
Definition: genericvector.h:90
void FitBaselineSplines(bool enable_splines, bool show_final_rows, Textord *textord)
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
bool IsText() const
Definition: polyblk.h:49
Definition: ocrblock.h:30
void plot_parallel_row(TO_ROW *row, float gradient, int32_t left, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:124
EXTERN bool textord_restore_underlines
Definition: underlin.cpp:26
BaselineDetect(int debug_level, const FCOORD &page_skew, TO_BLOCK_LIST *blocks)
int push_back(T object)
const double kMaxSkewDeviation
BLOCK * block
Definition: blobbox.h:790
void make_spline_rows(TO_BLOCK *block, float gradient, bool testing_on)
Definition: makerow.cpp:2005
void ComputeStraightBaselines(bool use_box_bottoms)
double rms(double m, double c) const
Definition: linlsq.cpp:130
void set_xheight(int32_t height)
set char size
Definition: ocrblock.h:70
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:60
TO_BLOCK * block() const
Definition: points.h:189
void vigorous_noise_removal(TO_BLOCK *block)
Definition: makerow.cpp:467
const TBOX & bounding_box() const
Definition: blobbox.h:231
void ParallelizeBaselines(double default_block_skew)
void DrawFinalRows(const ICOORD &page_tr)
int16_t right() const
Definition: rect.h:79
float x() const
Definition: points.h:208
int baseline_position() const
Definition: blobbox.h:390
BLOBNBOX_LIST blobs
Definition: blobbox.h:785
void truncate(int size)
double StraightYAtX(double x) const
void ComputeBaselineSplinesAndXheights(const ICOORD &page_tr, bool enable_splines, bool remove_noise, bool show_final_rows, Textord *textord)
ScrollView * create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:46
T MedianOfCircularValues(T modulus, GenericVector< T > *v)
Definition: linlsq.h:113
double ConstrainedFit(const FCOORD &direction, double min_dist, double max_dist, bool debug, ICOORD *line_pt)
Definition: detlinefit.cpp:130
int16_t bottom() const
Definition: rect.h:65
BLOBNBOX_LIST underlines
Definition: blobbox.h:786
PDBLK pdblk
Definition: ocrblock.h:192
const double kMaxBlobSizeMultiple
float y() const
Definition: points.h:211
const int kNumSkipPoints
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
Definition: helpers.h:121
float line_size
Definition: blobbox.h:798