tesseract  5.0.0-alpha-619-ge9db
cjkpitch.cpp
Go to the documentation of this file.
1 // File: cjkpitch.cpp
3 // Description: Code to determine fixed pitchness and the pitch if fixed,
4 // for CJK text.
5 // Author: takenaka@google.com (Hiroshi Takenaka)
6 //
7 // Copyright 2011 Google Inc. All Rights Reserved.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #include "cjkpitch.h"
22 #include "topitch.h"
23 #include "tovars.h"
24 
25 #include <algorithm>
26 #include <vector> // for std::vector
27 
28 static BOOL_VAR(textord_space_size_is_variable, false,
29  "If true, word delimiter spaces are assumed to have "
30  "variable width, even though characters have fixed pitch.");
31 
32 namespace {
33 
34 // Allow +/-10% error for character pitch / body size.
35 static const float kFPTolerance = 0.1;
36 
37 // Minimum ratio of "good" character pitch for a row to be considered
38 // to be fixed-pitch.
39 static const float kFixedPitchThreshold = 0.35;
40 
41 // rank statistics for a small collection of float values.
42 class SimpleStats {
43  public:
44  SimpleStats(): finalized_(false), values_() { }
45  ~SimpleStats() { }
46 
47  void Clear() {
48  values_.clear();
49  finalized_ = false;
50  }
51 
52  void Add(float value) {
53  values_.push_back(value);
54  finalized_ = false;
55  }
56 
57  void Finish() {
58  values_.sort(float_compare);
59  finalized_ = true;
60  }
61 
62  float ile(double frac) {
63  if (!finalized_) Finish();
64  if (values_.empty()) return 0.0;
65  if (frac >= 1.0) return values_.back();
66  if (frac <= 0.0 || values_.size() == 1) return values_[0];
67  int index = static_cast<int>((values_.size() - 1) * frac);
68  float reminder = (values_.size() - 1) * frac - index;
69 
70  return values_[index] * (1.0 - reminder) +
71  values_[index + 1] * reminder;
72  }
73 
74  float median() {
75  return ile(0.5);
76  }
77 
78  float minimum() {
79  if (!finalized_) Finish();
80  if (values_.empty()) return 0.0;
81  return values_[0];
82  }
83 
84  int size() const {
85  return values_.size();
86  }
87 
88  private:
89  static int float_compare(const void* a, const void* b) {
90  const auto* f_a = static_cast<const float*>(a);
91  const auto* f_b = static_cast<const float*>(b);
92  return (*f_a > *f_b) ? 1 : ((*f_a < *f_b) ? -1 : 0);
93  }
94 
95  bool finalized_;
96  GenericVector<float> values_;
97 };
98 
99 // statistics for a small collection of float pairs (x, y).
100 // EstimateYFor(x, r) returns the estimated y at x, based on
101 // existing samples between x*(1-r) ~ x*(1+r).
102 class LocalCorrelation {
103  public:
104  struct float_pair {
105  float x, y;
106  int vote;
107  };
108 
109  LocalCorrelation(): finalized_(false) { }
110  ~LocalCorrelation() { }
111 
112  void Finish() {
113  values_.sort(float_pair_compare);
114  finalized_ = true;
115  }
116 
117  void Clear() {
118  finalized_ = false;
119  }
120 
121  void Add(float x, float y, int v) {
122  struct float_pair value;
123  value.x = x;
124  value.y = y;
125  value.vote = v;
126  values_.push_back(value);
127  finalized_ = false;
128  }
129 
130  float EstimateYFor(float x, float r) {
131  ASSERT_HOST(finalized_);
132  int start = 0, end = values_.size();
133  // Because the number of samples (used_) is assumed to be small,
134  // just use linear search to find values within the range.
135  while (start < values_.size() && values_[start].x < x * (1.0 - r)) start++;
136  while (end - 1 >= 0 && values_[end - 1].x > x * (1.0 + r)) end--;
137 
138  // Fall back to the global average if there are no data within r
139  // of x.
140  if (start >= end) {
141  start = 0;
142  end = values_.size();
143  }
144 
145  // Compute weighted average of the values.
146  float rc = 0;
147  int vote = 0;
148  for (int i = start; i < end; i++) {
149  rc += values_[i].vote * x * values_[i].y / values_[i].x;
150  vote += values_[i].vote;
151  }
152 
153  return rc / vote;
154  }
155 
156  private:
157  static int float_pair_compare(const void* a, const void* b) {
158  const auto* f_a = static_cast<const float_pair*>(a);
159  const auto* f_b = static_cast<const float_pair*>(b);
160  return (f_a->x > f_b->x) ? 1 : ((f_a->x < f_b->x) ? -1 : 0);
161  }
162 
163  bool finalized_;
165 };
166 
167 // Class to represent a character on a fixed pitch row. A FPChar may
168 // consist of multiple blobs (BLOBNBOX's).
169 class FPChar {
170  public:
171  enum Alignment {
172  ALIGN_UNKNOWN, ALIGN_GOOD, ALIGN_BAD
173  };
174 
175  FPChar(): box_(), real_body_(),
176  from_(nullptr), to_(nullptr), num_blobs_(0), max_gap_(0),
177  final_(false), alignment_(ALIGN_UNKNOWN),
178  merge_to_prev_(false), delete_flag_(false) {
179  }
180 
181  // Initialize from blob.
182  void Init(BLOBNBOX *blob) {
183  box_ = blob->bounding_box();
184  real_body_ = box_;
185  from_ = to_ = blob;
186  num_blobs_ = 1;
187  }
188 
189  // Merge this character with "next". The "next" character should
190  // consist of succeeding blobs on the same row.
191  void Merge(const FPChar &next) {
192  int gap = real_body_.x_gap(next.real_body_);
193  if (gap > max_gap_) max_gap_ = gap;
194 
195  box_ += next.box_;
196  real_body_ += next.real_body_;
197  to_ = next.to_;
198  num_blobs_ += next.num_blobs_;
199  }
200 
201  // Accessors.
202  const TBOX &box() const { return box_; }
203  void set_box(const TBOX &box) {
204  box_ = box;
205  }
206  const TBOX &real_body() const { return real_body_; }
207 
208  bool is_final() const { return final_; }
209  void set_final(bool flag) {
210  final_ = flag;
211  }
212 
213  const Alignment& alignment() const {
214  return alignment_;
215  }
216  void set_alignment(Alignment alignment) {
217  alignment_ = alignment;
218  }
219 
220  bool merge_to_prev() const {
221  return merge_to_prev_;
222  }
223  void set_merge_to_prev(bool flag) {
224  merge_to_prev_ = flag;
225  }
226 
227  bool delete_flag() const {
228  return delete_flag_;
229  }
230  void set_delete_flag(bool flag) {
231  delete_flag_ = flag;
232  }
233 
234  int max_gap() const {
235  return max_gap_;
236  }
237 
238  int num_blobs() const {
239  return num_blobs_;
240  }
241 
242  private:
243  TBOX box_; // Rectangle region considered to be occupied by this
244  // character. It could be bigger than the bounding box.
245  TBOX real_body_; // Real bounding box of this character.
246  BLOBNBOX *from_; // The first blob of this character.
247  BLOBNBOX *to_; // The last blob of this character.
248  int num_blobs_; // Number of blobs that belong to this character.
249  int max_gap_; // Maximum x gap between the blobs.
250 
251  bool final_; // True if alignment/fragmentation decision for this
252  // character is finalized.
253 
254  Alignment alignment_; // Alignment status.
255  bool merge_to_prev_; // True if this is a fragmented blob that
256  // needs to be merged to the previous
257  // character.
258 
259  int delete_flag_; // True if this character is merged to another
260  // one and needs to be deleted.
261 };
262 
263 // Class to represent a fixed pitch row, as a linear collection of
264 // FPChar's.
265 class FPRow {
266  public:
267  FPRow() : all_pitches_(), all_gaps_(), good_pitches_(), good_gaps_(),
268  heights_(), characters_() {
269  }
270 
271  ~FPRow() { }
272 
273  // Initialize from TD_ROW.
274  void Init(TO_ROW *row);
275 
276  // Estimate character pitch of this row, based on current alignment
277  // status of underlying FPChar's. The argument pass1 can be set to
278  // true if the function is called after Pass1Analyze(), to eliminate
279  // some redundant computation.
280  void EstimatePitch(bool pass1);
281 
282  // Check each character if it has good character pitches between its
283  // predecessor and its successor and set its alignment status. If
284  // we already calculated the estimated pitch for this row, the value
285  // is used. If we didn't, a character is considered to be good, if
286  // the pitches between its predecessor and its successor are almost
287  // equal.
288  void Pass1Analyze();
289 
290  // Find characters that fit nicely into one imaginary body next to a
291  // character which is already finalized. Then mark them as character
292  // fragments.
293  bool Pass2Analyze();
294 
295  // Merge FPChars marked as character fragments into one.
296  void MergeFragments();
297 
298  // Finalize characters that are already large enough and cannot be
299  // merged with others any more.
300  void FinalizeLargeChars();
301 
302  // Output pitch estimation results to attributes of TD_ROW.
303  void OutputEstimations();
304 
305  void DebugOutputResult(int row_index);
306 
307  int good_pitches() {
308  return good_pitches_.size();
309  }
310 
311  float pitch() {
312  return pitch_;
313  }
314 
315  float estimated_pitch() {
316  return estimated_pitch_;
317  }
318 
319  void set_estimated_pitch(float v) {
320  estimated_pitch_ = v;
321  }
322 
323  float height() {
324  return height_;
325  }
326 
327  float height_pitch_ratio() {
328  if (good_pitches_.size() < 2) return -1.0;
329  return height_ / good_pitches_.median();
330  }
331 
332  float gap() {
333  return gap_;
334  }
335 
336  size_t num_chars() {
337  return characters_.size();
338  }
339  FPChar *character(int i) {
340  return &characters_[i];
341  }
342 
343  const TBOX &box(int i) {
344  return characters_[i].box();
345  }
346 
347  const TBOX &real_body(int i) {
348  return characters_[i].real_body();
349  }
350 
351  bool is_box_modified(int i) {
352  return !(characters_[i].box() == characters_[i].real_body());
353  }
354 
355  float center_x(int i) {
356  return (characters_[i].box().left() + characters_[i].box().right()) / 2.0;
357  }
358 
359  bool is_final(int i) {
360  return characters_[i].is_final();
361  }
362 
363  void finalize(int i) {
364  characters_[i].set_final(true);
365  }
366 
367  bool is_good(int i) {
368  return characters_[i].alignment() == FPChar::ALIGN_GOOD;
369  }
370 
371  void mark_good(int i) {
372  characters_[i].set_alignment(FPChar::ALIGN_GOOD);
373  }
374 
375  void mark_bad(int i) {
376  characters_[i].set_alignment(FPChar::ALIGN_BAD);
377  }
378 
379  void clear_alignment(int i) {
380  characters_[i].set_alignment(FPChar::ALIGN_UNKNOWN);
381  }
382 
383  private:
384  static float x_overlap_fraction(const TBOX& box1, const TBOX& box2) {
385  if (std::min(box1.width(), box2.width()) == 0) return 0.0;
386  return -box1.x_gap(box2) / static_cast<float>(std::min(box1.width(), box2.width()));
387  }
388 
389  static bool mostly_overlap(const TBOX& box1, const TBOX& box2) {
390  return x_overlap_fraction(box1, box2) > 0.9;
391  }
392 
393  static bool significant_overlap(const TBOX& box1, const TBOX& box2) {
394  if (std::min(box1.width(), box2.width()) == 0) return false;
395  int overlap = -box1.x_gap(box2);
396  return overlap > 1 || x_overlap_fraction(box1, box2) > 0.1;
397  }
398 
399  static float box_pitch(const TBOX& ref, const TBOX& box) {
400  return abs(ref.left() + ref.right() - box.left() - box.right()) / 2.0;
401  }
402 
403  // Check if two neighboring characters satisfy the fixed pitch model.
404  static bool is_good_pitch(float pitch, const TBOX& box1, const TBOX& box2) {
405  // Character box shouldn't exceed pitch.
406  if (box1.width() >= pitch * (1.0 + kFPTolerance) ||
407  box2.width() >= pitch * (1.0 + kFPTolerance) ||
408  box1.height() >= pitch * (1.0 + kFPTolerance) ||
409  box2.height() >= pitch * (1.0 + kFPTolerance)) return false;
410 
411  const float real_pitch = box_pitch(box1, box2);
412  if (fabs(real_pitch - pitch) < pitch * kFPTolerance) return true;
413 
414  if (textord_space_size_is_variable) {
415  // Hangul characters usually have fixed pitch, but words are
416  // delimited by space which can be narrower than characters.
417  if (real_pitch > pitch && real_pitch < pitch * 2.0 &&
418  real_pitch - box1.x_gap(box2) < pitch) {
419  return true;
420  }
421  }
422  return false;
423  }
424 
425  static bool is_interesting_blob(const BLOBNBOX *blob) {
426  return !blob->joined_to_prev() && blob->flow() != BTFT_LEADER;
427  }
428 
429  // Cleanup chars that are already merged to others.
430  void DeleteChars() {
431  int index = 0;
432  for (int i = 0; i < characters_.size(); ++i) {
433  if (!characters_[i].delete_flag()) {
434  if (index != i) characters_[index] = characters_[i];
435  index++;
436  }
437  }
438  characters_.truncate(index);
439  }
440 
441  float pitch_ = 0.0f; // Character pitch.
442  float estimated_pitch_ = 0.0f; // equal to pitch_ if pitch_ is considered
443  // to be good enough.
444  float height_ = 0.0f; // Character height.
445  float gap_ = 0.0f; // Minimum gap between characters.
446 
447  // Pitches between any two successive characters.
448  SimpleStats all_pitches_;
449  // Gaps between any two successive characters.
450  SimpleStats all_gaps_;
451  // Pitches between any two successive characters that are consistent
452  // with the fixed pitch model.
453  SimpleStats good_pitches_;
454  // Gaps between any two successive characters that are consistent
455  // with the fixed pitch model.
456  SimpleStats good_gaps_;
457 
458  SimpleStats heights_;
459 
460  GenericVector<FPChar> characters_;
461  TO_ROW *real_row_ = nullptr; // Underlying TD_ROW for this row.
462 };
463 
464 void FPRow::Init(TO_ROW *row) {
465  ASSERT_HOST(row != nullptr);
466  ASSERT_HOST(row->xheight > 0);
467  real_row_ = row;
468  real_row_->pitch_decision = PITCH_CORR_PROP; // Default decision.
469 
470  BLOBNBOX_IT blob_it = row->blob_list();
471  // Initialize characters_ and compute the initial estimation of
472  // character height.
473  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
474  if (is_interesting_blob(blob_it.data())) {
475  FPChar fp_char;
476  fp_char.Init(blob_it.data());
477  // Merge unconditionally if two blobs overlap.
478  if (!characters_.empty() &&
479  significant_overlap(fp_char.box(), characters_.back().box())) {
480  characters_.back().Merge(fp_char);
481  } else {
482  characters_.push_back(fp_char);
483  }
484  TBOX bound = blob_it.data()->bounding_box();
485  if (bound.height() * 3.0 > bound.width()) {
486  heights_.Add(bound.height());
487  }
488  }
489  }
490  heights_.Finish();
491  height_ = heights_.ile(0.875);
492 }
493 
494 void FPRow::OutputEstimations() {
495  if (good_pitches_.size() == 0) {
496  pitch_ = 0.0f;
497  real_row_->pitch_decision = PITCH_CORR_PROP;
498  return;
499  }
500 
501  pitch_ = good_pitches_.median();
502  real_row_->fixed_pitch = pitch_;
503  // good_gaps_.ile(0.125) can be large if most characters on the row
504  // are skinny. Use pitch_ - height_ instead if it's smaller, but
505  // positive.
506  real_row_->kern_size = real_row_->pr_nonsp =
507  std::min(good_gaps_.ile(0.125), std::max(pitch_ - height_, 0.0f));
508  real_row_->body_size = pitch_ - real_row_->kern_size;
509 
510  if (good_pitches_.size() < all_pitches_.size() * kFixedPitchThreshold) {
511  // If more than half of the characters of a line don't fit to the
512  // fixed pitch model, consider the line to be proportional. 50%
513  // seems to be a good threshold in practice as well.
514  // Anyway we store estimated values (fixed_pitch, kern_size, etc.) in
515  // real_row_ as a partial estimation result and try to use them in the
516  // normalization process.
517  real_row_->pitch_decision = PITCH_CORR_PROP;
518  return;
519  } else if (good_pitches_.size() > all_pitches_.size() * 0.75) {
520  real_row_->pitch_decision = PITCH_DEF_FIXED;
521  } else {
522  real_row_->pitch_decision = PITCH_CORR_FIXED;
523  }
524 
525  real_row_->space_size = real_row_->pr_space = pitch_;
526  // Set min_space to 50% of character pitch so that we can break CJK
527  // text at a half-width space after punctuation.
528  real_row_->min_space = (pitch_ + good_gaps_.minimum()) * 0.5;
529 
530  // Don't consider a quarter space as a real space, because it's used
531  // for line justification in traditional Japanese books.
532  real_row_->max_nonspace = std::max(pitch_ * 0.25 + good_gaps_.minimum(),
533  static_cast<double>(good_gaps_.ile(0.875)));
534 
535  int space_threshold =
536  std::min((real_row_->max_nonspace + real_row_->min_space) / 2,
537  static_cast<int>(real_row_->xheight));
538 
539  // Make max_nonspace larger than any intra-character gap so that
540  // make_prop_words() won't break a row at the middle of a character.
541  for (size_t i = 0; i < num_chars(); ++i) {
542  if (characters_[i].max_gap() > real_row_->max_nonspace) {
543  real_row_->max_nonspace = characters_[i].max_gap();
544  }
545  }
546  real_row_->space_threshold =
547  std::min((real_row_->max_nonspace + real_row_->min_space) / 2,
548  static_cast<int>(real_row_->xheight));
549  real_row_->used_dm_model = false;
550 
551  // Setup char_cells.
552  ICOORDELT_IT cell_it = &real_row_->char_cells;
553  auto *cell = new ICOORDELT(real_body(0).left(), 0);
554  cell_it.add_after_then_move(cell);
555 
556  int right = real_body(0).right();
557  for (size_t i = 1; i < num_chars(); ++i) {
558  // Put a word break if gap between two characters is bigger than
559  // space_threshold. Don't break if none of two characters
560  // couldn't be "finalized", because maybe they need to be merged
561  // to one character.
562  if ((is_final(i - 1) || is_final(i)) &&
563  real_body(i - 1).x_gap(real_body(i)) > space_threshold) {
564  cell = new ICOORDELT(right + 1, 0);
565  cell_it.add_after_then_move(cell);
566  while (right + pitch_ < box(i).left()) {
567  right += pitch_;
568  cell = new ICOORDELT(right + 1, 0);
569  cell_it.add_after_then_move(cell);
570  }
571  right = box(i).left();
572  }
573  cell = new ICOORDELT((right + real_body(i).left()) / 2, 0);
574  cell_it.add_after_then_move(cell);
575  right = real_body(i).right();
576  }
577 
578  cell = new ICOORDELT(right + 1, 0);
579  cell_it.add_after_then_move(cell);
580 
581  // TODO(takenaka): add code to store alignment/fragmentation
582  // information to blobs so that it can be reused later, e.g. in
583  // recognition phase.
584 }
585 
586 void FPRow::EstimatePitch(bool pass1) {
587  good_pitches_.Clear();
588  all_pitches_.Clear();
589  good_gaps_.Clear();
590  all_gaps_.Clear();
591  heights_.Clear();
592  if (num_chars() == 0) return;
593 
594  int32_t cx0, cx1;
595  bool prev_was_good = is_good(0);
596  cx0 = center_x(0);
597 
598  heights_.Add(box(0).height());
599  for (size_t i = 1; i < num_chars(); i++) {
600  cx1 = center_x(i);
601  int32_t pitch = cx1 - cx0;
602  int32_t gap = std::max(0, real_body(i - 1).x_gap(real_body(i)));
603 
604  heights_.Add(box(i).height());
605  // Ignore if the pitch is too close. But don't ignore wide pitch
606  // may be the result of large tracking.
607  if (pitch > height_ * 0.5) {
608  all_pitches_.Add(pitch);
609  all_gaps_.Add(gap);
610  if (is_good(i)) {
611  // In pass1 (after Pass1Analyze()), all characters marked as
612  // "good" have a good consistent pitch with their previous
613  // characters. However, it's not true in pass2 and a good
614  // character may have a good pitch only between its successor.
615  // So we collect only pitch values between two good
616  // characters. and within tolerance in pass2.
617  if (pass1 || (prev_was_good &&
618  fabs(estimated_pitch_ - pitch) <
619  kFPTolerance * estimated_pitch_)) {
620  good_pitches_.Add(pitch);
621  if (!is_box_modified(i - 1) && !is_box_modified(i)) {
622  good_gaps_.Add(gap);
623  }
624  }
625  prev_was_good = true;
626  } else {
627  prev_was_good = false;
628  }
629  }
630  cx0 = cx1;
631  }
632 
633  good_pitches_.Finish();
634  all_pitches_.Finish();
635  good_gaps_.Finish();
636  all_gaps_.Finish();
637  heights_.Finish();
638 
639  height_ = heights_.ile(0.875);
640  if (all_pitches_.size() == 0) {
641  pitch_ = 0.0f;
642  gap_ = 0.0f;
643  } else if (good_pitches_.size() < 2) {
644  // We don't have enough data to estimate the pitch of this row yet.
645  // Use median of all pitches as the initial guess.
646  pitch_ = all_pitches_.median();
647  ASSERT_HOST(pitch_ > 0.0f);
648  gap_ = all_gaps_.ile(0.125);
649  } else {
650  pitch_ = good_pitches_.median();
651  ASSERT_HOST(pitch_ > 0.0f);
652  gap_ = good_gaps_.ile(0.125);
653  }
654 }
655 
656 void FPRow::DebugOutputResult(int row_index) {
657  if (num_chars() > 0) {
658  tprintf("Row %d: pitch_decision=%d, fixed_pitch=%f, max_nonspace=%d, "
659  "space_size=%f, space_threshold=%d, xheight=%f\n",
660  row_index, static_cast<int>(real_row_->pitch_decision),
661  real_row_->fixed_pitch, real_row_->max_nonspace,
662  real_row_->space_size, real_row_->space_threshold,
663  real_row_->xheight);
664 
665  for (unsigned i = 0; i < num_chars(); i++) {
666  tprintf("Char %u: is_final=%d is_good=%d num_blobs=%d: ",
667  i, is_final(i), is_good(i), character(i)->num_blobs());
668  box(i).print();
669  }
670  }
671 }
672 
673 void FPRow::Pass1Analyze() {
674  if (num_chars() < 2) return;
675 
676  if (estimated_pitch_ > 0.0f) {
677  for (size_t i = 2; i < num_chars(); i++) {
678  if (is_good_pitch(estimated_pitch_, box(i - 2), box(i-1)) &&
679  is_good_pitch(estimated_pitch_, box(i - 1), box(i))) {
680  mark_good(i - 1);
681  }
682  }
683  } else {
684  for (size_t i = 2; i < num_chars(); i++) {
685  if (is_good_pitch(box_pitch(box(i-2), box(i-1)), box(i - 1), box(i))) {
686  mark_good(i - 1);
687  }
688  }
689  }
690  character(0)->set_alignment(character(1)->alignment());
691  character(num_chars() - 1)->set_alignment(
692  character(num_chars() - 2)->alignment());
693 }
694 
695 bool FPRow::Pass2Analyze() {
696  bool changed = false;
697  if (num_chars() <= 1 || estimated_pitch_ == 0.0f) {
698  return false;
699  }
700  for (size_t i = 0; i < num_chars(); i++) {
701  if (is_final(i)) continue;
702 
703  FPChar::Alignment alignment = character(i)->alignment();
704  bool intersecting = false;
705  bool not_intersecting = false;
706 
707  if (i < num_chars() - 1 && is_final(i + 1)) {
708  // Next character is already finalized. Estimate the imaginary
709  // body including this character based on the character. Skip
710  // whitespace if necessary.
711  bool skipped_whitespaces = false;
712  float c1 = center_x(i + 1) - 1.5 * estimated_pitch_;
713  while (c1 > box(i).right()) {
714  skipped_whitespaces = true;
715  c1 -= estimated_pitch_;
716  }
717  TBOX ibody(c1, box(i).bottom(), c1 + estimated_pitch_, box(i).top());
718 
719  // Collect all characters that mostly fit in the region.
720  // Also, their union height shouldn't be too big.
721  int j = i;
722  TBOX merged;
723  while (j >= 0 && !is_final(j) && mostly_overlap(ibody, box(j)) &&
724  merged.bounding_union(box(j)).height() <
725  estimated_pitch_ * (1 + kFPTolerance)) {
726  merged += box(j);
727  j--;
728  }
729 
730  if (j >= 0 && significant_overlap(ibody, box(j))) {
731  // character(j) lies on the character boundary and doesn't fit
732  // well into the imaginary body.
733  if (!is_final(j)) intersecting = true;
734  } else {
735  not_intersecting = true;
736  if (i - j > 0) {
737  // Merge character(j+1) ... character(i) because they fit
738  // into the body nicely.
739  if (i - j == 1) {
740  // Only one char in the imaginary body.
741  if (!skipped_whitespaces) mark_good(i);
742  // set ibody as bounding box of this character to get
743  // better pitch analysis result for halfwidth glyphs
744  // followed by a halfwidth space.
745  if (box(i).width() <= estimated_pitch_ * 0.5) {
746  ibody += box(i);
747  character(i)->set_box(ibody);
748  }
749  character(i)->set_merge_to_prev(false);
750  finalize(i);
751  } else {
752  for (int k = i; k > j + 1; k--) {
753  character(k)->set_merge_to_prev(true);
754  }
755  }
756  }
757  }
758  }
759  if (i > 0 && is_final(i - 1)) {
760  // Now we repeat everything from the opposite side. Previous
761  // character is already finalized. Estimate the imaginary body
762  // including this character based on the character.
763  bool skipped_whitespaces = false;
764  float c1 = center_x(i - 1) + 1.5 * estimated_pitch_;
765  while (c1 < box(i).left()) {
766  skipped_whitespaces = true;
767  c1 += estimated_pitch_;
768  }
769  TBOX ibody(c1 - estimated_pitch_, box(i).bottom(), c1, box(i).top());
770 
771  size_t j = i;
772  TBOX merged;
773  while (j < num_chars() && !is_final(j) && mostly_overlap(ibody, box(j)) &&
774  merged.bounding_union(box(j)).height() <
775  estimated_pitch_ * (1 + kFPTolerance)) {
776  merged += box(j);
777  j++;
778  }
779 
780  if (j < num_chars() && significant_overlap(ibody, box(j))) {
781  if (!is_final(j)) intersecting = true;
782  } else {
783  not_intersecting = true;
784  if (j - i > 0) {
785  if (j - i == 1) {
786  if (!skipped_whitespaces) mark_good(i);
787  if (box(i).width() <= estimated_pitch_ * 0.5) {
788  ibody += box(i);
789  character(i)->set_box(ibody);
790  }
791  character(i)->set_merge_to_prev(false);
792  finalize(i);
793  } else {
794  for (size_t k = i + 1; k < j; k++) {
795  character(k)->set_merge_to_prev(true);
796  }
797  }
798  }
799  }
800  }
801 
802  // This character doesn't fit well into the estimated imaginary
803  // bodies. Mark it as bad.
804  if (intersecting && !not_intersecting) mark_bad(i);
805  if (character(i)->alignment() != alignment ||
806  character(i)->merge_to_prev()) {
807  changed = true;
808  }
809  }
810 
811  return changed;
812 }
813 
814 void FPRow::MergeFragments() {
815  int last_char = 0;
816 
817  for (size_t j = 0; j < num_chars(); ++j) {
818  if (character(j)->merge_to_prev()) {
819  character(last_char)->Merge(*character(j));
820  character(j)->set_delete_flag(true);
821  clear_alignment(last_char);
822  character(j-1)->set_merge_to_prev(false);
823  } else {
824  last_char = j;
825  }
826  }
827  DeleteChars();
828 }
829 
830 void FPRow::FinalizeLargeChars() {
831  float row_pitch = estimated_pitch();
832  for (size_t i = 0; i < num_chars(); i++) {
833  if (is_final(i)) continue;
834 
835  // Finalize if both neighbors are finalized. We have no other choice.
836  if (i > 0 && is_final(i - 1) && i < num_chars() - 1 && is_final(i + 1)) {
837  finalize(i);
838  continue;
839  }
840 
841  float cx = center_x(i);
842  TBOX ibody(cx - 0.5 * row_pitch, 0, cx + 0.5 * row_pitch, 1);
843  if (i > 0) {
844  // The preceding character significantly intersects with the
845  // imaginary body of this character. Let Pass2Analyze() handle
846  // this case.
847  if (x_overlap_fraction(ibody, box(i - 1)) > 0.1) continue;
848  if (!is_final(i - 1)) {
849  TBOX merged = box(i);
850  merged += box(i - 1);
851  if (merged.width() < row_pitch) continue;
852  // This character cannot be finalized yet because it can be
853  // merged with the previous one. Again, let Pass2Analyze()
854  // handle this case.
855  }
856  }
857  if (i < num_chars() - 1) {
858  if (x_overlap_fraction(ibody, box(i + 1)) > 0.1) continue;
859  if (!is_final(i + 1)) {
860  TBOX merged = box(i);
861  merged += box(i + 1);
862  if (merged.width() < row_pitch) continue;
863  }
864  }
865  finalize(i);
866  }
867 
868  // Update alignment decision. We only consider finalized characters
869  // in pass2. E.g. if a finalized character C has another finalized
870  // character L on its left and a not-finalized character R on its
871  // right, we mark C as good if the pitch between C and L is good,
872  // regardless of the pitch between C and R.
873  for (size_t i = 0; i < num_chars(); i++) {
874  if (!is_final(i)) continue;
875  bool good_pitch = false;
876  bool bad_pitch = false;
877  if (i > 0 && is_final(i - 1)) {
878  if (is_good_pitch(row_pitch, box(i - 1), box(i))) {
879  good_pitch = true;
880  } else {
881  bad_pitch = true;
882  }
883  }
884  if (i < num_chars() - 1 && is_final(i + 1)) {
885  if (is_good_pitch(row_pitch, box(i), box(i + 1))) {
886  good_pitch = true;
887  } else {
888  bad_pitch = true;
889  }
890  }
891  if (good_pitch && !bad_pitch) mark_good(i);
892  else if (!good_pitch && bad_pitch) mark_bad(i);
893  }
894 }
895 
896 class FPAnalyzer {
897  public:
898  FPAnalyzer(ICOORD page_tr, TO_BLOCK_LIST *port_blocks);
899  ~FPAnalyzer() { }
900 
901  void Pass1Analyze() {
902  for (auto & row : rows_) row.Pass1Analyze();
903  }
904 
905  // Estimate character pitch for each row. The argument pass1 can be
906  // set to true if the function is called after Pass1Analyze(), to
907  // eliminate some redundant computation.
908  void EstimatePitch(bool pass1);
909 
910  bool maybe_fixed_pitch() {
911  if (rows_.empty() ||
912  rows_.size() <= num_bad_rows_ + num_tall_rows_ + 1) return false;
913  return true;
914  }
915 
916  void MergeFragments() {
917  for (auto & row : rows_) row.MergeFragments();
918  }
919 
920  void FinalizeLargeChars() {
921  for (auto & row : rows_) row.FinalizeLargeChars();
922  }
923 
924  bool Pass2Analyze() {
925  bool changed = false;
926  for (auto & row : rows_) {
927  if (row.Pass2Analyze()) {
928  changed = true;
929  }
930  }
931  return changed;
932  }
933 
934  void OutputEstimations() {
935  for (auto & row : rows_) row.OutputEstimations();
936  // Don't we need page-level estimation of gaps/spaces?
937  }
938 
939  void DebugOutputResult() {
940  tprintf("FPAnalyzer: final result\n");
941  for (size_t i = 0; i < rows_.size(); i++) rows_[i].DebugOutputResult(i);
942  }
943 
944  size_t num_rows() {
945  return rows_.size();
946  }
947 
948  // Returns the upper limit for pass2 loop iteration.
949  unsigned max_iteration() {
950  // We're fixing at least one character per iteration. So basically
951  // we shouldn't require more than max_chars_per_row_ iterations.
952  return max_chars_per_row_ + 100;
953  }
954 
955  private:
956  ICOORD page_tr_;
957  std::vector<FPRow> rows_;
958  unsigned num_tall_rows_;
959  unsigned num_bad_rows_;
960  // TODO: num_empty_rows_ is incremented, but never used otherwise.
961  unsigned num_empty_rows_;
962  unsigned max_chars_per_row_;
963 };
964 
965 FPAnalyzer::FPAnalyzer(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
966 : page_tr_(page_tr),
967  num_tall_rows_(0),
968  num_bad_rows_(0),
969  num_empty_rows_(0),
970  max_chars_per_row_(0)
971 {
972  TO_BLOCK_IT block_it(port_blocks);
973 
974  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
975  block_it.forward()) {
976  TO_BLOCK *block = block_it.data();
977  if (!block->get_rows()->empty()) {
978  ASSERT_HOST(block->xheight > 0);
979  find_repeated_chars(block, false);
980  }
981  }
982 
983  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
984  block_it.forward()) {
985  TO_ROW_IT row_it = block_it.data()->get_rows();
986  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
987  FPRow row;
988  row.Init(row_it.data());
989  rows_.push_back(row);
990  size_t num_chars = rows_.back().num_chars();
991  if (num_chars <= 1) num_empty_rows_++;
992  if (num_chars > max_chars_per_row_) max_chars_per_row_ = num_chars;
993  }
994  }
995 }
996 
997 void FPAnalyzer::EstimatePitch(bool pass1) {
998  LocalCorrelation pitch_height_stats;
999 
1000  num_tall_rows_ = 0;
1001  num_bad_rows_ = 0;
1002  pitch_height_stats.Clear();
1003  for (auto & row : rows_) {
1004  row.EstimatePitch(pass1);
1005  if (row.good_pitches()) {
1006  pitch_height_stats.Add(row.height() + row.gap(),
1007  row.pitch(), row.good_pitches());
1008  if (row.height_pitch_ratio() > 1.1) num_tall_rows_++;
1009  } else {
1010  num_bad_rows_++;
1011  }
1012  }
1013 
1014  pitch_height_stats.Finish();
1015  for (auto & row : rows_) {
1016  if (row.good_pitches() >= 5) {
1017  // We have enough evidences. Just use the pitch estimation
1018  // from this row.
1019  row.set_estimated_pitch(row.pitch());
1020  } else if (row.num_chars() > 1) {
1021  float estimated_pitch =
1022  pitch_height_stats.EstimateYFor(row.height() + row.gap(),
1023  0.1);
1024  // CJK characters are more likely to be fragmented than poorly
1025  // chopped. So trust the page-level estimation of character
1026  // pitch only if it's larger than row-level estimation or
1027  // row-level estimation is too large (2x bigger than row height).
1028  if (estimated_pitch > row.pitch() ||
1029  row.pitch() > row.height() * 2.0) {
1030  row.set_estimated_pitch(estimated_pitch);
1031  } else {
1032  row.set_estimated_pitch(row.pitch());
1033  }
1034  }
1035  }
1036 }
1037 
1038 } // namespace
1039 
1041  TO_BLOCK_LIST *port_blocks) {
1042  FPAnalyzer analyzer(page_tr, port_blocks);
1043  if (analyzer.num_rows() == 0) return;
1044 
1045  analyzer.Pass1Analyze();
1046  analyzer.EstimatePitch(true);
1047 
1048  // Perform pass1 analysis again with the initial estimation of row
1049  // pitches, for better estimation.
1050  analyzer.Pass1Analyze();
1051  analyzer.EstimatePitch(true);
1052 
1053  // Early exit if the page doesn't seem to contain fixed pitch rows.
1054  if (!analyzer.maybe_fixed_pitch()) {
1056  tprintf("Page doesn't seem to contain fixed pitch rows\n");
1057  }
1058  return;
1059  }
1060 
1061  unsigned iteration = 0;
1062  do {
1063  analyzer.MergeFragments();
1064  analyzer.FinalizeLargeChars();
1065  analyzer.EstimatePitch(false);
1066  iteration++;
1067  } while (analyzer.Pass2Analyze() && iteration < analyzer.max_iteration());
1068 
1070  tprintf("compute_fixed_pitch_cjk finished after %u iteration (limit=%u)\n",
1071  iteration, analyzer.max_iteration());
1072  }
1073 
1074  analyzer.OutputEstimations();
1075  if (textord_debug_pitch_test) analyzer.DebugOutputResult();
1076 }
ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:87
ICOORD
integer coordinate
Definition: points.h:30
TBOX::print
void print() const
Definition: rect.h:277
TBOX::bounding_union
TBOX bounding_union(const TBOX &box) const
Definition: rect.cpp:124
TO_BLOCK
Definition: blobbox.h:691
PITCH_CORR_PROP
Definition: blobbox.h:51
PITCH_DEF_FIXED
Definition: blobbox.h:46
TO_ROW::pitch_decision
PITCH_TYPE pitch_decision
Definition: blobbox.h:649
BLOBNBOX
Definition: blobbox.h:142
BTFT_LEADER
Definition: blobbox.h:120
TBOX::height
int16_t height() const
Definition: rect.h:107
tovars.h
genericvector.h
topitch.h
PITCH_CORR_FIXED
Definition: blobbox.h:50
cjkpitch.h
TO_BLOCK::xheight
float xheight
Definition: blobbox.h:787
BLOBNBOX::joined_to_prev
bool joined_to_prev() const
Definition: blobbox.h:255
TBOX::width
int16_t width() const
Definition: rect.h:114
BOOL_VAR
#define BOOL_VAR(name, val, comment)
Definition: params.h:303
character
Definition: mfoutline.h:62
TO_ROW::xheight
float xheight
Definition: blobbox.h:656
compute_fixed_pitch_cjk
void compute_fixed_pitch_cjk(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
Definition: cjkpitch.cpp:1040
BLOBNBOX::bounding_box
const TBOX & bounding_box() const
Definition: blobbox.h:229
GenericVector< float >
TO_BLOCK::get_rows
TO_ROW_LIST * get_rows()
Definition: blobbox.h:703
BLOBNBOX::flow
BlobTextFlowType flow() const
Definition: blobbox.h:294
TBOX::left
int16_t left() const
Definition: rect.h:71
find_repeated_chars
void find_repeated_chars(TO_BLOCK *block, bool testing_on)
Definition: topitch.cpp:1739
TBOX::right
int16_t right() const
Definition: rect.h:78
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
TO_ROW
Definition: blobbox.h:543
ICOORDELT
Definition: points.h:160
textord_debug_pitch_test
bool textord_debug_pitch_test
Definition: topitch.cpp:38
TBOX::x_gap
int x_gap(const TBOX &box) const
Definition: rect.h:224
TO_ROW::blob_list
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:599
TBOX
Definition: rect.h:33