tesseract  4.0.0-1-g2a2b
cjkpitch.cpp
Go to the documentation of this file.
1 // File: cjkpitch.cpp
3 // Description: Code to determine fixed pitchness and the pitch if fixed,
4 // for CJK text.
5 // Author: takenaka@google.com (Hiroshi Takenaka)
6 // Created: Mon Jun 27 12:48:35 JST 2011
7 //
8 // Copyright 2011 Google Inc. All Rights Reserved.
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 // http://www.apache.org/licenses/LICENSE-2.0
13 // Unless required by applicable law or agreed to in writing, software
14 // distributed under the License is distributed on an "AS IS" BASIS,
15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 // See the License for the specific language governing permissions and
17 // limitations under the License.
18 //
20 
21 #include "cjkpitch.h"
22 #include "genericvector.h"
23 #include "topitch.h"
24 #include "tovars.h"
25 
26 #include <algorithm>
27 #include <vector> // for std::vector
28 
30  "If true, word delimiter spaces are assumed to have "
31  "variable width, even though characters have fixed pitch.");
32 
33 namespace {
34 
35 // Allow +/-10% error for character pitch / body size.
36 static const float kFPTolerance = 0.1;
37 
38 // Minimum ratio of "good" character pitch for a row to be considered
39 // to be fixed-pitch.
40 static const float kFixedPitchThreshold = 0.35;
41 
42 // rank statistics for a small collection of float values.
43 class SimpleStats {
44  public:
45  SimpleStats(): finalized_(false), values_() { }
46  ~SimpleStats() { }
47 
48  void Clear() {
49  values_.clear();
50  finalized_ = false;
51  }
52 
53  void Add(float value) {
54  values_.push_back(value);
55  finalized_ = false;
56  }
57 
58  void Finish() {
59  values_.sort(float_compare);
60  finalized_ = true;
61  }
62 
63  float ile(double frac) {
64  if (!finalized_) Finish();
65  if (values_.empty()) return 0.0;
66  if (frac >= 1.0) return values_.back();
67  if (frac <= 0.0 || values_.size() == 1) return values_[0];
68  int index = static_cast<int>((values_.size() - 1) * frac);
69  float reminder = (values_.size() - 1) * frac - index;
70 
71  return values_[index] * (1.0 - reminder) +
72  values_[index + 1] * reminder;
73  }
74 
75  float median() {
76  return ile(0.5);
77  }
78 
79  float maximum() {
80  if (!finalized_) Finish();
81  if (values_.empty()) return 0.0;
82  return values_.back();
83  }
84 
85  float minimum() {
86  if (!finalized_) Finish();
87  if (values_.empty()) return 0.0;
88  return values_[0];
89  }
90 
91  int size() const {
92  return values_.size();
93  }
94 
95  private:
96  static int float_compare(const void* a, const void* b) {
97  const float* f_a = static_cast<const float*>(a);
98  const float* f_b = static_cast<const float*>(b);
99  return (*f_a > *f_b) ? 1 : ((*f_a < *f_b) ? -1 : 0);
100  }
101 
102  bool finalized_;
103  GenericVector<float> values_;
104 };
105 
106 // statistics for a small collection of float pairs (x, y).
107 // EstimateYFor(x, r) returns the estimated y at x, based on
108 // existing samples between x*(1-r) ~ x*(1+r).
109 class LocalCorrelation {
110  public:
111  struct float_pair {
112  float x, y;
113  int vote;
114  };
115 
116  LocalCorrelation(): finalized_(false) { }
117  ~LocalCorrelation() { }
118 
119  void Finish() {
120  values_.sort(float_pair_compare);
121  finalized_ = true;
122  }
123 
124  void Clear() {
125  finalized_ = false;
126  }
127 
128  void Add(float x, float y, int v) {
129  struct float_pair value;
130  value.x = x;
131  value.y = y;
132  value.vote = v;
133  values_.push_back(value);
134  finalized_ = false;
135  }
136 
137  float EstimateYFor(float x, float r) {
138  ASSERT_HOST(finalized_);
139  int start = 0, end = values_.size();
140  // Because the number of samples (used_) is assumed to be small,
141  // just use linear search to find values within the range.
142  while (start < values_.size() && values_[start].x < x * (1.0 - r)) start++;
143  while (end - 1 >= 0 && values_[end - 1].x > x * (1.0 + r)) end--;
144 
145  // Fall back to the global average if there are no data within r
146  // of x.
147  if (start >= end) {
148  start = 0;
149  end = values_.size();
150  }
151 
152  // Compute weighted average of the values.
153  float rc = 0;
154  int vote = 0;
155  for (int i = start; i < end; i++) {
156  rc += values_[i].vote * x * values_[i].y / values_[i].x;
157  vote += values_[i].vote;
158  }
159 
160  return rc / vote;
161  }
162 
163  private:
164  static int float_pair_compare(const void* a, const void* b) {
165  const float_pair* f_a = static_cast<const float_pair*>(a);
166  const float_pair* f_b = static_cast<const float_pair*>(b);
167  return (f_a->x > f_b->x) ? 1 : ((f_a->x < f_b->x) ? -1 : 0);
168  }
169 
170  bool finalized_;
172 };
173 
174 // Class to represent a character on a fixed pitch row. A FPChar may
175 // consist of multiple blobs (BLOBNBOX's).
176 class FPChar {
177  public:
178  enum Alignment {
179  ALIGN_UNKNOWN, ALIGN_GOOD, ALIGN_BAD
180  };
181 
182  FPChar(): box_(), real_body_(),
183  from_(nullptr), to_(nullptr), num_blobs_(0), max_gap_(0),
184  final_(false), alignment_(ALIGN_UNKNOWN),
185  merge_to_prev_(false), delete_flag_(false) {
186  }
187 
188  // Initialize from blob.
189  void Init(BLOBNBOX *blob) {
190  box_ = blob->bounding_box();
191  real_body_ = box_;
192  from_ = to_ = blob;
193  num_blobs_ = 1;
194  }
195 
196  // Merge this character with "next". The "next" character should
197  // consist of succeeding blobs on the same row.
198  void Merge(const FPChar &next) {
199  int gap = real_body_.x_gap(next.real_body_);
200  if (gap > max_gap_) max_gap_ = gap;
201 
202  box_ += next.box_;
203  real_body_ += next.real_body_;
204  to_ = next.to_;
205  num_blobs_ += next.num_blobs_;
206  }
207 
208  // Accessors.
209  const TBOX &box() const { return box_; }
210  void set_box(const TBOX &box) {
211  box_ = box;
212  }
213  const TBOX &real_body() const { return real_body_; }
214 
215  bool is_final() const { return final_; }
216  void set_final(bool flag) {
217  final_ = flag;
218  }
219 
220  const Alignment& alignment() const {
221  return alignment_;
222  }
223  void set_alignment(Alignment alignment) {
224  alignment_ = alignment;
225  }
226 
227  bool merge_to_prev() const {
228  return merge_to_prev_;
229  }
230  void set_merge_to_prev(bool flag) {
231  merge_to_prev_ = flag;
232  }
233 
234  bool delete_flag() const {
235  return delete_flag_;
236  }
237  void set_delete_flag(bool flag) {
238  delete_flag_ = flag;
239  }
240 
241  int max_gap() const {
242  return max_gap_;
243  }
244 
245  int num_blobs() const {
246  return num_blobs_;
247  }
248 
249  private:
250  TBOX box_; // Rectangle region considered to be occupied by this
251  // character. It could be bigger than the bounding box.
252  TBOX real_body_; // Real bounding box of this character.
253  BLOBNBOX *from_; // The first blob of this character.
254  BLOBNBOX *to_; // The last blob of this character.
255  int num_blobs_; // Number of blobs that belong to this character.
256  int max_gap_; // Maximum x gap between the blobs.
257 
258  bool final_; // True if alignment/fragmentation decision for this
259  // character is finalized.
260 
261  Alignment alignment_; // Alignment status.
262  bool merge_to_prev_; // True if this is a fragmented blob that
263  // needs to be merged to the previous
264  // character.
265 
266  int delete_flag_; // True if this character is merged to another
267  // one and needs to be deleted.
268 };
269 
270 // Class to represent a fixed pitch row, as a linear collection of
271 // FPChar's.
272 class FPRow {
273  public:
274  FPRow() : pitch_(0.0f), estimated_pitch_(0.0f),
275  all_pitches_(), all_gaps_(), good_pitches_(), good_gaps_(),
276  heights_(), characters_(), real_row_(nullptr) {
277  }
278 
279  ~FPRow() { }
280 
281  // Initialize from TD_ROW.
282  void Init(TO_ROW *row);
283 
284  // Estimate character pitch of this row, based on current alignment
285  // status of underlying FPChar's. The argument pass1 can be set to
286  // true if the function is called after Pass1Analyze(), to eliminate
287  // some redundant computation.
288  void EstimatePitch(bool pass1);
289 
290  // Check each character if it has good character pitches between its
291  // predecessor and its successor and set its alignment status. If
292  // we already calculated the estimated pitch for this row, the value
293  // is used. If we didn't, a character is considered to be good, if
294  // the pitches between its predecessor and its successor are almost
295  // equal.
296  void Pass1Analyze();
297 
298  // Find characters that fit nicely into one imaginary body next to a
299  // character which is already finalized. Then mark them as character
300  // fragments.
301  bool Pass2Analyze();
302 
303  // Merge FPChars marked as character fragments into one.
304  void MergeFragments();
305 
306  // Finalize characters that are already large enough and cannot be
307  // merged with others any more.
308  void FinalizeLargeChars();
309 
310  // Output pitch estimation results to attributes of TD_ROW.
311  void OutputEstimations();
312 
313  void DebugOutputResult(int row_index);
314 
315  int good_pitches() {
316  return good_pitches_.size();
317  }
318 
319  int good_gaps() {
320  return good_gaps_.size();
321  }
322 
323  float pitch() {
324  return pitch_;
325  }
326 
327  float estimated_pitch() {
328  return estimated_pitch_;
329  }
330 
331  void set_estimated_pitch(float v) {
332  estimated_pitch_ = v;
333  }
334 
335  float height() {
336  return height_;
337  }
338 
339  float height_pitch_ratio() {
340  if (good_pitches_.size() < 2) return -1.0;
341  return height_ / good_pitches_.median();
342  }
343 
344  float gap() {
345  return gap_;
346  }
347 
348  size_t num_chars() {
349  return characters_.size();
350  }
351  FPChar *character(int i) {
352  return &characters_[i];
353  }
354 
355  const TBOX &box(int i) {
356  return characters_[i].box();
357  }
358 
359  const TBOX &real_body(int i) {
360  return characters_[i].real_body();
361  }
362 
363  bool is_box_modified(int i) {
364  return !(characters_[i].box() == characters_[i].real_body());
365  }
366 
367  float center_x(int i) {
368  return (characters_[i].box().left() + characters_[i].box().right()) / 2.0;
369  }
370 
371  bool is_final(int i) {
372  return characters_[i].is_final();
373  }
374 
375  void finalize(int i) {
376  characters_[i].set_final(true);
377  }
378 
379  bool is_good(int i) {
380  return characters_[i].alignment() == FPChar::ALIGN_GOOD;
381  }
382 
383  bool is_bad(int i) {
384  return characters_[i].alignment() == FPChar::ALIGN_BAD;
385  }
386 
387  bool is_unknown(int i) {
388  return characters_[i].alignment() == FPChar::ALIGN_UNKNOWN;
389  }
390 
391  void mark_good(int i) {
392  characters_[i].set_alignment(FPChar::ALIGN_GOOD);
393  }
394 
395  void mark_bad(int i) {
396  characters_[i].set_alignment(FPChar::ALIGN_BAD);
397  }
398 
399  void clear_alignment(int i) {
400  characters_[i].set_alignment(FPChar::ALIGN_UNKNOWN);
401  }
402 
403  private:
404  static float x_overlap_fraction(const TBOX& box1, const TBOX& box2) {
405  if (std::min(box1.width(), box2.width()) == 0) return 0.0;
406  return -box1.x_gap(box2) / (float)std::min(box1.width(), box2.width());
407  }
408 
409  static bool mostly_overlap(const TBOX& box1, const TBOX& box2) {
410  return x_overlap_fraction(box1, box2) > 0.9;
411  }
412 
413  static bool significant_overlap(const TBOX& box1, const TBOX& box2) {
414  if (std::min(box1.width(), box2.width()) == 0) return false;
415  int overlap = -box1.x_gap(box2);
416  return overlap > 1 || x_overlap_fraction(box1, box2) > 0.1;
417  }
418 
419  static float box_pitch(const TBOX& ref, const TBOX& box) {
420  return abs(ref.left() + ref.right() - box.left() - box.right()) / 2.0;
421  }
422 
423  // Check if two neighboring characters satisfy the fixed pitch model.
424  static bool is_good_pitch(float pitch, const TBOX& box1, const TBOX& box2) {
425  // Character box shouldn't exceed pitch.
426  if (box1.width() >= pitch * (1.0 + kFPTolerance) ||
427  box2.width() >= pitch * (1.0 + kFPTolerance) ||
428  box1.height() >= pitch * (1.0 + kFPTolerance) ||
429  box2.height() >= pitch * (1.0 + kFPTolerance)) return false;
430 
431  const float real_pitch = box_pitch(box1, box2);
432  if (fabs(real_pitch - pitch) < pitch * kFPTolerance) return true;
433 
435  // Hangul characters usually have fixed pitch, but words are
436  // delimited by space which can be narrower than characters.
437  if (real_pitch > pitch && real_pitch < pitch * 2.0 &&
438  real_pitch - box1.x_gap(box2) < pitch) {
439  return true;
440  }
441  }
442  return false;
443  }
444 
445  static bool is_interesting_blob(const BLOBNBOX *blob) {
446  return !blob->joined_to_prev() && blob->flow() != BTFT_LEADER;
447  }
448 
449  // Cleanup chars that are already merged to others.
450  void DeleteChars() {
451  int index = 0;
452  for (int i = 0; i < characters_.size(); ++i) {
453  if (!characters_[i].delete_flag()) {
454  if (index != i) characters_[index] = characters_[i];
455  index++;
456  }
457  }
458  characters_.truncate(index);
459  }
460 
461  float pitch_; // Character pitch.
462  float estimated_pitch_; // equal to pitch_ if pitch_ is considered
463  // to be good enough.
464  float height_; // Character height.
465  float gap_; // Minimum gap between characters.
466 
467  // Pitches between any two successive characters.
468  SimpleStats all_pitches_;
469  // Gaps between any two successive characters.
470  SimpleStats all_gaps_;
471  // Pitches between any two successive characters that are consistent
472  // with the fixed pitch model.
473  SimpleStats good_pitches_;
474  // Gaps between any two successive characters that are consistent
475  // with the fixed pitch model.
476  SimpleStats good_gaps_;
477 
478  SimpleStats heights_;
479 
480  GenericVector<FPChar> characters_;
481  TO_ROW *real_row_; // Underlying TD_ROW for this row.
482 };
483 
484 void FPRow::Init(TO_ROW *row) {
485  ASSERT_HOST(row != nullptr);
486  ASSERT_HOST(row->xheight > 0);
487  real_row_ = row;
488  real_row_->pitch_decision = PITCH_CORR_PROP; // Default decision.
489 
490  BLOBNBOX_IT blob_it = row->blob_list();
491  // Initialize characters_ and compute the initial estimation of
492  // character height.
493  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
494  if (is_interesting_blob(blob_it.data())) {
495  FPChar fp_char;
496  fp_char.Init(blob_it.data());
497  // Merge unconditionally if two blobs overlap.
498  if (!characters_.empty() &&
499  significant_overlap(fp_char.box(), characters_.back().box())) {
500  characters_.back().Merge(fp_char);
501  } else {
502  characters_.push_back(fp_char);
503  }
504  TBOX bound = blob_it.data()->bounding_box();
505  if (bound.height() * 3.0 > bound.width()) {
506  heights_.Add(bound.height());
507  }
508  }
509  }
510  heights_.Finish();
511  height_ = heights_.ile(0.875);
512 }
513 
514 void FPRow::OutputEstimations() {
515  if (good_pitches_.size() == 0) {
516  pitch_ = 0.0f;
517  real_row_->pitch_decision = PITCH_CORR_PROP;
518  return;
519  }
520 
521  pitch_ = good_pitches_.median();
522  real_row_->fixed_pitch = pitch_;
523  // good_gaps_.ile(0.125) can be large if most characters on the row
524  // are skinny. Use pitch_ - height_ instead if it's smaller, but
525  // positive.
526  real_row_->kern_size = real_row_->pr_nonsp =
527  std::min(good_gaps_.ile(0.125), std::max(pitch_ - height_, 0.0f));
528  real_row_->body_size = pitch_ - real_row_->kern_size;
529 
530  if (good_pitches_.size() < all_pitches_.size() * kFixedPitchThreshold) {
531  // If more than half of the characters of a line don't fit to the
532  // fixed pitch model, consider the line to be proportional. 50%
533  // seems to be a good threshold in practice as well.
534  // Anyway we store estimated values (fixed_pitch, kern_size, etc.) in
535  // real_row_ as a partial estimation result and try to use them in the
536  // normalization process.
537  real_row_->pitch_decision = PITCH_CORR_PROP;
538  return;
539  } else if (good_pitches_.size() > all_pitches_.size() * 0.75) {
540  real_row_->pitch_decision = PITCH_DEF_FIXED;
541  } else {
542  real_row_->pitch_decision = PITCH_CORR_FIXED;
543  }
544 
545  real_row_->space_size = real_row_->pr_space = pitch_;
546  // Set min_space to 50% of character pitch so that we can break CJK
547  // text at a half-width space after punctuation.
548  real_row_->min_space = (pitch_ + good_gaps_.minimum()) * 0.5;
549 
550  // Don't consider a quarter space as a real space, because it's used
551  // for line justification in traditional Japanese books.
552  real_row_->max_nonspace = std::max(pitch_ * 0.25 + good_gaps_.minimum(),
553  (double)good_gaps_.ile(0.875));
554 
555  int space_threshold =
556  std::min((real_row_->max_nonspace + real_row_->min_space) / 2,
557  static_cast<int>(real_row_->xheight));
558 
559  // Make max_nonspace larger than any intra-character gap so that
560  // make_prop_words() won't break a row at the middle of a character.
561  for (size_t i = 0; i < num_chars(); ++i) {
562  if (characters_[i].max_gap() > real_row_->max_nonspace) {
563  real_row_->max_nonspace = characters_[i].max_gap();
564  }
565  }
566  real_row_->space_threshold =
567  std::min((real_row_->max_nonspace + real_row_->min_space) / 2,
568  static_cast<int>(real_row_->xheight));
569  real_row_->used_dm_model = false;
570 
571  // Setup char_cells.
572  ICOORDELT_IT cell_it = &real_row_->char_cells;
573  ICOORDELT *cell = new ICOORDELT(real_body(0).left(), 0);
574  cell_it.add_after_then_move(cell);
575 
576  int right = real_body(0).right();
577  for (size_t i = 1; i < num_chars(); ++i) {
578  // Put a word break if gap between two characters is bigger than
579  // space_threshold. Don't break if none of two characters
580  // couldn't be "finalized", because maybe they need to be merged
581  // to one character.
582  if ((is_final(i - 1) || is_final(i)) &&
583  real_body(i - 1).x_gap(real_body(i)) > space_threshold) {
584  cell = new ICOORDELT(right + 1, 0);
585  cell_it.add_after_then_move(cell);
586  while (right + pitch_ < box(i).left()) {
587  right += pitch_;
588  cell = new ICOORDELT(right + 1, 0);
589  cell_it.add_after_then_move(cell);
590  }
591  right = box(i).left();
592  }
593  cell = new ICOORDELT((right + real_body(i).left()) / 2, 0);
594  cell_it.add_after_then_move(cell);
595  right = real_body(i).right();
596  }
597 
598  cell = new ICOORDELT(right + 1, 0);
599  cell_it.add_after_then_move(cell);
600 
601  // TODO(takenaka): add code to store alignment/fragmentation
602  // information to blobs so that it can be reused later, e.g. in
603  // recognition phase.
604 }
605 
606 void FPRow::EstimatePitch(bool pass1) {
607  good_pitches_.Clear();
608  all_pitches_.Clear();
609  good_gaps_.Clear();
610  all_gaps_.Clear();
611  heights_.Clear();
612  if (num_chars() == 0) return;
613 
614  int32_t cx0, cx1;
615  bool prev_was_good = is_good(0);
616  cx0 = center_x(0);
617 
618  heights_.Add(box(0).height());
619  for (size_t i = 1; i < num_chars(); i++) {
620  cx1 = center_x(i);
621  int32_t pitch = cx1 - cx0;
622  int32_t gap = std::max(0, real_body(i - 1).x_gap(real_body(i)));
623 
624  heights_.Add(box(i).height());
625  // Ignore if the pitch is too close. But don't ignore wide pitch
626  // may be the result of large tracking.
627  if (pitch > height_ * 0.5) {
628  all_pitches_.Add(pitch);
629  all_gaps_.Add(gap);
630  if (is_good(i)) {
631  // In pass1 (after Pass1Analyze()), all characters marked as
632  // "good" have a good consistent pitch with their previous
633  // characters. However, it's not true in pass2 and a good
634  // character may have a good pitch only between its successor.
635  // So we collect only pitch values between two good
636  // characters. and within tolerance in pass2.
637  if (pass1 || (prev_was_good &&
638  fabs(estimated_pitch_ - pitch) <
639  kFPTolerance * estimated_pitch_)) {
640  good_pitches_.Add(pitch);
641  if (!is_box_modified(i - 1) && !is_box_modified(i)) {
642  good_gaps_.Add(gap);
643  }
644  }
645  prev_was_good = true;
646  } else {
647  prev_was_good = false;
648  }
649  }
650  cx0 = cx1;
651  }
652 
653  good_pitches_.Finish();
654  all_pitches_.Finish();
655  good_gaps_.Finish();
656  all_gaps_.Finish();
657  heights_.Finish();
658 
659  height_ = heights_.ile(0.875);
660  if (all_pitches_.size() == 0) {
661  pitch_ = 0.0f;
662  gap_ = 0.0f;
663  } else if (good_pitches_.size() < 2) {
664  // We don't have enough data to estimate the pitch of this row yet.
665  // Use median of all pitches as the initial guess.
666  pitch_ = all_pitches_.median();
667  ASSERT_HOST(pitch_ > 0.0f);
668  gap_ = all_gaps_.ile(0.125);
669  } else {
670  pitch_ = good_pitches_.median();
671  ASSERT_HOST(pitch_ > 0.0f);
672  gap_ = good_gaps_.ile(0.125);
673  }
674 }
675 
676 void FPRow::DebugOutputResult(int row_index) {
677  if (num_chars() > 0) {
678  tprintf("Row %d: pitch_decision=%d, fixed_pitch=%f, max_nonspace=%d, "
679  "space_size=%f, space_threshold=%d, xheight=%f\n",
680  row_index, (int)(real_row_->pitch_decision),
681  real_row_->fixed_pitch, real_row_->max_nonspace,
682  real_row_->space_size, real_row_->space_threshold,
683  real_row_->xheight);
684 
685  for (unsigned i = 0; i < num_chars(); i++) {
686  tprintf("Char %u: is_final=%d is_good=%d num_blobs=%d: ",
687  i, is_final(i), is_good(i), character(i)->num_blobs());
688  box(i).print();
689  }
690  }
691 }
692 
693 void FPRow::Pass1Analyze() {
694  if (num_chars() < 2) return;
695 
696  if (estimated_pitch_ > 0.0f) {
697  for (size_t i = 2; i < num_chars(); i++) {
698  if (is_good_pitch(estimated_pitch_, box(i - 2), box(i-1)) &&
699  is_good_pitch(estimated_pitch_, box(i - 1), box(i))) {
700  mark_good(i - 1);
701  }
702  }
703  } else {
704  for (size_t i = 2; i < num_chars(); i++) {
705  if (is_good_pitch(box_pitch(box(i-2), box(i-1)), box(i - 1), box(i))) {
706  mark_good(i - 1);
707  }
708  }
709  }
710  character(0)->set_alignment(character(1)->alignment());
711  character(num_chars() - 1)->set_alignment(
712  character(num_chars() - 2)->alignment());
713 }
714 
715 bool FPRow::Pass2Analyze() {
716  bool changed = false;
717  if (num_chars() <= 1 || estimated_pitch_ == 0.0f) {
718  return false;
719  }
720  for (size_t i = 0; i < num_chars(); i++) {
721  if (is_final(i)) continue;
722 
723  FPChar::Alignment alignment = character(i)->alignment();
724  bool intersecting = false;
725  bool not_intersecting = false;
726 
727  if (i < num_chars() - 1 && is_final(i + 1)) {
728  // Next character is already finalized. Estimate the imaginary
729  // body including this character based on the character. Skip
730  // whitespace if necessary.
731  bool skipped_whitespaces = false;
732  float c1 = center_x(i + 1) - 1.5 * estimated_pitch_;
733  while (c1 > box(i).right()) {
734  skipped_whitespaces = true;
735  c1 -= estimated_pitch_;
736  }
737  TBOX ibody(c1, box(i).bottom(), c1 + estimated_pitch_, box(i).top());
738 
739  // Collect all characters that mostly fit in the region.
740  // Also, their union height shouldn't be too big.
741  int j = i;
742  TBOX merged;
743  while (j >= 0 && !is_final(j) && mostly_overlap(ibody, box(j)) &&
744  merged.bounding_union(box(j)).height() <
745  estimated_pitch_ * (1 + kFPTolerance)) {
746  merged += box(j);
747  j--;
748  }
749 
750  if (j >= 0 && significant_overlap(ibody, box(j))) {
751  // character(j) lies on the character boundary and doesn't fit
752  // well into the imaginary body.
753  if (!is_final(j)) intersecting = true;
754  } else {
755  not_intersecting = true;
756  if (i - j > 0) {
757  // Merge character(j+1) ... character(i) because they fit
758  // into the body nicely.
759  if (i - j == 1) {
760  // Only one char in the imaginary body.
761  if (!skipped_whitespaces) mark_good(i);
762  // set ibody as bounding box of this character to get
763  // better pitch analysis result for halfwidth glyphs
764  // followed by a halfwidth space.
765  if (box(i).width() <= estimated_pitch_ * 0.5) {
766  ibody += box(i);
767  character(i)->set_box(ibody);
768  }
769  character(i)->set_merge_to_prev(false);
770  finalize(i);
771  } else {
772  for (int k = i; k > j + 1; k--) {
773  character(k)->set_merge_to_prev(true);
774  }
775  }
776  }
777  }
778  }
779  if (i > 0 && is_final(i - 1)) {
780  // Now we repeat everything from the opposite side. Previous
781  // character is already finalized. Estimate the imaginary body
782  // including this character based on the character.
783  bool skipped_whitespaces = false;
784  float c1 = center_x(i - 1) + 1.5 * estimated_pitch_;
785  while (c1 < box(i).left()) {
786  skipped_whitespaces = true;
787  c1 += estimated_pitch_;
788  }
789  TBOX ibody(c1 - estimated_pitch_, box(i).bottom(), c1, box(i).top());
790 
791  size_t j = i;
792  TBOX merged;
793  while (j < num_chars() && !is_final(j) && mostly_overlap(ibody, box(j)) &&
794  merged.bounding_union(box(j)).height() <
795  estimated_pitch_ * (1 + kFPTolerance)) {
796  merged += box(j);
797  j++;
798  }
799 
800  if (j < num_chars() && significant_overlap(ibody, box(j))) {
801  if (!is_final(j)) intersecting = true;
802  } else {
803  not_intersecting = true;
804  if (j - i > 0) {
805  if (j - i == 1) {
806  if (!skipped_whitespaces) mark_good(i);
807  if (box(i).width() <= estimated_pitch_ * 0.5) {
808  ibody += box(i);
809  character(i)->set_box(ibody);
810  }
811  character(i)->set_merge_to_prev(false);
812  finalize(i);
813  } else {
814  for (size_t k = i + 1; k < j; k++) {
815  character(k)->set_merge_to_prev(true);
816  }
817  }
818  }
819  }
820  }
821 
822  // This character doesn't fit well into the estimated imaginary
823  // bodies. Mark it as bad.
824  if (intersecting && !not_intersecting) mark_bad(i);
825  if (character(i)->alignment() != alignment ||
826  character(i)->merge_to_prev()) {
827  changed = true;
828  }
829  }
830 
831  return changed;
832 }
833 
834 void FPRow::MergeFragments() {
835  int last_char = 0;
836 
837  for (size_t j = 0; j < num_chars(); ++j) {
838  if (character(j)->merge_to_prev()) {
839  character(last_char)->Merge(*character(j));
840  character(j)->set_delete_flag(true);
841  clear_alignment(last_char);
842  character(j-1)->set_merge_to_prev(false);
843  } else {
844  last_char = j;
845  }
846  }
847  DeleteChars();
848 }
849 
850 void FPRow::FinalizeLargeChars() {
851  float row_pitch = estimated_pitch();
852  for (size_t i = 0; i < num_chars(); i++) {
853  if (is_final(i)) continue;
854 
855  // Finalize if both neighbors are finalized. We have no other choice.
856  if (i > 0 && is_final(i - 1) && i < num_chars() - 1 && is_final(i + 1)) {
857  finalize(i);
858  continue;
859  }
860 
861  float cx = center_x(i);
862  TBOX ibody(cx - 0.5 * row_pitch, 0, cx + 0.5 * row_pitch, 1);
863  if (i > 0) {
864  // The preceding character significantly intersects with the
865  // imaginary body of this character. Let Pass2Analyze() handle
866  // this case.
867  if (x_overlap_fraction(ibody, box(i - 1)) > 0.1) continue;
868  if (!is_final(i - 1)) {
869  TBOX merged = box(i);
870  merged += box(i - 1);
871  if (merged.width() < row_pitch) continue;
872  // This character cannot be finalized yet because it can be
873  // merged with the previous one. Again, let Pass2Analyze()
874  // handle this case.
875  }
876  }
877  if (i < num_chars() - 1) {
878  if (x_overlap_fraction(ibody, box(i + 1)) > 0.1) continue;
879  if (!is_final(i + 1)) {
880  TBOX merged = box(i);
881  merged += box(i + 1);
882  if (merged.width() < row_pitch) continue;
883  }
884  }
885  finalize(i);
886  }
887 
888  // Update alignment decision. We only consider finalized characters
889  // in pass2. E.g. if a finalized character C has another finalized
890  // character L on its left and a not-finalized character R on its
891  // right, we mark C as good if the pitch between C and L is good,
892  // regardless of the pitch between C and R.
893  for (size_t i = 0; i < num_chars(); i++) {
894  if (!is_final(i)) continue;
895  bool good_pitch = false;
896  bool bad_pitch = false;
897  if (i > 0 && is_final(i - 1)) {
898  if (is_good_pitch(row_pitch, box(i - 1), box(i))) {
899  good_pitch = true;
900  } else {
901  bad_pitch = true;
902  }
903  }
904  if (i < num_chars() - 1 && is_final(i + 1)) {
905  if (is_good_pitch(row_pitch, box(i), box(i + 1))) {
906  good_pitch = true;
907  } else {
908  bad_pitch = true;
909  }
910  }
911  if (good_pitch && !bad_pitch) mark_good(i);
912  else if (!good_pitch && bad_pitch) mark_bad(i);
913  }
914 }
915 
916 class FPAnalyzer {
917  public:
918  FPAnalyzer(ICOORD page_tr, TO_BLOCK_LIST *port_blocks);
919  ~FPAnalyzer() { }
920 
921  void Pass1Analyze() {
922  for (size_t i = 0; i < rows_.size(); i++) rows_[i].Pass1Analyze();
923  }
924 
925  // Estimate character pitch for each row. The argument pass1 can be
926  // set to true if the function is called after Pass1Analyze(), to
927  // eliminate some redundant computation.
928  void EstimatePitch(bool pass1);
929 
930  bool maybe_fixed_pitch() {
931  if (rows_.empty() ||
932  rows_.size() <= num_bad_rows_ + num_tall_rows_ + 1) return false;
933  return true;
934  }
935 
936  void MergeFragments() {
937  for (size_t i = 0; i < rows_.size(); i++) rows_[i].MergeFragments();
938  }
939 
940  void FinalizeLargeChars() {
941  for (size_t i = 0; i < rows_.size(); i++) rows_[i].FinalizeLargeChars();
942  }
943 
944  bool Pass2Analyze() {
945  bool changed = false;
946  for (size_t i = 0; i < rows_.size(); i++) {
947  if (rows_[i].Pass2Analyze()) {
948  changed = true;
949  }
950  }
951  return changed;
952  }
953 
954  void OutputEstimations() {
955  for (size_t i = 0; i < rows_.size(); i++) rows_[i].OutputEstimations();
956  // Don't we need page-level estimation of gaps/spaces?
957  }
958 
959  void DebugOutputResult() {
960  tprintf("FPAnalyzer: final result\n");
961  for (size_t i = 0; i < rows_.size(); i++) rows_[i].DebugOutputResult(i);
962  }
963 
964  size_t num_rows() {
965  return rows_.size();
966  }
967 
968  // Returns the upper limit for pass2 loop iteration.
969  unsigned max_iteration() {
970  // We're fixing at least one character per iteration. So basically
971  // we shouldn't require more than max_chars_per_row_ iterations.
972  return max_chars_per_row_ + 100;
973  }
974 
975  private:
976  ICOORD page_tr_;
977  std::vector<FPRow> rows_;
978  unsigned num_tall_rows_;
979  unsigned num_bad_rows_;
980  // TODO: num_empty_rows_ is incremented, but never used overwise.
981  unsigned num_empty_rows_;
982  unsigned max_chars_per_row_;
983 };
984 
985 FPAnalyzer::FPAnalyzer(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
986 : page_tr_(page_tr),
987  num_tall_rows_(0),
988  num_bad_rows_(0),
989  num_empty_rows_(0),
990  max_chars_per_row_(0)
991 {
992  TO_BLOCK_IT block_it(port_blocks);
993 
994  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
995  block_it.forward()) {
996  TO_BLOCK *block = block_it.data();
997  if (!block->get_rows()->empty()) {
998  ASSERT_HOST(block->xheight > 0);
999  find_repeated_chars(block, FALSE);
1000  }
1001  }
1002 
1003  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
1004  block_it.forward()) {
1005  TO_ROW_IT row_it = block_it.data()->get_rows();
1006  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1007  FPRow row;
1008  row.Init(row_it.data());
1009  rows_.push_back(row);
1010  size_t num_chars = rows_.back().num_chars();
1011  if (num_chars <= 1) num_empty_rows_++;
1012  if (num_chars > max_chars_per_row_) max_chars_per_row_ = num_chars;
1013  }
1014  }
1015 }
1016 
1017 void FPAnalyzer::EstimatePitch(bool pass1) {
1018  LocalCorrelation pitch_height_stats;
1019 
1020  num_tall_rows_ = 0;
1021  num_bad_rows_ = 0;
1022  pitch_height_stats.Clear();
1023  for (size_t i = 0; i < rows_.size(); i++) {
1024  rows_[i].EstimatePitch(pass1);
1025  if (rows_[i].good_pitches()) {
1026  pitch_height_stats.Add(rows_[i].height() + rows_[i].gap(),
1027  rows_[i].pitch(), rows_[i].good_pitches());
1028  if (rows_[i].height_pitch_ratio() > 1.1) num_tall_rows_++;
1029  } else {
1030  num_bad_rows_++;
1031  }
1032  }
1033 
1034  pitch_height_stats.Finish();
1035  for (size_t i = 0; i < rows_.size(); i++) {
1036  if (rows_[i].good_pitches() >= 5) {
1037  // We have enough evidences. Just use the pitch estimation
1038  // from this row.
1039  rows_[i].set_estimated_pitch(rows_[i].pitch());
1040  } else if (rows_[i].num_chars() > 1) {
1041  float estimated_pitch =
1042  pitch_height_stats.EstimateYFor(rows_[i].height() + rows_[i].gap(),
1043  0.1);
1044  // CJK characters are more likely to be fragmented than poorly
1045  // chopped. So trust the page-level estimation of character
1046  // pitch only if it's larger than row-level estimation or
1047  // row-level estimation is too large (2x bigger than row height).
1048  if (estimated_pitch > rows_[i].pitch() ||
1049  rows_[i].pitch() > rows_[i].height() * 2.0) {
1050  rows_[i].set_estimated_pitch(estimated_pitch);
1051  } else {
1052  rows_[i].set_estimated_pitch(rows_[i].pitch());
1053  }
1054  }
1055  }
1056 }
1057 
1058 } // namespace
1059 
1061  TO_BLOCK_LIST *port_blocks) {
1062  FPAnalyzer analyzer(page_tr, port_blocks);
1063  if (analyzer.num_rows() == 0) return;
1064 
1065  analyzer.Pass1Analyze();
1066  analyzer.EstimatePitch(true);
1067 
1068  // Perform pass1 analysis again with the initial estimation of row
1069  // pitches, for better estimation.
1070  analyzer.Pass1Analyze();
1071  analyzer.EstimatePitch(true);
1072 
1073  // Early exit if the page doesn't seem to contain fixed pitch rows.
1074  if (!analyzer.maybe_fixed_pitch()) {
1076  tprintf("Page doesn't seem to contain fixed pitch rows\n");
1077  }
1078  return;
1079  }
1080 
1081  unsigned iteration = 0;
1082  do {
1083  analyzer.MergeFragments();
1084  analyzer.FinalizeLargeChars();
1085  analyzer.EstimatePitch(false);
1086  iteration++;
1087  } while (analyzer.Pass2Analyze() && iteration < analyzer.max_iteration());
1088 
1090  tprintf("compute_fixed_pitch_cjk finished after %u iteration (limit=%u)\n",
1091  iteration, analyzer.max_iteration());
1092  }
1093 
1094  analyzer.OutputEstimations();
1095  if (textord_debug_pitch_test) analyzer.DebugOutputResult();
1096 }
#define BOOL_VAR(name, val, comment)
Definition: params.h:279
void print() const
Definition: rect.h:278
Definition: rect.h:34
BlobTextFlowType flow() const
Definition: blobbox.h:296
int x_gap(const TBOX &box) const
Definition: rect.h:225
TO_ROW_LIST * get_rows()
Definition: blobbox.h:717
void find_repeated_chars(TO_BLOCK *block, bool testing_on)
Definition: topitch.cpp:1763
int16_t width() const
Definition: rect.h:115
float xheight
Definition: blobbox.h:670
int16_t left() const
Definition: rect.h:72
integer coordinate
Definition: points.h:32
#define FALSE
Definition: capi.h:52
bool joined_to_prev() const
Definition: blobbox.h:257
float xheight
Definition: blobbox.h:801
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
void compute_fixed_pitch_cjk(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
Definition: cjkpitch.cpp:1060
const TBOX & bounding_box() const
Definition: blobbox.h:231
bool textord_space_size_is_variable
Definition: cjkpitch.cpp:31
int16_t right() const
Definition: rect.h:79
TBOX bounding_union(const TBOX &box) const
Definition: rect.cpp:129
int16_t height() const
Definition: rect.h:108
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:612
EXTERN bool textord_debug_pitch_test
Definition: topitch.cpp:42
#define ASSERT_HOST(x)
Definition: errcode.h:84
PITCH_TYPE pitch_decision
Definition: blobbox.h:663