tesseract  5.0.0-alpha-619-ge9db
topitch.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: topitch.cpp (Formerly to_pitch.c)
3  * Description: Code to determine fixed pitchness and the pitch if fixed.
4  * Author: Ray Smith
5  *
6  * (C) Copyright 1993, Hewlett-Packard Ltd.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  *
17  **********************************************************************/
18 
19 #include "blobbox.h"
20 #include "statistc.h"
21 #include "drawtord.h"
22 #include "makerow.h"
23 #include "pitsync1.h"
24 #include "pithsync.h"
25 #include "tovars.h"
26 #include "wordseg.h"
27 #include "topitch.h"
28 #include <tesseract/helpers.h>
29 
30 // Include automatically generated configuration file if running autoconf.
31 #ifdef HAVE_CONFIG_H
32 #include "config_auto.h"
33 #endif
34 
35 #include <memory>
36 
// Tunable parameters for the pitch code, declared through the BOOL_VAR /
// double_VAR macros; each visible complete declaration passes an identifier,
// a value and a description string.
// NOTE(review): the embedded numbering jumps over lines 38, 42, 44, 48, 50
// and 53, so several declarations below survive only as their trailing
// description string. Restore the missing heads from the upstream file.
37 static BOOL_VAR (textord_all_prop, false, "All doc is proportial text");
39 "Debug on fixed pitch test");
40 static BOOL_VAR (textord_disable_pitch_test, false,
41 "Turn off dp fixed pitch algorithm");
43 "Do even faster pitch algorithm");
45 "Write full metric stuff");
46 BOOL_VAR (textord_show_row_cuts, false, "Draw row-level cuts");
47 BOOL_VAR (textord_show_page_cuts, false, "Draw page-level cuts");
49 "Use correct answer for fixed/prop");
51 "Attempt whole doc/block fixed pitch");
52 double_VAR (textord_projection_scale, 0.200, "Ding rate for mid-cuts");
54 "Ding rate for unbalanced char cells");
55 
// Size of the gaps[] array in row_pitch_stats; the cluster_stats[] array
// there is declared one element larger.
56 #define BLOCK_STATS_CLUSTERS 10
// Upper limit of the STATS range that collects row pitches in try_doc_fixed.
57 #define MAX_ALLOWED_PITCH 100 //max pixel pitch.
58 
// qsort comparator that orders two floats ascending.
// Returns 1 when *arg1 is larger, -1 when it is smaller, 0 otherwise.
static int sort_floats(const void *arg1, const void *arg2) {
  const float lhs = *reinterpret_cast<const float*>(arg1);
  const float rhs = *reinterpret_cast<const float*>(arg2);
  const float delta = lhs - rhs;
  // Each comparison yields 0 or 1, so the difference is the sign of delta.
  return static_cast<int>(delta > 0) - static_cast<int>(delta < 0);
}
71 
72 /**********************************************************************
73  * compute_fixed_pitch
74  *
75  * Decide whether each row is fixed pitch individually.
76  * Correlate definite and uncertain results to obtain an individual
77  * result for each row in the TO_ROW class.
78  **********************************************************************/
79 
// Top-level driver for the pitch decision over a list of blocks.
// 1. Runs compute_block_pitch on every block.
// 2. If try_doc_fixed returns false, runs try_block_fixed on each block and,
//    where that also returns false, try_rows_fixed.
// 3. Calls fix_row_pitch on every row of each block that either has no
//    POLY_BLOCK or whose POLY_BLOCK reports IsText().
// Block and row numbers passed to the helpers are 1-based.
80 void compute_fixed_pitch(ICOORD page_tr, // top right
81  TO_BLOCK_LIST* port_blocks, // input list
82  float gradient, // page skew
83  FCOORD rotation, // for drawing
84  bool testing_on) { // correct orientation
85  TO_BLOCK_IT block_it; //iterator
86  TO_BLOCK *block; //current block;
87  TO_ROW *row; //current row
88  int block_index; //block number
89  int row_index; //row number
90 
91 #ifndef GRAPHICS_DISABLED
 // Make sure the debug window exists before anything is drawn into it.
92  if (textord_show_initial_words && testing_on) {
93  if (to_win == nullptr)
94  create_to_win(page_tr);
95  }
96 #endif
97 
 // Pass 1: per-block (and, inside it, per-row) pitch statistics.
98  block_it.set_to_list (port_blocks);
99  block_index = 1;
100  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
101  block_it.forward ()) {
102  block = block_it.data ();
103  compute_block_pitch(block, rotation, block_index, testing_on);
104  block_index++;
105  }
106 
 // Pass 2: try the coarsest decision first and fall back to finer ones.
107  if (!try_doc_fixed (page_tr, port_blocks, gradient)) {
108  block_index = 1;
109  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
110  block_it.forward ()) {
111  block = block_it.data ();
112  if (!try_block_fixed (block, block_index))
113  try_rows_fixed(block, block_index, testing_on);
114  block_index++;
115  }
116  }
117 
 // Pass 3: settle every row of the text blocks. block_index is not advanced
 // for blocks skipped by the `continue` below.
118  block_index = 1;
119  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
120  block_it.forward()) {
121  block = block_it.data ();
122  POLY_BLOCK* pb = block->block->pdblk.poly_block();
123  if (pb != nullptr && !pb->IsText()) continue; // Non-text doesn't exist!
124  // row iterator
125  TO_ROW_IT row_it(block->get_rows());
126  row_index = 1;
127  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
128  row = row_it.data ();
129  fix_row_pitch(row, block, port_blocks, row_index, block_index);
130  row_index++;
131  }
132  block_index++;
133  }
134 #ifndef GRAPHICS_DISABLED
 // NOTE(review): embedded line 136 is absent from this listing, so the body
 // of this branch reads as empty here - check the upstream source.
135  if (textord_show_initial_words && testing_on) {
137  }
138 #endif
139 }
140 
141 
142 /**********************************************************************
143  * fix_row_pitch
144  *
145  * Get a pitch_decision for this row by voting among similar rows in the
146  * block, then similar rows over all the page, or any other rows at all.
147  **********************************************************************/
148 
// Settles the pitch decision of bad_row.
// If the row is not already PITCH_DEF_FIXED / PITCH_DEF_PROP, every row of
// every text block is scanned and its pitch_decision is added to one of three
// tallies:
//   block_votes - rows that pass the height-similarity test and lie in block
//                 number block_target,
//   like_votes  - rows that pass the test but lie in other blocks,
//   other_votes - rows that fail the test.
// Definite decisions count textord_words_veto_power, maybe/corrected ones
// count 1; fixed adds, proportional subtracts. The row then becomes
// PITCH_CORR_FIXED (pitch = ile(0.5) of the matching stats) or
// PITCH_CORR_PROP. Finally the spacing fields of the row are filled in for
// the fixed case, or fixed_pitch/char_cells are cleared for the prop case.
// row_target and block_target are only used for messages and for matching
// the block number.
// NOTE(review): embedded lines 189, 192, 202, 219, 260 and 266 are absent
// from this listing; the affected statements are flagged below.
149 void fix_row_pitch(TO_ROW *bad_row, // row to fix
150  TO_BLOCK *bad_block, // block of bad_row
151  TO_BLOCK_LIST *blocks, // blocks to scan
152  int32_t row_target, // number of row
153  int32_t block_target) { // number of block
154  int16_t mid_cuts;
155  int block_votes; //votes in block
156  int like_votes; //votes over page
157  int other_votes; //votes of unlike blocks
158  int block_index; //number of block
159  int row_index; //number of row
160  int maxwidth; //max pitch
161  TO_BLOCK_IT block_it = blocks; //block iterator
162  TO_BLOCK *block; //current block
163  TO_ROW *row; //current row
164  float sp_sd; //space deviation
165  STATS block_stats; //pitches in block
166  STATS like_stats; //pitches in page
167 
168  block_votes = like_votes = other_votes = 0;
169  maxwidth = static_cast<int32_t>(ceil (bad_row->xheight * textord_words_maxspace));
 // Voting only happens for rows without a definite decision.
170  if (bad_row->pitch_decision != PITCH_DEF_FIXED
171  && bad_row->pitch_decision != PITCH_DEF_PROP) {
172  block_stats.set_range (0, maxwidth);
173  like_stats.set_range (0, maxwidth);
174  block_index = 1;
175  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
176  block_it.forward()) {
177  block = block_it.data();
178  POLY_BLOCK* pb = block->block->pdblk.poly_block();
179  if (pb != nullptr && !pb->IsText()) continue; // Non text doesn't exist!
180  row_index = 1;
181  TO_ROW_IT row_it(block->get_rows());
182  for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
183  row_it.forward ()) {
184  row = row_it.data ();
 // Similarity test: all-caps rows compare xheight + ascrise, other rows
 // compare xheight alone, against a band around bad_row's value.
 // NOTE(review): embedded lines 189 and 192 (the tails of the all-caps
 // bounds) are absent; presumably they use textord_pitch_rowsimilarity
 // like the non-caps branch - confirm upstream.
185  if ((bad_row->all_caps
186  && row->xheight + row->ascrise
187  <
188  (bad_row->xheight + bad_row->ascrise) * (1 +
190  && row->xheight + row->ascrise >
191  (bad_row->xheight + bad_row->ascrise) * (1 -
193  || (!bad_row->all_caps
194  && row->xheight <
195  bad_row->xheight * (1 + textord_pitch_rowsimilarity)
196  && row->xheight >
197  bad_row->xheight * (1 - textord_pitch_rowsimilarity))) {
198  if (block_index == block_target) {
 // Similar row in the target block.
199  if (row->pitch_decision == PITCH_DEF_FIXED) {
200  block_votes += textord_words_veto_power;
 // NOTE(review): embedded line 202 (second argument of add) is absent.
201  block_stats.add (static_cast<int32_t>(row->fixed_pitch),
203  }
204  else if (row->pitch_decision == PITCH_MAYBE_FIXED
205  || row->pitch_decision == PITCH_CORR_FIXED) {
206  block_votes++;
207  block_stats.add (static_cast<int32_t>(row->fixed_pitch), 1);
208  }
209  else if (row->pitch_decision == PITCH_DEF_PROP)
210  block_votes -= textord_words_veto_power;
211  else if (row->pitch_decision == PITCH_MAYBE_PROP
212  || row->pitch_decision == PITCH_CORR_PROP)
213  block_votes--;
214  }
215  else {
 // Similar row in some other block.
216  if (row->pitch_decision == PITCH_DEF_FIXED) {
217  like_votes += textord_words_veto_power;
 // NOTE(review): embedded line 219 (second argument of add) is absent.
218  like_stats.add (static_cast<int32_t>(row->fixed_pitch),
220  }
221  else if (row->pitch_decision == PITCH_MAYBE_FIXED
222  || row->pitch_decision == PITCH_CORR_FIXED) {
223  like_votes++;
224  like_stats.add (static_cast<int32_t>(row->fixed_pitch), 1);
225  }
226  else if (row->pitch_decision == PITCH_DEF_PROP)
227  like_votes -= textord_words_veto_power;
228  else if (row->pitch_decision == PITCH_MAYBE_PROP
229  || row->pitch_decision == PITCH_CORR_PROP)
230  like_votes--;
231  }
232  }
233  else {
 // Dissimilar row: only counted, no pitch statistics are kept.
234  if (row->pitch_decision == PITCH_DEF_FIXED)
235  other_votes += textord_words_veto_power;
236  else if (row->pitch_decision == PITCH_MAYBE_FIXED
237  || row->pitch_decision == PITCH_CORR_FIXED)
238  other_votes++;
239  else if (row->pitch_decision == PITCH_DEF_PROP)
240  other_votes -= textord_words_veto_power;
241  else if (row->pitch_decision == PITCH_MAYBE_PROP
242  || row->pitch_decision == PITCH_CORR_PROP)
243  other_votes--;
244  }
245  row_index++;
246  }
247  block_index++;
248  }
 // The block's own vote wins if it exceeds the veto power; otherwise a
 // positive page-wide vote from similar rows decides; otherwise prop.
249  if (block_votes > textord_words_veto_power) {
250  bad_row->fixed_pitch = block_stats.ile (0.5);
251  bad_row->pitch_decision = PITCH_CORR_FIXED;
252  }
253  else if (block_votes <= textord_words_veto_power && like_votes > 0) {
254  bad_row->fixed_pitch = like_stats.ile (0.5);
255  bad_row->pitch_decision = PITCH_CORR_FIXED;
256  }
257  else {
258  bad_row->pitch_decision = PITCH_CORR_PROP;
 // NOTE(review): embedded line 260 (rest of this condition) is absent.
259  if (block_votes == 0 && like_votes == 0 && other_votes > 0
261  tprintf
262  ("Warning:row %d of block %d set prop with no like rows against trend\n",
263  row_target, block_target);
264  }
265  }
 // NOTE(review): embedded line 266 is absent; the brace on 270 suggests it
 // opened a conditional around these debug prints - confirm upstream.
267  tprintf(":b_votes=%d:l_votes=%d:o_votes=%d",
268  block_votes, like_votes, other_votes);
269  tprintf("x=%g:asc=%g\n", bad_row->xheight, bad_row->ascrise);
270  }
271  if (bad_row->pitch_decision == PITCH_CORR_FIXED) {
 // A pitch below textord_min_xheight is replaced from the vote stats,
 // or failing that by the row xheight, then clamped to the minimum.
272  if (bad_row->fixed_pitch < textord_min_xheight) {
273  if (block_votes > 0)
274  bad_row->fixed_pitch = block_stats.ile (0.5);
275  else if (block_votes == 0 && like_votes > 0)
276  bad_row->fixed_pitch = like_stats.ile (0.5);
277  else {
278  tprintf
279  ("Warning:guessing pitch as xheight on row %d, block %d\n",
280  row_target, block_target);
281  bad_row->fixed_pitch = bad_row->xheight;
282  }
283  }
284  if (bad_row->fixed_pitch < textord_min_xheight)
285  bad_row->fixed_pitch = (float) textord_min_xheight;
 // Derive the spacing fields from the chosen pitch.
286  bad_row->kern_size = bad_row->fixed_pitch / 4;
287  bad_row->min_space = static_cast<int32_t>(bad_row->fixed_pitch * 0.6);
288  bad_row->max_nonspace = static_cast<int32_t>(bad_row->fixed_pitch * 0.4);
289  bad_row->space_threshold =
290  (bad_row->min_space + bad_row->max_nonspace) / 2;
291  bad_row->space_size = bad_row->fixed_pitch;
 // Build char_cells only if the row has blobs and no cells yet.
292  if (bad_row->char_cells.empty() && !bad_row->blob_list()->empty()) {
293  tune_row_pitch (bad_row, &bad_row->projection,
294  bad_row->projection_left, bad_row->projection_right,
295  (bad_row->fixed_pitch +
296  bad_row->max_nonspace * 3) / 4, bad_row->fixed_pitch,
297  sp_sd, mid_cuts, &bad_row->char_cells, false);
298  }
299  }
300  else if (bad_row->pitch_decision == PITCH_CORR_PROP
301  || bad_row->pitch_decision == PITCH_DEF_PROP) {
 // Proportional rows carry no pitch and no character cells.
302  bad_row->fixed_pitch = 0.0f;
303  bad_row->char_cells.clear ();
304  }
305 }
306 
307 
308 /**********************************************************************
309  * compute_block_pitch
310  *
311  * Decide whether each block is fixed pitch individually.
312  **********************************************************************/
313 
// Initialises the block-level spacing fields (min_space, max_nonspace,
// fixed_pitch, space_size, kern_size, pr_nonsp) from the block xheight and,
// when the block has rows, runs find_repeated_chars followed by
// compute_rows_pitch. Optionally prints the block bounding box.
// NOTE(review): embedded lines 328, 330, 335 and 342 are absent from this
// listing; the affected statements are flagged below.
314 void compute_block_pitch(TO_BLOCK* block, // input list
315  FCOORD rotation, // for drawing
316  int32_t block_index, // block number
317  bool testing_on) { // correct orientation
318  TBOX block_box; //bounding box
319 
320  block_box = block->block->pdblk.bounding_box ();
321  if (testing_on && textord_debug_pitch_test) {
322  tprintf ("Block %d at (%d,%d)->(%d,%d)\n",
323  block_index,
324  block_box.left (), block_box.bottom (),
325  block_box.right (), block_box.top ());
326  }
 // NOTE(review): the multipliers of xheight (embedded lines 328 and 330)
 // are not visible here.
327  block->min_space = static_cast<int32_t>(floor (block->xheight
329  block->max_nonspace = static_cast<int32_t>(ceil (block->xheight
331  block->fixed_pitch = 0.0f;
332  block->space_size = static_cast<float>(block->min_space);
333  block->kern_size = static_cast<float>(block->max_nonspace);
334  block->pr_nonsp = block->xheight * words_default_prop_nonspace;
 // NOTE(review): embedded line 335 is absent.
336  if (!block->get_rows ()->empty ()) {
337  ASSERT_HOST (block->xheight > 0);
338  find_repeated_chars(block, textord_show_initial_words && testing_on);
339 #ifndef GRAPHICS_DISABLED
 // NOTE(review): embedded line 342 (the statement controlled by this `if`)
 // is absent; as listed, the `if` would bind to the call after #endif.
340  if (textord_show_initial_words && testing_on)
341  //overlap_picture_ops(true);
343 #endif
344  compute_rows_pitch(block,
345  block_index,
346  textord_debug_pitch_test && testing_on);
347  }
348 }
349 
350 
351 /**********************************************************************
352  * compute_rows_pitch
353  *
354  * Decide whether each row is fixed pitch individually.
355  **********************************************************************/
356 
// Runs the per-row pitch estimation for every row of the block.
// For each row it asserts xheight > 0, computes
// maxwidth = ceil(xheight * textord_words_maxspace), and calls
// row_pitch_stats then find_row_pitch. If both succeed but leave
// fixed_pitch at 0, space_size/kern_size are taken from pr_space/pr_nonsp;
// if either fails, fixed_pitch is reset to 0.
// Always returns false.
// NOTE(review): embedded lines 372 and 387 are absent from this listing.
357 bool compute_rows_pitch( //find line stats
358  TO_BLOCK* block, //block to do
359  int32_t block_index, //block number
360  bool testing_on //correct orientation
361 ) {
362  int32_t maxwidth; //of spaces
363  TO_ROW *row; //current row
364  int32_t row_index; //row number.
365  float lower, upper; //cluster thresholds
366  TO_ROW_IT row_it = block->get_rows ();
367 
368  row_index = 1;
369  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
370  row = row_it.data ();
371  ASSERT_HOST (row->xheight > 0);
373  maxwidth = static_cast<int32_t>(ceil (row->xheight * textord_words_maxspace));
 // find_row_pitch is only attempted when row_pitch_stats produced estimates.
374  if (row_pitch_stats (row, maxwidth, testing_on)
375  && find_row_pitch (row, maxwidth,
376  textord_dotmatrix_gap + 1, block, block_index,
377  row_index, testing_on)) {
378  if (row->fixed_pitch == 0) {
 // No fixed pitch found: use the proportional estimates for spacing.
379  lower = row->pr_nonsp;
380  upper = row->pr_space;
381  row->space_size = upper;
382  row->kern_size = lower;
383  }
384  }
385  else {
386  row->fixed_pitch = 0.0f; //insufficient data
388  }
389  row_index++;
390  }
391  return false;
392 }
393 
394 
395 /**********************************************************************
396  * try_doc_fixed
397  *
398  * Attempt to call the entire document fixed pitch.
399  **********************************************************************/
400 
// Attempts a page-wide fixed-pitch analysis.
// Collects every positive row->fixed_pitch into `pitches`, merges the row
// projection profiles into one page profile (each row shifted horizontally
// by shift_factor * (master_y - row_y) to compensate for skew), runs
// tune_row_pitch on the merged profile with the ile(0.5) pitch, and prints
// the result. Every return statement visible in this listing returns false.
// NOTE(review): embedded lines 435 and 506 are absent from this listing.
401 bool try_doc_fixed( //determine pitch
402  ICOORD page_tr, //top right
403  TO_BLOCK_LIST* port_blocks, //input list
404  float gradient //page skew
405 ) {
406  int16_t master_x; //uniform shifts
407  int16_t pitch; //median pitch.
408  int x; //profile coord
409  int prop_blocks; //correct counts
410  int fixed_blocks;
411  int total_row_count; //total in page
412  //iterator
413  TO_BLOCK_IT block_it = port_blocks;
414  TO_BLOCK *block; //current block;
415  TO_ROW *row; //current row
416  int16_t projection_left; //edges
417  int16_t projection_right;
418  int16_t row_left; //edges of row
419  int16_t row_right;
420  ICOORDELT_LIST *master_cells; //cells for page
421  float master_y; //uniform shifts
422  float shift_factor; //page skew correction
423  float row_shift; //shift for row
424  float final_pitch; //output pitch
425  float row_y; //baseline
426  STATS projection; //entire page
427  STATS pitches (0, MAX_ALLOWED_PITCH);
428  //for median
429  float sp_sd; //space sd
430  int16_t mid_cuts; //no of cheap cuts
431  float pitch_sd; //sync rating
432 
 // NOTE(review): embedded line 435 (the remainder of this condition) is
 // absent from the listing.
433  if (block_it.empty ()
434  // || block_it.data()==block_it.data_relative(1)
436  return false;
437  shift_factor = gradient / (gradient * gradient + 1);
438  // row iterator
 // The first row of the first block supplies the reference point
 // (master_x, master_y) for the skew shifts.
439  TO_ROW_IT row_it(block_it.data ()->get_rows());
440  master_x = row_it.data ()->projection_left;
441  master_y = row_it.data ()->baseline.y (master_x);
442  projection_left = INT16_MAX;
443  projection_right = -INT16_MAX;
444  prop_blocks = 0;
445  fixed_blocks = 0;
446  total_row_count = 0;
447 
 // First sweep: gather row pitches and the horizontal extent of the
 // shifted row projections.
448  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
449  block_it.forward ()) {
450  block = block_it.data ();
451  row_it.set_to_list (block->get_rows ());
452  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
453  row = row_it.data ();
454  total_row_count++;
455  if (row->fixed_pitch > 0)
456  pitches.add (static_cast<int32_t>(row->fixed_pitch), 1);
457  //find median
458  row_y = row->baseline.y (master_x);
459  row_left =
460  static_cast<int16_t>(row->projection_left -
461  shift_factor * (master_y - row_y));
462  row_right =
463  static_cast<int16_t>(row->projection_right -
464  shift_factor * (master_y - row_y));
465  if (row_left < projection_left)
466  projection_left = row_left;
467  if (row_right > projection_right)
468  projection_right = row_right;
469  }
470  }
 // No row has a positive fixed pitch: nothing to analyse.
471  if (pitches.get_total () == 0)
472  return false;
473  projection.set_range (projection_left, projection_right);
474 
 // Second sweep: add each row's projection into the page profile at its
 // shifted position.
475  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
476  block_it.forward ()) {
477  block = block_it.data ();
478  row_it.set_to_list (block->get_rows ());
479  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
480  row = row_it.data ();
481  row_y = row->baseline.y (master_x);
482  row_left =
483  static_cast<int16_t>(row->projection_left -
484  shift_factor * (master_y - row_y));
485  for (x = row->projection_left; x < row->projection_right;
486  x++, row_left++) {
487  projection.add (row_left, row->projection.pile_count (x));
488  }
489  }
490  }
491 
 // `row` is reset to a row taken from block_it's current block; its
 // char_cells receive the page-level cells and are cleared before return.
492  row_it.set_to_list (block_it.data ()->get_rows ());
493  row = row_it.data ();
494 #ifndef GRAPHICS_DISABLED
495  if (textord_show_page_cuts && to_win != nullptr)
496  projection.plot (to_win, projection_left,
497  row->intercept (), 1.0f, -1.0f, ScrollView::CORAL);
498 #endif
499  final_pitch = pitches.ile (0.5);
500  pitch = static_cast<int16_t>(final_pitch);
501  pitch_sd =
502  tune_row_pitch (row, &projection, projection_left, projection_right,
503  pitch * 0.75, final_pitch, sp_sd, mid_cuts,
504  &row->char_cells, false);
505 
 // NOTE(review): embedded line 506 is absent; it presumably guarded this
 // debug print - confirm upstream.
507  tprintf
508  ("try_doc:props=%d:fixed=%d:pitch=%d:final_pitch=%g:pitch_sd=%g:sp_sd=%g:sd/trc=%g:sd/p=%g:sd/trc/p=%g\n",
509  prop_blocks, fixed_blocks, pitch, final_pitch, pitch_sd, sp_sd,
510  pitch_sd / total_row_count, pitch_sd / pitch,
511  pitch_sd / total_row_count / pitch);
512 
513 #ifndef GRAPHICS_DISABLED
514  if (textord_show_page_cuts && to_win != nullptr) {
 // Draw the page-level cells over every row, shifted per row.
 // Note that `row` is reassigned by this loop, so the clear() after
 // #endif acts on whichever row `row` points at by then.
515  master_cells = &row->char_cells;
516  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
517  block_it.forward ()) {
518  block = block_it.data ();
519  row_it.set_to_list (block->get_rows ());
520  for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
521  row_it.forward ()) {
522  row = row_it.data ();
523  row_y = row->baseline.y (master_x);
524  row_shift = shift_factor * (master_y - row_y);
525  plot_row_cells(to_win, ScrollView::GOLDENROD, row, row_shift, master_cells);
526  }
527  }
528  }
529 #endif
530  row->char_cells.clear ();
531  return false;
532 }
533 
534 
535 /**********************************************************************
536  * try_block_fixed
537  *
538  * Try to call the entire block fixed.
539  **********************************************************************/
540 
541 bool try_block_fixed( //find line stats
542  TO_BLOCK* block, //block to do
543  int32_t block_index //block number
544 ) {
545  return false;
546 }
547 
548 
549 /**********************************************************************
550  * try_rows_fixed
551  *
552  * Decide whether each row is fixed pitch individually.
553  **********************************************************************/
554 
// Row-level decision pass for one block.
// Calls fixed_pitch_row on every row whose fixed_pitch is positive, then
// tallies the rows' pitch_decision values with count_block_votes and sets
// block->pitch_decision from the tallies. Always returns false.
// NOTE(review): embedded lines 596, 597, 602, 604, 608 and 610 are absent
// from this listing, so only the PITCH_DUNNO assignments of the final
// decision chain are visible.
555 bool try_rows_fixed( //find line stats
556  TO_BLOCK* block, //block to do
557  int32_t block_index, //block number
558  bool testing_on //correct orientation
559 ) {
560  TO_ROW *row; //current row
561  int32_t row_index; //row number.
562  int32_t def_fixed = 0; //counters
563  int32_t def_prop = 0;
564  int32_t maybe_fixed = 0;
565  int32_t maybe_prop = 0;
566  int32_t dunno = 0;
567  int32_t corr_fixed = 0;
568  int32_t corr_prop = 0;
569  float lower, upper; //cluster thresholds
570  TO_ROW_IT row_it = block->get_rows ();
571 
572  row_index = 1;
573  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
574  row = row_it.data ();
575  ASSERT_HOST (row->xheight > 0);
576  if (row->fixed_pitch > 0 &&
577  fixed_pitch_row(row, block->block, block_index)) {
 // presumably fixed_pitch_row can reset fixed_pitch to 0 - verify; in that
 // case the proportional estimates are used for spacing.
578  if (row->fixed_pitch == 0) {
579  lower = row->pr_nonsp;
580  upper = row->pr_space;
581  row->space_size = upper;
582  row->kern_size = lower;
583  }
584  }
585  row_index++;
586  }
587  count_block_votes(block,
588  def_fixed,
589  def_prop,
590  maybe_fixed,
591  maybe_prop,
592  corr_fixed,
593  corr_prop,
594  dunno);
 // NOTE(review): the rest of this condition (embedded lines 596-597) is
 // absent from the listing.
595  if (testing_on
598  tprintf ("Initially:");
599  print_block_counts(block, block_index);
600  }
 // Definite votes are consulted first; the maybe votes only matter when
 // there are no definite votes at all. One side must outnumber the other
 // by a factor of textord_words_veto_power to win.
 // NOTE(review): the assignments for the fixed/prop outcomes (embedded
 // lines 602, 604, 608, 610) are absent from the listing.
601  if (def_fixed > def_prop * textord_words_veto_power)
603  else if (def_prop > def_fixed * textord_words_veto_power)
605  else if (def_fixed > 0 || def_prop > 0)
606  block->pitch_decision = PITCH_DUNNO;
607  else if (maybe_fixed > maybe_prop * textord_words_veto_power)
609  else if (maybe_prop > maybe_fixed * textord_words_veto_power)
611  else
612  block->pitch_decision = PITCH_DUNNO;
613  return false;
614 }
615 
616 
617 /**********************************************************************
618  * print_block_counts
619  *
620  * Count up how many rows have what decision and print the results.
621  **********************************************************************/
622 
623 void print_block_counts( //find line stats
624  TO_BLOCK *block, //block to do
625  int32_t block_index //block number
626  ) {
627  int32_t def_fixed = 0; //counters
628  int32_t def_prop = 0;
629  int32_t maybe_fixed = 0;
630  int32_t maybe_prop = 0;
631  int32_t dunno = 0;
632  int32_t corr_fixed = 0;
633  int32_t corr_prop = 0;
634 
635  count_block_votes(block,
636  def_fixed,
637  def_prop,
638  maybe_fixed,
639  maybe_prop,
640  corr_fixed,
641  corr_prop,
642  dunno);
643  tprintf ("Block %d has (%d,%d,%d)",
644  block_index, def_fixed, maybe_fixed, corr_fixed);
645  if (textord_blocksall_prop && (def_fixed || maybe_fixed || corr_fixed))
646  tprintf (" (Wrongly)");
647  tprintf (" fixed, (%d,%d,%d)", def_prop, maybe_prop, corr_prop);
648  if (textord_blocksall_fixed && (def_prop || maybe_prop || corr_prop))
649  tprintf (" (Wrongly)");
650  tprintf (" prop, %d dunno\n", dunno);
651 }
652 
653 
654 /**********************************************************************
655  * count_block_votes
656  *
657  * Count the number of rows in the block with each kind of pitch_decision.
658  **********************************************************************/
659 
660 void count_block_votes( //find line stats
661  TO_BLOCK *block, //block to do
662  int32_t &def_fixed, //add to counts
663  int32_t &def_prop,
664  int32_t &maybe_fixed,
665  int32_t &maybe_prop,
666  int32_t &corr_fixed,
667  int32_t &corr_prop,
668  int32_t &dunno) {
669  TO_ROW *row; //current row
670  TO_ROW_IT row_it = block->get_rows ();
671 
672  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
673  row = row_it.data ();
674  switch (row->pitch_decision) {
675  case PITCH_DUNNO:
676  dunno++;
677  break;
678  case PITCH_DEF_PROP:
679  def_prop++;
680  break;
681  case PITCH_MAYBE_PROP:
682  maybe_prop++;
683  break;
684  case PITCH_DEF_FIXED:
685  def_fixed++;
686  break;
687  case PITCH_MAYBE_FIXED:
688  maybe_fixed++;
689  break;
690  case PITCH_CORR_PROP:
691  corr_prop++;
692  break;
693  case PITCH_CORR_FIXED:
694  corr_fixed++;
695  break;
696  }
697  }
698 }
699 
700 
701 /**********************************************************************
702  * row_pitch_stats
703  *
704  * Gather gap statistics for one row and set its initial pr_nonsp, pr_space, fp_nonsp and fp_space estimates.
705  **********************************************************************/
706 
// Builds gap statistics for one row and derives its initial spacing
// estimates.
// The horizontal gaps between consecutive blobs (skipping blobs marked
// joined_to_prev, and gaps of maxwidth or more) are histogrammed, smoothed
// and clustered; the cluster medians are sorted and used to set
// row->pr_nonsp / pr_space and row->fp_nonsp / fp_space.
// Returns false if no gap was recorded or no cluster was found, true once
// the four estimates have been set.
// NOTE(review): embedded line 754 is absent from this listing.
707 bool row_pitch_stats( //find line stats
708  TO_ROW* row, //current row
709  int32_t maxwidth, //of spaces
710  bool testing_on //correct orientation
711 ) {
712  BLOBNBOX *blob; //current blob
713  int gap_index; //current gap
714  int32_t prev_x; //end of prev blob
715  int32_t cluster_count; //no of clusters
716  int32_t prev_count; //of clusters
717  int32_t smooth_factor; //for smoothing stats
718  TBOX blob_box; //bounding box
719  float lower, upper; //cluster thresholds
720  //gap sizes
721  float gaps[BLOCK_STATS_CLUSTERS];
722  //blobs
723  BLOBNBOX_IT blob_it = row->blob_list ();
724  STATS gap_stats (0, maxwidth);
725  STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1];
726  //clusters
727 
728  smooth_factor =
729  static_cast<int32_t>(row->xheight * textord_wordstats_smooth_factor + 1.5);
730  if (!blob_it.empty ()) {
 // Walk the blob list once, starting from the second blob, recording the
 // gap from the previous blob's right edge to this blob's left edge.
731  prev_x = blob_it.data ()->bounding_box ().right ();
732  blob_it.forward ();
733  while (!blob_it.at_first ()) {
734  blob = blob_it.data ();
735  if (!blob->joined_to_prev ()) {
736  blob_box = blob->bounding_box ();
737  if (blob_box.left () - prev_x < maxwidth)
738  gap_stats.add (blob_box.left () - prev_x, 1);
739  prev_x = blob_box.right ();
740  }
741  blob_it.forward ();
742  }
743  }
744  if (gap_stats.get_total () == 0) {
745  return false;
746  }
747  cluster_count = 0;
748  lower = row->xheight * words_initial_lower;
749  upper = row->xheight * words_initial_upper;
750  gap_stats.smooth (smooth_factor);
 // Re-cluster until the cluster count stops growing or reaches the limit.
 // NOTE(review): embedded line 754 (a middle argument of cluster) is absent.
751  do {
752  prev_count = cluster_count;
753  cluster_count = gap_stats.cluster (lower, upper,
755  BLOCK_STATS_CLUSTERS, cluster_stats);
756  }
757  while (cluster_count > prev_count && cluster_count < BLOCK_STATS_CLUSTERS);
758  if (cluster_count < 1) {
759  return false;
760  }
 // cluster_stats is read from index 1, gaps[] is filled from index 0.
761  for (gap_index = 0; gap_index < cluster_count; gap_index++)
762  gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
763  //get medians
764  if (testing_on) {
765  tprintf ("cluster_count=%d:", cluster_count);
766  for (gap_index = 0; gap_index < cluster_count; gap_index++)
767  tprintf (" %g(%d)", gaps[gap_index],
768  cluster_stats[gap_index + 1].get_total ());
769  tprintf ("\n");
770  }
771  qsort (gaps, cluster_count, sizeof (float), sort_floats);
772 
773  //Try to find proportional non-space and space for row.
774  lower = row->xheight * words_default_prop_nonspace;
775  upper = row->xheight * textord_words_min_minspace;
 // Empty-bodied loop: advance gap_index to the first median >= lower.
776  for (gap_index = 0; gap_index < cluster_count
777  && gaps[gap_index] < lower; gap_index++);
778  if (gap_index == 0) {
779  if (testing_on)
780  tprintf ("No clusters below nonspace threshold!!\n");
781  if (cluster_count > 1) {
782  row->pr_nonsp = gaps[0];
783  row->pr_space = gaps[1];
784  }
785  else {
786  row->pr_nonsp = lower;
787  row->pr_space = gaps[0];
788  }
789  }
790  else {
 // Largest median below `lower` is the non-space; the first median at or
 // above `upper` is the space.
791  row->pr_nonsp = gaps[gap_index - 1];
792  while (gap_index < cluster_count && gaps[gap_index] < upper)
793  gap_index++;
794  if (gap_index == cluster_count) {
795  if (testing_on)
796  tprintf ("No clusters above nonspace threshold!!\n");
797  row->pr_space = lower * textord_spacesize_ratioprop;
798  }
799  else
800  row->pr_space = gaps[gap_index];
801  }
802 
803  //Now try to find the fixed pitch space and non-space.
804  upper = row->xheight * words_default_fixed_space;
 // Empty-bodied loop: advance gap_index to the first median >= upper.
805  for (gap_index = 0; gap_index < cluster_count
806  && gaps[gap_index] < upper; gap_index++);
807  if (gap_index == 0) {
808  if (testing_on)
809  tprintf ("No clusters below space threshold!!\n");
810  row->fp_nonsp = upper;
811  row->fp_space = gaps[0];
812  }
813  else {
814  row->fp_nonsp = gaps[gap_index - 1];
815  if (gap_index == cluster_count) {
816  if (testing_on)
817  tprintf ("No clusters above space threshold!!\n");
818  row->fp_space = row->xheight;
819  }
820  else
821  row->fp_space = gaps[gap_index];
822  }
823  if (testing_on) {
824  tprintf
825  ("Initial estimates:pr_nonsp=%g, pr_space=%g, fp_nonsp=%g, fp_space=%g\n",
826  row->pr_nonsp, row->pr_space, row->fp_nonsp, row->fp_space);
827  }
828  return true; //computed some stats
829 }
830 
831 
832 /**********************************************************************
833  * find_row_pitch
834  *
835  * Check to see if this row could be fixed pitch using the given spacings.
836  * Blobs with gaps smaller than the lower threshold are assumed to be one.
837  * The larger threshold is the word gap threshold.
838  **********************************************************************/
839 
// Estimates a pitch for one row from the fp_space / fp_nonsp values already
// stored on it.
// count_pitch_stats is run twice, once with dm_gap and once with 0 as its
// last argument, and the interquartile ranges (ile(0.75) - ile(0.25)) of the
// resulting gap and pitch statistics decide which of the two models is used.
// Returns false when both pitch IQRs exceed maxwidth (insufficient data);
// otherwise stores fixed_pitch, kern_size, min_space, max_nonspace,
// space_size, space_threshold and used_dm_model on the row and returns true.
// NOTE(review): embedded lines 911, 918, 919, 943, 956 and 958 are absent
// from this listing; the affected statements are flagged below.
840 bool find_row_pitch( //find lines
841  TO_ROW* row, //row to do
842  int32_t maxwidth, //max permitted space
843  int32_t dm_gap, //ignorable gaps
844  TO_BLOCK* block, //block of row
845  int32_t block_index, //block_number
846  int32_t row_index, //number of row
847  bool testing_on //correct orientation
848 ) {
849  bool used_dm_model; //looks like dot matrix
850  float min_space; //estimate threshold
851  float non_space; //gap size
852  float gap_iqr; //interquartile range
853  float pitch_iqr;
854  float dm_gap_iqr; //interquartile range
855  float dm_pitch_iqr;
856  float dm_pitch; //pitch with dm on
857  float pitch; //revised estimate
858  float initial_pitch; //guess at pitch
859  STATS gap_stats (0, maxwidth);
860  //centre-centre
861  STATS pitch_stats (0, maxwidth);
862 
863  row->fixed_pitch = 0.0f;
864  initial_pitch = row->fp_space;
865  if (initial_pitch > row->xheight * (1 + words_default_fixed_limit))
866  initial_pitch = row->xheight;//keep pitch decent
867  non_space = row->fp_nonsp;
868  if (non_space > initial_pitch)
869  non_space = initial_pitch;
870  min_space = (initial_pitch + non_space) / 2;
871 
 // First model: statistics with dm_gap passed to count_pitch_stats.
 // On failure the IQRs are set to sentinel values.
872  if (!count_pitch_stats (row, &gap_stats, &pitch_stats,
873  initial_pitch, min_space, true, false, dm_gap)) {
874  dm_gap_iqr = 0.0001;
875  dm_pitch_iqr = maxwidth * 2.0f;
876  dm_pitch = initial_pitch;
877  }
878  else {
879  dm_gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
880  dm_pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
881  dm_pitch = pitch_stats.ile (0.5);
882  }
 // Second model: same statistics with 0 in place of dm_gap.
883  gap_stats.clear ();
884  pitch_stats.clear ();
885  if (!count_pitch_stats (row, &gap_stats, &pitch_stats,
886  initial_pitch, min_space, true, false, 0)) {
887  gap_iqr = 0.0001;
888  pitch_iqr = maxwidth * 3.0f;
889  }
890  else {
891  gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
892  pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
893  if (testing_on)
894  tprintf
895  ("First fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n",
896  initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile (0.5));
897  initial_pitch = pitch_stats.ile (0.5);
 // If the measured pitch is below min_space, redo the statistics with
 // the measured pitch used for both initial_pitch and min_space.
898  if (min_space > initial_pitch
899  && count_pitch_stats (row, &gap_stats, &pitch_stats,
900  initial_pitch, initial_pitch, true, false, 0)) {
901  min_space = initial_pitch;
902  gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
903  pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
904  if (testing_on)
905  tprintf
906  ("Revised fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n",
907  initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile (0.5));
908  initial_pitch = pitch_stats.ile (0.5);
909  }
910  }
 // NOTE(review): embedded line 911 is absent; it presumably guarded this
 // debug print - confirm upstream.
912  tprintf("Blk=%d:Row=%d:%c:p_iqr=%g:g_iqr=%g:dm_p_iqr=%g:dm_g_iqr=%g:%c:",
913  block_index, row_index, 'X',
914  pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr,
915  pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth ? 'D' :
916  (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr ? 'S' : 'M'));
917  if (pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth) {
 // NOTE(review): embedded lines 918-919 are absent from the listing.
920  tprintf ("\n");
921  return false; //insufficient data
922  }
 // Choose the model with the smaller pitch-IQR to gap-IQR ratio (compared
 // by cross-multiplication).
923  if (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr) {
924  if (testing_on)
925  tprintf
926  ("Choosing non dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n",
927  pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
928  gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
929  pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
930  pitch = pitch_stats.ile (0.5);
931  used_dm_model = false;
932  }
933  else {
934  if (testing_on)
935  tprintf
936  ("Choosing dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n",
937  pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
938  gap_iqr = dm_gap_iqr;
939  pitch_iqr = dm_pitch_iqr;
940  pitch = dm_pitch;
941  used_dm_model = true;
942  }
 // NOTE(review): embedded line 943 is absent; the brace on 952 suggests it
 // opened a conditional around these debug prints - confirm upstream.
944  tprintf ("rev_p_iqr=%g:rev_g_iqr=%g:pitch=%g:",
945  pitch_iqr, gap_iqr, pitch);
946  tprintf ("p_iqr/g=%g:p_iqr/x=%g:iqr_res=%c:",
947  pitch_iqr / gap_iqr, pitch_iqr / block->xheight,
948  pitch_iqr < gap_iqr * textord_fpiqr_ratio
949  && pitch_iqr < block->xheight * textord_max_pitch_iqr
950  && pitch < block->xheight * textord_words_default_maxspace
951  ? 'F' : 'P');
952  }
 // NOTE(review): the statements selected by this if/else (embedded lines
 // 956 and 958) are absent from the listing.
953  if (pitch_iqr < gap_iqr * textord_fpiqr_ratio
954  && pitch_iqr < block->xheight * textord_max_pitch_iqr
955  && pitch < block->xheight * textord_words_default_maxspace)
957  else
959  row->fixed_pitch = pitch;
960  row->kern_size = gap_stats.ile (0.5);
 // The cast applies to the sum only; the division by 2 is then integer
 // division.
961  row->min_space = static_cast<int32_t>(row->fixed_pitch + non_space) / 2;
962  if (row->min_space > row->fixed_pitch)
963  row->min_space = static_cast<int32_t>(row->fixed_pitch);
964  row->max_nonspace = row->min_space;
965  row->space_size = row->fixed_pitch;
966  row->space_threshold = (row->max_nonspace + row->min_space) / 2;
967  row->used_dm_model = used_dm_model;
968  return true;
969 }
970 
971 
972 /**********************************************************************
973  * fixed_pitch_row
974  *
975  * Check to see if this row could be fixed pitch using the given spacings.
976  * Blobs with gaps smaller than the lower threshold are assumed to be one.
977  * The larger threshold is the word gap threshold.
978  **********************************************************************/
979 
980 bool fixed_pitch_row(TO_ROW* row, // row to do
981  BLOCK* block,
982  int32_t block_index // block_number
983 ) {
// Decides how fixed-pitch this row looks and reports the verdict through
// row->pitch_decision (read by the switch near the end). Always returns true.
// NOTE(review): this listing has gaps in its embedded line numbers (997, 1013,
// 1015, 1022, 1024 and 1030). The statements that lived on those lines are
// absent, so several if/else arms below look empty; restore them from the
// upstream file before trying to compile this text.
984  const char *res_string; // pitch result
// NOTE(review): mid_cuts has no initialiser. tune_row_pitch2() can return
// before writing its best_mid_cuts output, so the `mid_cuts > 0` test further
// down may read an indeterminate value - confirm and consider initialising.
985  int16_t mid_cuts; // no of cheap cuts
986  float non_space; // gap size
987  float pitch_sd; // error on pitch
988  float sp_sd = 0.0f; // space sd
989 
// The non-space gap estimate is never allowed to exceed the pitch itself.
990  non_space = row->fp_nonsp;
991  if (non_space > row->fixed_pitch)
992  non_space = row->fixed_pitch;
// block may be null; a non-text polygon block skips the pitch fit entirely.
993  POLY_BLOCK* pb = block != nullptr ? block->pdblk.poly_block() : nullptr;
994  if (textord_all_prop || (pb != nullptr && !pb->IsText())) {
995  // Set the decision to definitely proportional.
996  pitch_sd = textord_words_def_prop * row->fixed_pitch;
// NOTE(review): listing line 997 is missing here - presumably the assignment
// of the "definitely proportional" decision announced by the comment above.
998  } else {
// row->fixed_pitch is bound to tune_row_pitch's `float& initial_pitch`, so the
// call can replace it with the tuned pitch. sp_sd and mid_cuts are outputs and
// row->char_cells receives the fitted cell boundaries. The space size handed
// in is a 1:3 blend of the pitch and the non-space gap.
999  pitch_sd = tune_row_pitch (row, &row->projection, row->projection_left,
1000  row->projection_right,
1001  (row->fixed_pitch + non_space * 3) / 4,
1002  row->fixed_pitch, sp_sd, mid_cuts,
1003  &row->char_cells,
1004  block_index == textord_debug_block);
// Low sd relative to the pitch => some flavour of "fixed". When the low two
// bits of pitsync_linear_version equal 3, additional evidence is required
// (dm model used, or a large sp_sd).
1005  if (pitch_sd < textord_words_pitchsd_threshold * row->fixed_pitch
1006  && ((pitsync_linear_version & 3) < 3
1007  || ((pitsync_linear_version & 3) >= 3 && (row->used_dm_model
1008  || sp_sd > 20
1009  || (pitch_sd == 0 && sp_sd > 10))))) {
1010  if (pitch_sd < textord_words_def_fixed * row->fixed_pitch
1011  && !row->all_caps
1012  && ((pitsync_linear_version & 3) < 3 || sp_sd > 20))
// NOTE(review): listing lines 1013 and 1015 (the bodies of this if/else) are
// missing - presumably the definite/maybe fixed decisions; confirm upstream.
1014  else
1016  }
1017  else if ((pitsync_linear_version & 3) < 3
1018  || sp_sd > 20
1019  || mid_cuts > 0
1020  || pitch_sd >= textord_words_pitchsd_threshold * row->fixed_pitch) {
1021  if (pitch_sd < textord_words_def_prop * row->fixed_pitch)
// NOTE(review): listing lines 1022 and 1024 (the bodies of this if/else) are
// missing - presumably the maybe/definite proportional decisions.
1023  else
1025  }
1026  else
1027  row->pitch_decision = PITCH_DUNNO;
1028  }
1029 
// NOTE(review): listing line 1030 is missing - presumably the `if (...) {`
// that opens the debug-print block closed by the brace on listing line 1050.
1031  res_string = "??";
// Two-letter tag for the decision, used only in the trace line below.
1032  switch (row->pitch_decision) {
1033  case PITCH_DEF_PROP:
1034  res_string = "DP";
1035  break;
1036  case PITCH_MAYBE_PROP:
1037  res_string = "MP";
1038  break;
1039  case PITCH_DEF_FIXED:
1040  res_string = "DF";
1041  break;
1042  case PITCH_MAYBE_FIXED:
1043  res_string = "MF";
1044  break;
1045  default:
1046  res_string = "??";
1047  }
1048  tprintf (":sd/p=%g:occ=%g:init_res=%s\n",
1049  pitch_sd / row->fixed_pitch, sp_sd, res_string);
1050  }
1051  return true;
1052 }
1053 
1054 
1055 /**********************************************************************
1056  * count_pitch_stats
1057  *
1058  * Count up the gap and pitch stats on the block to see if it is fixed pitch.
1059  * Blobs with gaps smaller than the lower threshold are assumed to be one.
1060  * The larger threshold is the word gap threshold.
1061  * The return value indicates whether there were any decent values to use.
1062  **********************************************************************/
1063 
1064 bool count_pitch_stats( //find lines
1065  TO_ROW* row, //row to do
1066  STATS* gap_stats, //blob gaps
1067  STATS* pitch_stats, //centre-centre stats
1068  float initial_pitch, //guess at pitch
1069  float min_space, //estimate space size
1070  bool ignore_outsize, //discard big objects
1071  bool split_outsize, //split big objects
1072  int32_t dm_gap //ignorable gaps
1073 ) {
1074  bool prev_valid; //not word broken
1075  BLOBNBOX *blob; //current blob
1076  //blobs
1077  BLOBNBOX_IT blob_it = row->blob_list ();
1078  int32_t prev_right; //end of prev blob
1079  int32_t prev_centre; //centre of previous blob
1080  int32_t x_centre; //centre of this blob
1081  int32_t blob_width; //width of blob
1082  int32_t width_units; //no of widths in blob
1083  float width; //blob width
1084  TBOX blob_box; //bounding box
1085  TBOX joined_box; //of super blob
1086 
1087  gap_stats->clear ();
1088  pitch_stats->clear ();
1089  if (blob_it.empty ())
1090  return false;
1091  prev_valid = false;
1092  prev_centre = 0;
1093  prev_right = 0; // stop compiler warning
1094  joined_box = blob_it.data ()->bounding_box ();
1095  do {
1096  blob_it.forward ();
1097  blob = blob_it.data ();
1098  if (!blob->joined_to_prev ()) {
1099  blob_box = blob->bounding_box ();
1100  if ((blob_box.left () - joined_box.right () < dm_gap
1101  && !blob_it.at_first ())
1102  || blob->cblob() == nullptr)
1103  joined_box += blob_box; //merge blobs
1104  else {
1105  blob_width = joined_box.width ();
1106  if (split_outsize) {
1107  width_units =
1108  static_cast<int32_t>(floor (static_cast<float>(blob_width) / initial_pitch + 0.5));
1109  if (width_units < 1)
1110  width_units = 1;
1111  width_units--;
1112  }
1113  else if (ignore_outsize) {
1114  width = static_cast<float>(blob_width) / initial_pitch;
1115  width_units = width < 1 + words_default_fixed_limit
1116  && width > 1 - words_default_fixed_limit ? 0 : -1;
1117  }
1118  else
1119  width_units = 0; //everything in
1120  x_centre = static_cast<int32_t>(joined_box.left ()
1121  + (blob_width -
1122  width_units * initial_pitch) / 2);
1123  if (prev_valid && width_units >= 0) {
1124  // if (width_units>0)
1125  // {
1126  // tprintf("wu=%d, width=%d, xc=%d, adding %d\n",
1127  // width_units,blob_width,x_centre,x_centre-prev_centre);
1128  // }
1129  gap_stats->add (joined_box.left () - prev_right, 1);
1130  pitch_stats->add (x_centre - prev_centre, 1);
1131  }
1132  prev_centre = static_cast<int32_t>(x_centre + width_units * initial_pitch);
1133  prev_right = joined_box.right ();
1134  prev_valid = blob_box.left () - joined_box.right () < min_space;
1135  prev_valid = prev_valid && width_units >= 0;
1136  joined_box = blob_box;
1137  }
1138  }
1139  }
1140  while (!blob_it.at_first ());
1141  return gap_stats->get_total () >= 3;
1142 }
1143 
1144 
1145 /**********************************************************************
1146  * tune_row_pitch
1147  *
1148  * Use a dp algorithm to fit the character cells and return the sd of
1149  * the cell size over the row.
1150  **********************************************************************/
1151 
1152 float tune_row_pitch( //find fp cells
1153  TO_ROW* row, //row to do
1154  STATS* projection, //vertical projection
1155  int16_t projection_left, //edge of projection
1156  int16_t projection_right, //edge of projection
1157  float space_size, //size of blank
1158  float& initial_pitch, //guess at pitch
1159  float& best_sp_sd, //space sd
1160  int16_t& best_mid_cuts, //no of cheap cuts
1161  ICOORDELT_LIST* best_cells, //row cells
1162  bool testing_on //individual words
1163 ) {
// Tries the starting pitch and then integer offsets above and below it (up to
// textord_pitch_range each way), scoring each with compute_pitch_sd(). On
// return initial_pitch holds the best pitch found, best_sp_sd/best_mid_cuts/
// best_cells hold the outputs of that best fit, and the best sd is returned.
1164  int pitch_delta; //offset pitch
1165  int16_t mid_cuts; //cheap cuts
1166  float pitch_sd; //current sd
1167  float best_sd; //best result
1168  float best_pitch; //pitch for best result
1169  float initial_sd; //starting error
1170  float sp_sd; //space sd
1171  ICOORDELT_LIST test_cells; //row cells
1172  ICOORDELT_IT best_it; //start of best list
1173 
// NOTE(review): listing line 1174 is missing. As shown, the return below is
// unconditional and everything after it is unreachable; presumably a guard
// `if (...)` selecting the faster tune_row_pitch2() path stood here - confirm
// against the upstream file.
1175  return tune_row_pitch2 (row, projection, projection_left,
1176  projection_right, space_size, initial_pitch,
1177  best_sp_sd,
1178  //space sd
1179  best_mid_cuts, best_cells, testing_on);
// Test disabled: report the pitch itself as both sd values. best_mid_cuts and
// best_cells are left untouched on this path.
1180  if (textord_disable_pitch_test) {
1181  best_sp_sd = initial_pitch;
1182  return initial_pitch;
1183  }
// Baseline fit at the starting pitch; it writes straight into the caller's
// outputs, which are only overwritten if an offset pitch beats it.
1184  initial_sd =
1185  compute_pitch_sd(row,
1186  projection,
1187  projection_left,
1188  projection_right,
1189  space_size,
1190  initial_pitch,
1191  best_sp_sd,
1192  best_mid_cuts,
1193  best_cells,
1194  testing_on);
1195  best_sd = initial_sd;
1196  best_pitch = initial_pitch;
1197  if (testing_on)
1198  tprintf ("tune_row_pitch:start pitch=%g, sd=%g\n", best_pitch, best_sd);
// Search upwards from the starting pitch.
1199  for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) {
1200  pitch_sd =
1201  compute_pitch_sd (row, projection, projection_left, projection_right,
1202  space_size, initial_pitch + pitch_delta, sp_sd,
1203  mid_cuts, &test_cells, testing_on);
1204  if (testing_on)
1205  tprintf ("testing pitch at %g, sd=%g\n", initial_pitch + pitch_delta,
1206  pitch_sd);
1207  if (pitch_sd < best_sd) {
1208  best_sd = pitch_sd;
1209  best_mid_cuts = mid_cuts;
1210  best_sp_sd = sp_sd;
1211  best_pitch = initial_pitch + pitch_delta;
// Replace the caller's cell list with the cells of this better fit.
1212  best_cells->clear ();
1213  best_it.set_to_list (best_cells);
1214  best_it.add_list_after (&test_cells);
1215  }
1216  else
1217  test_cells.clear ();
// Stop as soon as a candidate is worse than the starting pitch (compared with
// initial_sd, not with the running best).
1218  if (pitch_sd > initial_sd)
1219  break; //getting worse
1220  }
// Same search downwards from the starting pitch.
1221  for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) {
1222  pitch_sd =
1223  compute_pitch_sd (row, projection, projection_left, projection_right,
1224  space_size, initial_pitch - pitch_delta, sp_sd,
1225  mid_cuts, &test_cells, testing_on);
1226  if (testing_on)
1227  tprintf ("testing pitch at %g, sd=%g\n", initial_pitch - pitch_delta,
1228  pitch_sd);
1229  if (pitch_sd < best_sd) {
1230  best_sd = pitch_sd;
1231  best_mid_cuts = mid_cuts;
1232  best_sp_sd = sp_sd;
1233  best_pitch = initial_pitch - pitch_delta;
1234  best_cells->clear ();
1235  best_it.set_to_list (best_cells);
1236  best_it.add_list_after (&test_cells);
1237  }
1238  else
1239  test_cells.clear ();
1240  if (pitch_sd > initial_sd)
1241  break;
1242  }
// Hand the winning pitch back through the reference parameter.
1243  initial_pitch = best_pitch;
1244 
// NOTE(review): listing line 1245 is missing - presumably a debug-flag guard
// for the print below; as shown the print is unconditional.
1246  print_pitch_sd(row,
1247  projection,
1248  projection_left,
1249  projection_right,
1250  space_size,
1251  best_pitch);
1252 
1253  return best_sd;
1254 }
1255 
1256 
1257 /**********************************************************************
1258  * tune_row_pitch2
1259  *
1260  * Use a dp algorithm to fit the character cells and return the sd of
1261  * the cell size over the row.
1262  **********************************************************************/
1263 
1264 float tune_row_pitch2( //find fp cells
1265  TO_ROW* row, //row to do
1266  STATS* projection, //vertical projection
1267  int16_t projection_left, //edge of projection
1268  int16_t projection_right, //edge of projection
1269  float space_size, //size of blank
1270  float& initial_pitch, //guess at pitch
1271  float& best_sp_sd, //space sd
1272  int16_t& best_mid_cuts, //no of cheap cuts
1273  ICOORDELT_LIST* best_cells, //row cells
1274  bool testing_on //individual words
1275 ) {
// Picks a pitch by folding the vertical projection modulo each candidate
// pitch (truncated initial_pitch +/- textord_pitch_range) and choosing the
// candidate whose folded histogram has the emptiest column. A single
// compute_pitch_sd() fit is then run at that pitch; its result is returned
// and initial_pitch is overwritten with the chosen (integer) pitch.
1276  int pitch_delta; //offset pitch
1277  int16_t pixel; //pixel coord
1278  int16_t best_pixel; //pixel coord
1279  int16_t best_delta; //best pitch
1280  int16_t best_pitch; //best pitch
1281  int16_t start; //of good range
1282  int16_t end; //of good range
1283  int32_t best_count; //lowest sum
1284  float best_sd; //best result
1285 
1286  best_sp_sd = initial_pitch;
1287 
// The candidate pitches are integers: the fractional part is dropped here.
1288  best_pitch = static_cast<int>(initial_pitch);
// Bail out when the test is disabled or the pitch is so small that
// best_pitch - textord_pitch_range would not be a positive modulus.
// NOTE(review): best_mid_cuts and best_cells are not written on this path.
1289  if (textord_disable_pitch_test || best_pitch <= textord_pitch_range) {
1290  return initial_pitch;
1291  }
// One histogram per candidate pitch; index textord_pitch_range is delta 0.
1292  std::unique_ptr<STATS[]> sum_proj(new STATS[textord_pitch_range * 2 + 1]); //summed projection
1293 
1294  for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range;
1295  pitch_delta++)
1296  sum_proj[textord_pitch_range + pitch_delta].set_range (0,
1297  best_pitch +
1298  pitch_delta + 1);
// Fold: every projection column is added to the bucket given by its offset
// from the left edge modulo the candidate pitch.
1299  for (pixel = projection_left; pixel <= projection_right; pixel++) {
1300  for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range;
1301  pitch_delta++) {
1302  sum_proj[textord_pitch_range + pitch_delta].add(
1303  (pixel - projection_left) % (best_pitch + pitch_delta),
1304  projection->pile_count(pixel));
1305  }
1306  }
// Seed the minimum with phase 0 of the unshifted pitch, then scan every
// phase of every candidate. Strict `<` means ties keep the earliest one.
1307  best_count = sum_proj[textord_pitch_range].pile_count (0);
1308  best_delta = 0;
1309  best_pixel = 0;
1310  for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range;
1311  pitch_delta++) {
1312  for (pixel = 0; pixel < best_pitch + pitch_delta; pixel++) {
1313  if (sum_proj[textord_pitch_range + pitch_delta].pile_count (pixel)
1314  < best_count) {
1315  best_count =
1316  sum_proj[textord_pitch_range +
1317  pitch_delta].pile_count (pixel);
1318  best_delta = pitch_delta;
1319  best_pixel = pixel;
1320  }
1321  }
1322  }
1323  if (testing_on)
1324  tprintf ("tune_row_pitch:start pitch=%g, best_delta=%d, count=%d\n",
1325  initial_pitch, best_delta, best_count);
1326  best_pitch += best_delta;
1327  initial_pitch = best_pitch;
// Acceptance threshold for the "good range" below: 2 * (minimum + 1).
1328  best_count++;
1329  best_count += best_count;
// Grow [start, end] outwards from the best phase while the folded count stays
// within the threshold, at most one pitch in either direction.
// NOTE(review): start can go negative, in which case `start % best_pitch` is
// negative too (C++ remainder takes the sign of the dividend); how
// STATS::pile_count treats a below-range value is not visible here - confirm.
1330  for (start = best_pixel - 2; start > best_pixel - best_pitch
1331  && sum_proj[textord_pitch_range +
1332  best_delta].pile_count (start % best_pitch) <= best_count;
1333  start--);
1334  for (end = best_pixel + 2;
1335  end < best_pixel + best_pitch
1336  && sum_proj[textord_pitch_range +
1337  best_delta].pile_count (end % best_pitch) <= best_count;
1338  end++);
1339 
// Single fit at the chosen pitch, passing the good range found above.
1340  best_sd =
1341  compute_pitch_sd(row,
1342  projection,
1343  projection_left,
1344  projection_right,
1345  space_size,
1346  initial_pitch,
1347  best_sp_sd,
1348  best_mid_cuts,
1349  best_cells,
1350  testing_on,
1351  start,
1352  end);
1353  if (testing_on)
1354  tprintf ("tune_row_pitch:output pitch=%g, sd=%g\n", initial_pitch,
1355  best_sd);
1356 
// NOTE(review): listing line 1357 is missing - presumably a debug-flag guard
// for the print below; as shown the print is unconditional.
1358  print_pitch_sd(row,
1359  projection,
1360  projection_left,
1361  projection_right,
1362  space_size,
1363  initial_pitch);
1364 
1365  return best_sd;
1366 }
1367 
1368 
1369 /**********************************************************************
1370  * compute_pitch_sd
1371  *
1372  * Use a dp algorithm to fit the character cells and return the sd of
1373  * the cell size over the row.
1374  **********************************************************************/
1375 
1376 float compute_pitch_sd( //find fp cells
1377  TO_ROW* row, //row to do
1378  STATS* projection, //vertical projection
1379  int16_t projection_left, //edge
1380  int16_t projection_right, //edge
1381  float space_size, //size of blank
1382  float initial_pitch, //guess at pitch
1383  float& sp_sd, //space sd
1384  int16_t& mid_cuts, //no of free cuts
1385  ICOORDELT_LIST* row_cells, //list of chop pts
1386  bool testing_on, //individual words
1387  int16_t start, //start of good range
1388  int16_t end //end of good range
1389 ) {
// Scores how well the row fits the given pitch. Blobs are grouped into words
// (runs whose inter-blob gap is below space_size); each word is fitted with
// check_pitch_sync/check_pitch_sync2 and the results are pooled.
// Returns sqrt(sum of word scores / total cell count), or space_size * 10
// when there is nothing to measure. sp_sd receives the rms misfit of the
// inter-word spaces against a whole number of pitches; row_cells receives
// the cell boundaries.
// NOTE(review): listing lines 1457 and 1515 are missing (see notes below).
1390  int16_t occupation; //no of cells in word.
1391  //blobs
1392  BLOBNBOX_IT blob_it = row->blob_list ();
1393  BLOBNBOX_IT start_it; //start of word
1394  BLOBNBOX_IT plot_it; //for plotting
1395  int16_t blob_count; //no of blobs
1396  TBOX blob_box; //bounding box
1397  TBOX prev_box; //of super blob
1398  int32_t prev_right; //of word sync
1399  int scale_factor; //on scores for big words
1400  int32_t sp_count; //spaces
1401  FPSEGPT_LIST seg_list; //char cells
1402  FPSEGPT_IT seg_it; //iterator
1403  int16_t segpos; //position of segment
1404  int16_t cellpos; //previous cell boundary
1405  //iterator
1406  ICOORDELT_IT cell_it = row_cells;
1407  ICOORDELT *cell; //new cell
1408  double sqsum; //sum of squares
1409  double spsum; //of spaces
1410  double sp_var; //space error
1411  double word_sync; //result for word
1412  int32_t total_count; //total blobs
1413 
// When the low two bits of pitsync_linear_version are 2 or 3, the whole row is
// fitted in one go by compute_pitch_sd2 and sp_sd carries the occupation
// count rather than a space sd.
1414  if ((pitsync_linear_version & 3) > 1) {
1415  word_sync = compute_pitch_sd2 (row, projection, projection_left,
1416  projection_right, initial_pitch,
1417  occupation, mid_cuts, row_cells,
1418  testing_on, start, end);
1419  sp_sd = occupation;
1420  return word_sync;
1421  }
1422  mid_cuts = 0;
1423  cellpos = 0;
1424  total_count = 0;
1425  sqsum = 0;
1426  sp_count = 0;
1427  spsum = 0;
// -1 marks "no previous word yet", so the first word adds no space sample.
1428  prev_right = -1;
// NOTE(review): sp_sd is not assigned on this early return.
1429  if (blob_it.empty ())
1430  return space_size * 10;
1431 #ifndef GRAPHICS_DISABLED
1432  if (testing_on && to_win != nullptr) {
1433  blob_box = blob_it.data ()->bounding_box ();
1434  projection->plot (to_win, projection_left,
1435  row->intercept (), 1.0f, -1.0f, ScrollView::CORAL);
1436  }
1437 #endif
1438  start_it = blob_it;
1439  blob_count = 0;
1440  blob_box = box_next (&blob_it);//first blob
1441  blob_it.mark_cycle_pt ();
// One pass of the outer loop per word.
1442  do {
// Step start_it past the blobs of the previous word so that it sits on the
// first blob of the word about to be measured (counts blob_count down to 0).
1443  for (; blob_count > 0; blob_count--)
1444  box_next(&start_it);
// Extend the word while the gap to the next blob is smaller than space_size.
1445  do {
1446  prev_box = blob_box;
1447  blob_count++;
1448  blob_box = box_next (&blob_it);
1449  }
1450  while (!blob_it.cycled_list ()
1451  && blob_box.left () - prev_box.right () < space_size);
1452  plot_it = start_it;
// Any non-zero low bits select the projection-aware fit (check_pitch_sync2).
1453  if (pitsync_linear_version & 3)
1454  word_sync =
1455  check_pitch_sync2 (&start_it, blob_count, static_cast<int16_t>(initial_pitch), 2,
1456  projection, projection_left, projection_right,
// NOTE(review): listing line 1457 is missing. check_pitch_sync2 takes a
// projection_scale argument in this position; presumably the same
// `row->xheight * textord_projection_scale` expression that print_pitch_sd
// passes in its whole-row call - confirm against upstream.
1458  occupation, &seg_list, start, end);
1459  else
1460  word_sync =
1461  check_pitch_sync (&start_it, blob_count, static_cast<int16_t>(initial_pitch), 2,
1462  projection, &seg_list);
1463  if (testing_on) {
1464  tprintf ("Word ending at (%d,%d), len=%d, sync rating=%g, ",
1465  prev_box.right (), prev_box.top (),
1466  seg_list.length () - 1, word_sync);
1467  seg_it.set_to_list (&seg_list);
1468  for (seg_it.mark_cycle_pt (); !seg_it.cycled_list ();
1469  seg_it.forward ()) {
1470  if (seg_it.data ()->faked)
1471  tprintf ("(F)");
1472  tprintf ("%d, ", seg_it.data ()->position ());
1473  // tprintf("C=%g, s=%g, sq=%g\n",
1474  // seg_it.data()->cost_function(),
1475  // seg_it.data()->sum(),
1476  // seg_it.data()->squares());
1477  }
1478  tprintf ("\n");
1479  }
1480 #ifndef GRAPHICS_DISABLED
1481  if (textord_show_fixed_cuts && blob_count > 0 && to_win != nullptr)
1482  plot_fp_cells2(to_win, ScrollView::GOLDENROD, row, &seg_list);
1483 #endif
1484  seg_it.set_to_list (&seg_list);
// Space error: distance from the previous word's last cut to this word's
// first cut, reduced to its offset from the nearest whole number of pitches,
// then squared.
1485  if (prev_right >= 0) {
1486  sp_var = seg_it.data ()->position () - prev_right;
1487  sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch;
1488  sp_var *= sp_var;
1489  spsum += sp_var;
1490  sp_count++;
1491  }
// Merge this word's cuts into the row-wide cell list.
1492  for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
1493  segpos = seg_it.data ()->position ();
// Cut lies more than half a pitch beyond the last cell (or the list is empty).
1494  if (cell_it.empty () || segpos > cellpos + initial_pitch / 2) {
1495  //big gap
// Pad a wide gap with evenly spaced cells one pitch apart.
1496  while (!cell_it.empty () && segpos > cellpos + initial_pitch * 3 / 2) {
1497  cell = new ICOORDELT (cellpos + static_cast<int16_t>(initial_pitch), 0);
1498  cell_it.add_after_then_move (cell);
1499  cellpos += static_cast<int16_t>(initial_pitch);
1500  }
1501  //make new one
1502  cell = new ICOORDELT (segpos, 0);
1503  cell_it.add_after_then_move (cell);
1504  cellpos = segpos;
1505  }
// Cut lies within half a pitch of the last cell: treat it as the same cut.
1506  else if (segpos > cellpos - initial_pitch / 2) {
1507  cell = cell_it.data ();
1508  //average positions
1509  cell->set_x ((cellpos + segpos) / 2);
1510  cellpos = cell->x ();
1511  }
1512  }
1513  seg_it.move_to_last ();
1514  prev_right = seg_it.data ()->position ();
// NOTE(review): listing line 1515 is missing - presumably the `if (...) {`
// whose closing brace and `else` follow; as shown the braces do not balance.
1516  scale_factor = (seg_list.length () - 2) / 2;
1517  if (scale_factor < 1)
1518  scale_factor = 1;
1519  }
1520  else
1521  scale_factor = 1;
// Pool the word score, weighted by scale_factor.
1522  sqsum += word_sync * scale_factor;
1523  total_count += (seg_list.length () - 1) * scale_factor;
1524  seg_list.clear ();
1525  }
1526  while (!blob_it.cycled_list ());
1527  sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0;
1528  return total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10;
1529 }
1530 
1531 
1532 /**********************************************************************
1533  * compute_pitch_sd2
1534  *
1535  * Use a dp algorithm to fit the character cells and return the sd of
1536  * the cell size over the row.
1537  **********************************************************************/
1538 
1539 float compute_pitch_sd2( //find fp cells
1540  TO_ROW* row, //row to do
1541  STATS* projection, //vertical projection
1542  int16_t projection_left, //edge
1543  int16_t projection_right, //edge
1544  float initial_pitch, //guess at pitch
1545  int16_t& occupation, //no of occupied cells
1546  int16_t& mid_cuts, //no of free cuts
1547  ICOORDELT_LIST* row_cells, //list of chop pts
1548  bool testing_on, //individual words
1549  int16_t start, //start of good range
1550  int16_t end //end of good range
1551 ) {
// Whole-row variant of compute_pitch_sd: all blobs of the row are passed to
// check_pitch_sync2 in a single call instead of word by word.
// Returns sqrt(score / occupation), or initial_pitch * 10 when the row has no
// blobs or the fit reports no occupied cells. Every cut position found is
// appended to row_cells; mid_cuts is taken from the last segment point.
1552  //blobs
1553  BLOBNBOX_IT blob_it = row->blob_list ();
1554  BLOBNBOX_IT plot_it;
1555  int16_t blob_count; //no of blobs
1556  TBOX blob_box; //bounding box
1557  FPSEGPT_LIST seg_list; //char cells
1558  FPSEGPT_IT seg_it; //iterator
1559  int16_t segpos; //position of segment
1560  //iterator
1561  ICOORDELT_IT cell_it = row_cells;
1562  ICOORDELT *cell; //new cell
1563  double word_sync; //result for word
1564 
1565  mid_cuts = 0;
1566  if (blob_it.empty ()) {
1567  occupation = 0;
1568  return initial_pitch * 10;
1569  }
1570 #ifndef GRAPHICS_DISABLED
1571  if (testing_on && to_win != nullptr) {
1572  projection->plot (to_win, projection_left,
1573  row->intercept (), 1.0f, -1.0f, ScrollView::CORAL);
1574  }
1575 #endif
// Count the row's blobs by stepping with box_next until the iterator has
// cycled; blob_box ends up holding the last box returned.
1576  blob_count = 0;
1577  blob_it.mark_cycle_pt ();
1578  do {
1579  //first blob
1580  blob_box = box_next (&blob_it);
1581  blob_count++;
1582  }
1583  while (!blob_it.cycled_list ());
1584  plot_it = blob_it;
1585  word_sync = check_pitch_sync2 (&blob_it, blob_count, static_cast<int16_t>(initial_pitch),
1586  2, projection, projection_left,
1587  projection_right,
// NOTE(review): listing line 1588 is missing. check_pitch_sync2 takes a
// projection_scale argument in this position; presumably
// `row->xheight * textord_projection_scale` as in print_pitch_sd - confirm.
1589  occupation, &seg_list, start, end);
1590  if (testing_on) {
1591  tprintf ("Row ending at (%d,%d), len=%d, sync rating=%g, ",
1592  blob_box.right (), blob_box.top (),
1593  seg_list.length () - 1, word_sync);
1594  seg_it.set_to_list (&seg_list);
1595  for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
1596  if (seg_it.data ()->faked)
1597  tprintf ("(F)");
1598  tprintf ("%d, ", seg_it.data ()->position ());
1599  // tprintf("C=%g, s=%g, sq=%g\n",
1600  // seg_it.data()->cost_function(),
1601  // seg_it.data()->sum(),
1602  // seg_it.data()->squares());
1603  }
1604  tprintf ("\n");
1605  }
1606 #ifndef GRAPHICS_DISABLED
1607  if (textord_show_fixed_cuts && blob_count > 0 && to_win != nullptr)
1608  plot_fp_cells2(to_win, ScrollView::GOLDENROD, row, &seg_list);
1609 #endif
// Copy every cut into the caller's cell list; unlike compute_pitch_sd there
// is no merging with cells that are already present.
1610  seg_it.set_to_list (&seg_list);
1611  for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
1612  segpos = seg_it.data ()->position ();
1613  //make new one
1614  cell = new ICOORDELT (segpos, 0);
1615  cell_it.add_after_then_move (cell);
1616  if (seg_it.at_last ())
1617  mid_cuts = seg_it.data ()->cheap_cuts ();
1618  }
1619  seg_list.clear ();
1620  return occupation > 0 ? sqrt (word_sync / occupation) : initial_pitch * 10;
1621 }
1622 
1623 
1624 /**********************************************************************
1625  * print_pitch_sd
1626  *
1627  * Recompute the word-by-word and whole-row pitch sync scores for the given
1628  * pitch and print them with tprintf. Nothing is returned.
1629  **********************************************************************/
1630 
1631 void print_pitch_sd( //find fp cells
1632  TO_ROW *row, //row to do
1633  STATS *projection, //vertical projection
1634  int16_t projection_left, //edges //size of blank
1635  int16_t projection_right,
1636  float space_size,
1637  float initial_pitch //guess at pitch
1638  ) {
// Prints two pitch-fit summaries for the row at the given pitch: first the
// pooled word-by-word result (words are runs of blobs whose gaps are below
// space_size), then the result of fitting the whole row in one call. Nothing
// on the row is modified by the visible code; output goes through tprintf.
// NOTE(review): listing lines 1689 and 1702 are missing (see notes below).
1639  const char *res2; //pitch result
1640  int16_t occupation; //used cells
1641  float sp_sd; //space sd
1642  //blobs
1643  BLOBNBOX_IT blob_it = row->blob_list ();
1644  BLOBNBOX_IT start_it; //start of word
1645  BLOBNBOX_IT row_start; //start of row
1646  int16_t blob_count; //no of blobs
1647  int16_t total_blob_count; //total blobs in line
1648  TBOX blob_box; //bounding box
1649  TBOX prev_box; //of super blob
1650  int32_t prev_right; //of word sync
1651  int scale_factor; //on scores for big words
1652  int32_t sp_count; //spaces
1653  FPSEGPT_LIST seg_list; //char cells
1654  FPSEGPT_IT seg_it; //iterator
1655  double sqsum; //sum of squares
1656  double spsum; //of spaces
1657  double sp_var; //space error
1658  double word_sync; //result for word
1659  double total_count; //total cuts
1660 
1661  if (blob_it.empty ())
1662  return;
1663  row_start = blob_it;
1664  total_blob_count = 0;
1665 
1666  total_count = 0;
1667  sqsum = 0;
1668  sp_count = 0;
1669  spsum = 0;
// -1 marks "no previous word yet", so the first word adds no space sample.
1670  prev_right = -1;
1671  blob_it = row_start;
1672  start_it = blob_it;
1673  blob_count = 0;
1674  blob_box = box_next (&blob_it);//first blob
1675  blob_it.mark_cycle_pt ();
// Pass 1: one iteration of the outer loop per word.
1676  do {
// Step start_it past the previous word's blobs (counts blob_count down to 0).
1677  for (; blob_count > 0; blob_count--)
1678  box_next(&start_it);
// Extend the word while the gap to the next blob is smaller than space_size.
1679  do {
1680  prev_box = blob_box;
1681  blob_count++;
1682  blob_box = box_next (&blob_it);
1683  }
1684  while (!blob_it.cycled_list ()
1685  && blob_box.left () - prev_box.right () < space_size);
1686  word_sync =
1687  check_pitch_sync2 (&start_it, blob_count, static_cast<int16_t>(initial_pitch), 2,
1688  projection, projection_left, projection_right,
// NOTE(review): listing line 1689 is missing. check_pitch_sync2 takes a
// projection_scale argument in this position; presumably the same
// `row->xheight * textord_projection_scale` used by the whole-row call
// further down - confirm against upstream.
1690  occupation, &seg_list, 0, 0);
1691  total_blob_count += blob_count;
1692  seg_it.set_to_list (&seg_list);
// Space error: offset of the inter-word distance from the nearest whole
// number of pitches, squared.
1693  if (prev_right >= 0) {
1694  sp_var = seg_it.data ()->position () - prev_right;
1695  sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch;
1696  sp_var *= sp_var;
1697  spsum += sp_var;
1698  sp_count++;
1699  }
1700  seg_it.move_to_last ();
1701  prev_right = seg_it.data ()->position ();
// NOTE(review): listing line 1702 is missing - presumably the `if (...) {`
// whose closing brace and `else` follow; as shown the braces do not balance.
1703  scale_factor = (seg_list.length () - 2) / 2;
1704  if (scale_factor < 1)
1705  scale_factor = 1;
1706  }
1707  else
1708  scale_factor = 1;
1709  sqsum += word_sync * scale_factor;
1710  total_count += (seg_list.length () - 1) * scale_factor;
1711  seg_list.clear ();
1712  }
1713  while (!blob_it.cycled_list ());
1714  sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0;
// word_sync is reused here to hold the pooled sd of the word-by-word pass.
1715  word_sync = total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10;
1716  tprintf ("new_sd=%g:sd/p=%g:new_sp_sd=%g:res=%c:",
1717  word_sync, word_sync / initial_pitch, sp_sd,
1718  word_sync < textord_words_pitchsd_threshold * initial_pitch
1719  ? 'F' : 'P');
1720 
// Pass 2: fit the whole row (all blobs counted above) in a single call.
1721  start_it = row_start;
1722  blob_it = row_start;
1723  word_sync =
1724  check_pitch_sync2 (&blob_it, total_blob_count, static_cast<int16_t>(initial_pitch), 2,
1725  projection, projection_left, projection_right,
1726  row->xheight * textord_projection_scale, occupation,
1727  &seg_list, 0, 0);
// Normalise by the occupation count only when it exceeds 1, then take the
// root to get an sd-like figure.
1728  if (occupation > 1)
1729  word_sync /= occupation;
1730  word_sync = sqrt (word_sync);
1731 
1732 #ifndef GRAPHICS_DISABLED
1733  if (textord_show_row_cuts && to_win != nullptr)
1734  plot_fp_cells2(to_win, ScrollView::CORAL, row, &seg_list);
1735 #endif
1736  seg_list.clear ();
// Two-letter tag for the whole-row result, using the same thresholds as
// fixed_pitch_row but without the pitsync_linear_version refinements.
1737  if (word_sync < textord_words_pitchsd_threshold * initial_pitch) {
1738  if (word_sync < textord_words_def_fixed * initial_pitch
1739  && !row->all_caps)
1740  res2 = "DF";
1741  else
1742  res2 = "MF";
1743  }
1744  else
1745  res2 = word_sync < textord_words_def_prop * initial_pitch ? "MP" : "DP";
1746  tprintf
1747  ("row_sd=%g:sd/p=%g:res=%c:N=%d:res2=%s,init pitch=%g, row_pitch=%g, all_caps=%d\n",
1748  word_sync, word_sync / initial_pitch,
1749  word_sync < textord_words_pitchsd_threshold * initial_pitch ? 'F' : 'P',
1750  occupation, res2, initial_pitch, row->fixed_pitch, row->all_caps);
1751 }
1752 
1753 /**********************************************************************
1754  * find_repeated_chars
1755  *
1756  * Extract marked leader blobs and put them
1757  * into words in advance of fixed pitch checking and word generation.
1758  **********************************************************************/
1759 void find_repeated_chars(TO_BLOCK* block, // Block to search.
1760  bool testing_on) { // Debug mode.
1761  POLY_BLOCK* pb = block->block->pdblk.poly_block();
1762  if (pb != nullptr && !pb->IsText())
1763  return; // Don't find repeated chars in non-text blocks.
1764 
1765  TO_ROW *row;
1766  BLOBNBOX_IT box_it;
1767  BLOBNBOX_IT search_it; // forward search
1768  WERD *word; // new word
1769  TBOX word_box; // for plotting
1770  int blobcount, repeated_set;
1771 
1772  TO_ROW_IT row_it = block->get_rows();
1773  if (row_it.empty()) return; // empty block
1774  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1775  row = row_it.data();
1776  box_it.set_to_list(row->blob_list());
1777  if (box_it.empty()) continue; // no blobs in this row
1778  if (!row->rep_chars_marked()) {
1779  mark_repeated_chars(row);
1780  }
1781  if (row->num_repeated_sets() == 0) continue; // nothing to do for this row
1782  // new words
1783  WERD_IT word_it(&row->rep_words);
1784  do {
1785  if (box_it.data()->repeated_set() != 0 &&
1786  !box_it.data()->joined_to_prev()) {
1787  blobcount = 1;
1788  repeated_set = box_it.data()->repeated_set();
1789  search_it = box_it;
1790  search_it.forward();
1791  while (!search_it.at_first() &&
1792  search_it.data()->repeated_set() == repeated_set) {
1793  blobcount++;
1794  search_it.forward();
1795  }
1796  // After the call to make_real_word() all the blobs from this
1797  // repeated set will be removed from the blob list. box_it will be
1798  // set to point to the blob after the end of the extracted sequence.
1799  word = make_real_word(&box_it, blobcount, box_it.at_first(), 1);
1800  if (!box_it.empty() && box_it.data()->joined_to_prev()) {
1801  tprintf("Bad box joined to prev at");
1802  box_it.data()->bounding_box().print();
1803  tprintf("After repeated word:");
1804  word->bounding_box().print();
1805  }
1806  ASSERT_HOST(box_it.empty() || !box_it.data()->joined_to_prev());
1807  word->set_flag(W_REP_CHAR, true);
1808  word->set_flag(W_DONT_CHOP, true);
1809  word_it.add_after_then_move(word);
1810  } else {
1811  box_it.forward();
1812  }
1813  } while (!box_it.at_first());
1814  }
1815 }
1816 
1817 
1818 /**********************************************************************
1819  * plot_fp_word
1820  *
1821  * Plot a block of words as if fixed pitch.
1822  **********************************************************************/
1823 
1824 #ifndef GRAPHICS_DISABLED
1825 void plot_fp_word( //draw block of words
1826  TO_BLOCK *block, //block to draw
1827  float pitch, //pitch to draw with
1828  float nonspace //for space threshold
1829  ) {
1830  TO_ROW *row; //current row
1831  TO_ROW_IT row_it = block->get_rows ();
1832 
1833  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1834  row = row_it.data ();
1835  row->min_space = static_cast<int32_t>((pitch + nonspace) / 2);
1836  row->max_nonspace = row->min_space;
1837  row->space_threshold = row->min_space;
1838  plot_word_decisions (to_win, static_cast<int16_t>(pitch), row);
1839  }
1840 }
1841 #endif
TO_ROW::min_space
int32_t min_space
Definition: blobbox.h:662
STATS::get_total
int32_t get_total() const
Definition: statistc.h:83
ICOORD::set_x
void set_x(int16_t xin)
rewrite function
Definition: points.h:60
textord_fpiqr_ratio
double textord_fpiqr_ratio
Definition: tovars.cpp:78
TO_BLOCK::max_nonspace
int32_t max_nonspace
Definition: blobbox.h:792
textord_debug_block
int textord_debug_block
Definition: tovars.cpp:33
check_pitch_sync2
double check_pitch_sync2(BLOBNBOX_IT *blob_it, int16_t blob_count, int16_t pitch, int16_t pitch_error, STATS *projection, int16_t projection_left, int16_t projection_right, float projection_scale, int16_t &occupation_count, FPSEGPT_LIST *seg_list, int16_t start, int16_t end)
Definition: pithsync.cpp:286
TO_ROW::rep_words
WERD_LIST rep_words
Definition: blobbox.h:667
textord_blocksall_prop
bool textord_blocksall_prop
Definition: tovars.cpp:28
pithsync.h
textord_balance_factor
double textord_balance_factor
Definition: topitch.cpp:53
TO_ROW::space_size
float space_size
Definition: blobbox.h:666
PDBLK::bounding_box
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:58
textord_projection_scale
double textord_projection_scale
Definition: topitch.cpp:51
TO_ROW::pr_nonsp
float pr_nonsp
Definition: blobbox.h:654
TO_BLOCK::kern_size
float kern_size
Definition: blobbox.h:789
textord_words_def_fixed
double textord_words_def_fixed
Definition: tovars.cpp:58
create_to_win
ScrollView * create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:42
W_REP_CHAR
repeated character
Definition: werd.h:52
POLY_BLOCK::IsText
bool IsText() const
Definition: polyblk.h:62
TO_ROW::pr_space
float pr_space
Definition: blobbox.h:653
words_default_fixed_space
double words_default_fixed_space
Definition: tovars.cpp:70
W_DONT_CHOP
fixed pitch chopped
Definition: werd.h:51
ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:87
plot_word_decisions
void plot_word_decisions(ScrollView *win, int16_t pitch, TO_ROW *row)
Definition: drawtord.cpp:239
WERD::bounding_box
TBOX bounding_box() const
Definition: werd.cpp:147
compute_block_pitch
void compute_block_pitch(TO_BLOCK *block, FCOORD rotation, int32_t block_index, bool testing_on)
Definition: topitch.cpp:310
PITCH_DEF_PROP
Definition: blobbox.h:48
BLOCK_STATS_CLUSTERS
#define BLOCK_STATS_CLUSTERS
Definition: topitch.cpp:55
blobbox.h
plot_fp_cells2
void plot_fp_cells2(ScrollView *win, ScrollView::Color colour, TO_ROW *row, FPSEGPT_LIST *seg_list)
Definition: drawtord.cpp:351
tune_row_pitch2
float tune_row_pitch2(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float &initial_pitch, float &best_sp_sd, int16_t &best_mid_cuts, ICOORDELT_LIST *best_cells, bool testing_on)
Definition: topitch.cpp:1248
print_pitch_sd
void print_pitch_sd(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float initial_pitch)
Definition: topitch.cpp:1612
ICOORD
integer coordinate
Definition: points.h:30
textord_pitch_range
int textord_pitch_range
Definition: tovars.cpp:34
TBOX::print
void print() const
Definition: rect.h:277
textord_words_default_maxspace
double textord_words_default_maxspace
Definition: tovars.cpp:43
TO_ROW::projection_left
int16_t projection_left
Definition: blobbox.h:647
TBOX::top
int16_t top() const
Definition: rect.h:57
TO_ROW::projection_right
int16_t projection_right
Definition: blobbox.h:648
STATS::pile_count
int32_t pile_count(int32_t value) const
Definition: statistc.h:75
TO_BLOCK
Definition: blobbox.h:691
textord_show_fixed_cuts
bool textord_show_fixed_cuts
Definition: drawtord.cpp:32
PITCH_CORR_PROP
Definition: blobbox.h:51
plot_fp_word
void plot_fp_word(TO_BLOCK *block, float pitch, float nonspace)
Definition: topitch.cpp:1804
textord_dotmatrix_gap
int textord_dotmatrix_gap
Definition: tovars.cpp:32
STATS::smooth
void smooth(int32_t factor)
Definition: statistc.cpp:266
PITCH_DEF_FIXED
Definition: blobbox.h:46
plot_row_cells
void plot_row_cells(ScrollView *win, ScrollView::Color colour, TO_ROW *row, float xshift, ICOORDELT_LIST *cells)
Definition: drawtord.cpp:383
textord_words_pitchsd_threshold
double textord_words_pitchsd_threshold
Definition: tovars.cpp:56
words_default_prop_nonspace
double words_default_prop_nonspace
Definition: tovars.cpp:69
TO_ROW::pitch_decision
PITCH_TYPE pitch_decision
Definition: blobbox.h:649
try_rows_fixed
bool try_rows_fixed(TO_BLOCK *block, int32_t block_index, bool testing_on)
Definition: topitch.cpp:547
ICOORD::x
int16_t x() const
access function
Definition: points.h:51
FCOORD
Definition: points.h:187
fixed_pitch_row
bool fixed_pitch_row(TO_ROW *row, BLOCK *block, int32_t block_index)
Definition: topitch.cpp:967
BLOBNBOX
Definition: blobbox.h:142
compute_pitch_sd
float compute_pitch_sd(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float initial_pitch, float &sp_sd, int16_t &mid_cuts, ICOORDELT_LIST *row_cells, bool testing_on, int16_t start, int16_t end)
Definition: topitch.cpp:1359
textord_pitch_rowsimilarity
double textord_pitch_rowsimilarity
Definition: tovars.cpp:64
words_default_fixed_limit
double words_default_fixed_limit
Definition: tovars.cpp:71
textord_pitch_scalebigwords
bool textord_pitch_scalebigwords
Definition: tovars.cpp:66
check_pitch_sync
double check_pitch_sync(BLOBNBOX_IT *blob_it, int16_t blob_count, int16_t pitch, int16_t pitch_error, STATS *projection, FPSEGPT_LIST *seg_list)
Definition: pitsync1.cpp:138
textord_blocksall_fixed
bool textord_blocksall_fixed
Definition: tovars.cpp:26
TO_ROW::fp_nonsp
float fp_nonsp
Definition: blobbox.h:652
textord_show_initial_words
bool textord_show_initial_words
Definition: tovars.cpp:22
textord_min_xheight
int textord_min_xheight
Definition: makerow.cpp:67
textord_wordstats_smooth_factor
double textord_wordstats_smooth_factor
Definition: tovars.cpp:36
textord_words_veto_power
int textord_words_veto_power
Definition: tovars.cpp:62
textord_debug_pitch_metric
bool textord_debug_pitch_metric
Definition: topitch.cpp:44
statistc.h
tovars.h
topitch.h
TO_ROW::rep_chars_marked
bool rep_chars_marked() const
Definition: blobbox.h:630
textord_fast_pitch_test
bool textord_fast_pitch_test
Definition: topitch.cpp:42
textord_show_page_cuts
bool textord_show_page_cuts
Definition: topitch.cpp:46
BLOCK
Definition: ocrblock.h:28
TO_ROW::used_dm_model
bool used_dm_model
Definition: blobbox.h:646
textord_show_row_cuts
bool textord_show_row_cuts
Definition: topitch.cpp:45
BLOCK::pdblk
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:189
mark_repeated_chars
void mark_repeated_chars(TO_ROW *row)
Definition: makerow.cpp:2639
makerow.h
PITCH_DUNNO
Definition: blobbox.h:45
compute_rows_pitch
bool compute_rows_pitch(TO_BLOCK *block, int32_t block_index, bool testing_on)
Definition: topitch.cpp:352
PDBLK::poly_block
POLY_BLOCK * poly_block() const
Definition: pdblock.h:54
try_doc_fixed
bool try_doc_fixed(ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient)
Definition: topitch.cpp:395
PITCH_CORR_FIXED
Definition: blobbox.h:50
TO_BLOCK::block
BLOCK * block
Definition: blobbox.h:776
tune_row_pitch
float tune_row_pitch(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float &initial_pitch, float &best_sp_sd, int16_t &best_mid_cuts, ICOORDELT_LIST *best_cells, bool testing_on)
Definition: topitch.cpp:1137
TO_ROW::projection
STATS projection
Definition: blobbox.h:670
WERD::set_flag
void set_flag(WERD_FLAGS mask, bool value)
Definition: werd.h:117
TO_BLOCK::fixed_pitch
float fixed_pitch
Definition: blobbox.h:788
words_initial_upper
double words_initial_upper
Definition: tovars.cpp:68
TO_ROW::fp_space
float fp_space
Definition: blobbox.h:651
print_block_counts
void print_block_counts(TO_BLOCK *block, int32_t block_index)
Definition: topitch.cpp:614
TO_BLOCK::xheight
float xheight
Definition: blobbox.h:787
TO_ROW::num_repeated_sets
int num_repeated_sets() const
Definition: blobbox.h:636
BLOBNBOX::joined_to_prev
bool joined_to_prev() const
Definition: blobbox.h:255
textord_spacesize_ratioprop
double textord_spacesize_ratioprop
Definition: tovars.cpp:77
TBOX::width
int16_t width() const
Definition: rect.h:114
textord_blockndoc_fixed
bool textord_blockndoc_fixed
Definition: topitch.cpp:50
STATS::plot
void plot(ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const
Definition: statistc.cpp:558
BOOL_VAR
#define BOOL_VAR(name, val, comment)
Definition: params.h:303
TBOX::bottom
int16_t bottom() const
Definition: rect.h:64
TO_ROW::fixed_pitch
float fixed_pitch
Definition: blobbox.h:650
TO_ROW::xheight
float xheight
Definition: blobbox.h:656
helpers.h
make_real_word
WERD * make_real_word(BLOBNBOX_IT *box_it, int32_t blobcount, bool bol, uint8_t blanks)
Definition: wordseg.cpp:578
count_pitch_stats
bool count_pitch_stats(TO_ROW *row, STATS *gap_stats, STATS *pitch_stats, float initial_pitch, float min_space, bool ignore_outsize, bool split_outsize, int32_t dm_gap)
Definition: topitch.cpp:1050
TO_BLOCK::space_size
float space_size
Definition: blobbox.h:790
double_VAR
#define double_VAR(name, val, comment)
Definition: params.h:309
TO_BLOCK::pitch_decision
PITCH_TYPE pitch_decision
Definition: blobbox.h:777
textord_words_maxspace
double textord_words_maxspace
Definition: tovars.cpp:41
STATS
Definition: statistc.h:30
BLOBNBOX::bounding_box
const TBOX & bounding_box() const
Definition: blobbox.h:229
textord_words_def_prop
double textord_words_def_prop
Definition: tovars.cpp:60
TO_BLOCK::pr_space
float pr_space
Definition: blobbox.h:795
drawtord.h
TO_BLOCK::pr_nonsp
float pr_nonsp
Definition: blobbox.h:796
PITCH_MAYBE_FIXED
Definition: blobbox.h:47
TO_BLOCK::min_space
int32_t min_space
Definition: blobbox.h:791
TO_BLOCK::get_rows
TO_ROW_LIST * get_rows()
Definition: blobbox.h:703
STATS::ile
double ile(double frac) const
Definition: statistc.cpp:156
TO_ROW::space_threshold
int32_t space_threshold
Definition: blobbox.h:664
words_initial_lower
double words_initial_lower
Definition: tovars.cpp:67
TO_ROW::max_nonspace
int32_t max_nonspace
Definition: blobbox.h:663
box_next
TBOX box_next(BLOBNBOX_IT *it)
Definition: blobbox.cpp:629
QSPLINE::y
double y(double x) const
Definition: quspline.cpp:202
ScrollView::GOLDENROD
Definition: scrollview.h:123
TO_ROW::intercept
float intercept() const
Definition: blobbox.h:588
textord_words_default_minspace
double textord_words_default_minspace
Definition: tovars.cpp:45
wordseg.h
WERD
Definition: werd.h:55
TBOX::left
int16_t left() const
Definition: rect.h:71
STATS::add
void add(int32_t value, int32_t count)
Definition: statistc.cpp:87
textord_words_min_minspace
double textord_words_min_minspace
Definition: tovars.cpp:46
fix_row_pitch
void fix_row_pitch(TO_ROW *bad_row, TO_BLOCK *bad_block, TO_BLOCK_LIST *blocks, int32_t row_target, int32_t block_target)
Definition: topitch.cpp:146
find_repeated_chars
void find_repeated_chars(TO_BLOCK *block, bool testing_on)
Definition: topitch.cpp:1739
TBOX::right
int16_t right() const
Definition: rect.h:78
textord_pitch_cheat
bool textord_pitch_cheat
Definition: topitch.cpp:48
compute_pitch_sd2
float compute_pitch_sd2(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float initial_pitch, int16_t &occupation, int16_t &mid_cuts, ICOORDELT_LIST *row_cells, bool testing_on, int16_t start, int16_t end)
Definition: topitch.cpp:1521
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
TO_ROW::char_cells
ICOORDELT_LIST char_cells
Definition: blobbox.h:668
POLY_BLOCK
Definition: polyblk.h:26
TO_ROW
Definition: blobbox.h:543
compute_fixed_pitch
void compute_fixed_pitch(ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient, FCOORD rotation, bool testing_on)
Definition: topitch.cpp:78
ScrollView::Update
static void Update()
Definition: scrollview.cpp:708
TO_ROW::ascrise
float ascrise
Definition: blobbox.h:658
TO_ROW::kern_size
float kern_size
Definition: blobbox.h:665
textord_words_default_nonspace
double textord_words_default_nonspace
Definition: tovars.cpp:48
TO_ROW::all_caps
bool all_caps
Definition: blobbox.h:645
ScrollView::CORAL
Definition: scrollview.h:119
BLOBNBOX::cblob
C_BLOB * cblob() const
Definition: blobbox.h:267
find_row_pitch
bool find_row_pitch(TO_ROW *row, int32_t maxwidth, int32_t dm_gap, TO_BLOCK *block, int32_t block_index, int32_t row_index, bool testing_on)
Definition: topitch.cpp:828
TO_ROW::baseline
QSPLINE baseline
Definition: blobbox.h:669
PITCH_MAYBE_PROP
Definition: blobbox.h:49
textord_max_pitch_iqr
double textord_max_pitch_iqr
Definition: tovars.cpp:79
ICOORDELT
Definition: points.h:160
count_block_votes
void count_block_votes(TO_BLOCK *block, int32_t &def_fixed, int32_t &def_prop, int32_t &maybe_fixed, int32_t &maybe_prop, int32_t &corr_fixed, int32_t &corr_prop, int32_t &dunno)
Definition: topitch.cpp:650
textord_debug_pitch_test
bool textord_debug_pitch_test
Definition: topitch.cpp:38
pitsync1.h
STATS::cluster
int32_t cluster(float lower, float upper, float multiple, int32_t max_clusters, STATS *clusters)
Definition: statistc.cpp:296
STATS::set_range
bool set_range(int32_t min_bucket_value, int32_t max_bucket_value_plus_1)
Definition: statistc.cpp:53
try_block_fixed
bool try_block_fixed(TO_BLOCK *block, int32_t block_index)
Definition: topitch.cpp:534
to_win
ScrollView * to_win
Definition: drawtord.cpp:34
row_pitch_stats
bool row_pitch_stats(TO_ROW *row, int32_t maxwidth, bool testing_on)
Definition: topitch.cpp:696
TO_ROW::compute_vertical_projection
void compute_vertical_projection()
Definition: blobbox.cpp:784
MAX_ALLOWED_PITCH
#define MAX_ALLOWED_PITCH
Definition: topitch.cpp:56
TO_ROW::blob_list
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:599
STATS::clear
void clear()
Definition: statistc.cpp:71
TBOX
Definition: rect.h:33