37 #include "config_auto.h"
44 "Debug on fixed pitch test");
46 "Turn off dp fixed pitch algorithm");
48 "Do even faster pitch algorithm");
50 "Write full metric stuff");
54 "Use correct answer for fixed/prop");
56 "Attempt whole doc/block fixed pitch");
59 "Ding rate for unbalanced char cells");
61 #define FIXED_WIDTH_MULTIPLE 5
62 #define BLOCK_STATS_CLUSTERS 10
63 #define MAX_ALLOWED_PITCH 100 //max pixel pitch.
74 TO_BLOCK_LIST *port_blocks,
85 #ifndef GRAPHICS_DISABLED
92 block_it.set_to_list (port_blocks);
94 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
95 block_it.forward ()) {
96 block = block_it.data ();
103 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
104 block_it.forward ()) {
105 block = block_it.data ();
113 for (block_it.mark_cycle_pt(); !block_it.cycled_list();
114 block_it.forward()) {
115 block = block_it.data ();
118 row_it.set_to_list (block->
get_rows ());
120 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
121 row = row_it.data ();
122 fix_row_pitch(row, block, port_blocks, row_index, block_index);
127 #ifndef GRAPHICS_DISABLED
144 TO_BLOCK_LIST *blocks,
146 inT32 block_target) {
154 TO_BLOCK_IT block_it = blocks;
162 block_votes = like_votes = other_votes = 0;
169 for (block_it.mark_cycle_pt(); !block_it.cycled_list();
170 block_it.forward()) {
171 block = block_it.data();
175 row_it.set_to_list (block->
get_rows ());
176 for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
178 row = row_it.data ();
192 if (block_index == block_target) {
247 else if (block_votes <= textord_words_veto_power && like_votes > 0) {
253 if (block_votes == 0 && like_votes == 0 && other_votes > 0
256 (
"Warning:row %d of block %d set prop with no like rows against trend\n",
257 row_target, block_target);
261 tprintf(
":b_votes=%d:l_votes=%d:o_votes=%d",
262 block_votes, like_votes, other_votes);
269 else if (block_votes == 0 && like_votes > 0)
273 (
"Warning:guessing pitch as xheight on row %d, block %d\n",
274 row_target, block_target);
316 tprintf (
"Block %d at (%d,%d)->(%d,%d)\n",
319 block_box.
right (), block_box.
top ());
330 if (!block->
get_rows ()->empty ()) {
333 #ifndef GRAPHICS_DISABLED
360 TO_ROW_IT row_it = block->
get_rows ();
363 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
364 row = row_it.data ();
371 row_index, testing_on)) {
397 TO_BLOCK_LIST *port_blocks,
407 TO_BLOCK_IT block_it = port_blocks;
411 inT16 projection_left;
412 inT16 projection_right;
415 ICOORDELT_LIST *master_cells;
428 if (block_it.empty ()
432 shift_factor = gradient / (gradient * gradient + 1);
433 row_it.set_to_list (block_it.data ()->get_rows ());
434 master_x = row_it.data ()->projection_left;
435 master_y = row_it.data ()->baseline.y (master_x);
442 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
443 block_it.forward ()) {
444 block = block_it.data ();
445 row_it.set_to_list (block->
get_rows ());
446 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
447 row = row_it.data ();
455 shift_factor * (master_y - row_y));
458 shift_factor * (master_y - row_y));
459 if (row_left < projection_left)
460 projection_left = row_left;
461 if (row_right > projection_right)
462 projection_right = row_right;
467 projection.
set_range (projection_left, projection_right);
469 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
470 block_it.forward ()) {
471 block = block_it.data ();
472 row_it.set_to_list (block->
get_rows ());
473 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
474 row = row_it.data ();
478 shift_factor * (master_y - row_y));
486 row_it.set_to_list (block_it.data ()->get_rows ());
487 row = row_it.data ();
488 #ifndef GRAPHICS_DISABLED
493 final_pitch = pitches.
ile (0.5);
494 pitch = (
inT16) final_pitch;
496 tune_row_pitch (row, &projection, projection_left, projection_right,
497 pitch * 0.75, final_pitch, sp_sd, mid_cuts,
502 (
"try_doc:props=%d:fixed=%d:pitch=%d:final_pitch=%g:pitch_sd=%g:sp_sd=%g:sd/trc=%g:sd/p=%g:sd/trc/p=%g\n",
503 prop_blocks, fixed_blocks, pitch, final_pitch, pitch_sd, sp_sd,
504 pitch_sd / total_row_count, pitch_sd / pitch,
505 pitch_sd / total_row_count / pitch);
507 #ifndef GRAPHICS_DISABLED
510 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
511 block_it.forward ()) {
512 block = block_it.data ();
513 row_it.set_to_list (block->
get_rows ());
514 for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
516 row = row_it.data ();
518 row_shift = shift_factor * (master_y - row_y);
558 inT32 maybe_fixed = 0;
559 inT32 maybe_prop = 0;
561 inT32 corr_fixed = 0;
564 TO_ROW_IT row_it = block->
get_rows ();
567 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
568 row = row_it.data ();
597 else if (def_prop > def_fixed * textord_words_veto_power)
599 else if (def_fixed > 0 || def_prop > 0)
601 else if (maybe_fixed > maybe_prop * textord_words_veto_power)
603 else if (maybe_prop > maybe_fixed * textord_words_veto_power)
623 inT32 maybe_fixed = 0;
624 inT32 maybe_prop = 0;
626 inT32 corr_fixed = 0;
637 tprintf (
"Block %d has (%d,%d,%d)",
638 block_index, def_fixed, maybe_fixed, corr_fixed);
641 tprintf (
" fixed, (%d,%d,%d)", def_prop, maybe_prop, corr_prop);
644 tprintf (
" prop, %d dunno\n", dunno);
664 TO_ROW_IT row_it = block->
get_rows ();
666 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
667 row = row_it.data ();
718 STATS gap_stats (0, maxwidth);
724 if (!blob_it.empty ()) {
725 prev_x = blob_it.data ()->bounding_box ().right ();
727 while (!blob_it.at_first ()) {
728 blob = blob_it.data ();
731 if (blob_box.
left () - prev_x < maxwidth)
732 gap_stats.
add (blob_box.
left () - prev_x, 1);
733 prev_x = blob_box.
right ();
744 gap_stats.
smooth (smooth_factor);
746 prev_count = cluster_count;
747 cluster_count = gap_stats.
cluster (lower, upper,
752 if (cluster_count < 1) {
755 for (gap_index = 0; gap_index < cluster_count; gap_index++)
756 gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
759 tprintf (
"cluster_count=%d:", cluster_count);
760 for (gap_index = 0; gap_index < cluster_count; gap_index++)
761 tprintf (
" %g(%d)", gaps[gap_index],
762 cluster_stats[gap_index + 1].get_total ());
765 qsort (gaps, cluster_count,
sizeof (
float),
sort_floats);
770 for (gap_index = 0; gap_index < cluster_count
771 && gaps[gap_index] < lower; gap_index++);
772 if (gap_index == 0) {
774 tprintf (
"No clusters below nonspace threshold!!\n");
775 if (cluster_count > 1) {
785 row->
pr_nonsp = gaps[gap_index - 1];
786 while (gap_index < cluster_count && gaps[gap_index] < upper)
788 if (gap_index == cluster_count) {
790 tprintf (
"No clusters above nonspace threshold!!\n");
799 for (gap_index = 0; gap_index < cluster_count
800 && gaps[gap_index] < upper; gap_index++);
801 if (gap_index == 0) {
803 tprintf (
"No clusters below space threshold!!\n");
808 row->
fp_nonsp = gaps[gap_index - 1];
809 if (gap_index == cluster_count) {
811 tprintf (
"No clusters above space threshold!!\n");
819 (
"Initial estimates:pr_nonsp=%g, pr_space=%g, fp_nonsp=%g, fp_space=%g\n",
853 STATS gap_stats (0, maxwidth);
855 STATS pitch_stats (0, maxwidth);
862 if (non_space > initial_pitch)
863 non_space = initial_pitch;
864 min_space = (initial_pitch + non_space) / 2;
867 initial_pitch, min_space,
TRUE,
FALSE, dm_gap)) {
869 dm_pitch_iqr = maxwidth * 2.0f;
870 dm_pitch = initial_pitch;
873 dm_gap_iqr = gap_stats.
ile (0.75) - gap_stats.
ile (0.25);
874 dm_pitch_iqr = pitch_stats.
ile (0.75) - pitch_stats.
ile (0.25);
875 dm_pitch = pitch_stats.
ile (0.5);
878 pitch_stats.
clear ();
880 initial_pitch, min_space,
TRUE,
FALSE, 0)) {
882 pitch_iqr = maxwidth * 3.0f;
885 gap_iqr = gap_stats.
ile (0.75) - gap_stats.
ile (0.25);
886 pitch_iqr = pitch_stats.
ile (0.75) - pitch_stats.
ile (0.25);
889 (
"First fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n",
890 initial_pitch, gap_iqr, pitch_iqr, pitch_stats.
ile (0.5));
891 initial_pitch = pitch_stats.
ile (0.5);
892 if (min_space > initial_pitch
894 initial_pitch, initial_pitch,
TRUE,
FALSE, 0)) {
895 min_space = initial_pitch;
896 gap_iqr = gap_stats.
ile (0.75) - gap_stats.
ile (0.25);
897 pitch_iqr = pitch_stats.
ile (0.75) - pitch_stats.
ile (0.25);
900 (
"Revised fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n",
901 initial_pitch, gap_iqr, pitch_iqr, pitch_stats.
ile (0.5));
902 initial_pitch = pitch_stats.
ile (0.5);
906 tprintf(
"Blk=%d:Row=%d:%c:p_iqr=%g:g_iqr=%g:dm_p_iqr=%g:dm_g_iqr=%g:%c:",
907 block_index, row_index,
'X',
908 pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr,
909 pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth ?
'D' :
910 (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr ?
'S' :
'M'));
911 if (pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth) {
917 if (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr) {
920 (
"Choosing non dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n",
921 pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
922 gap_iqr = gap_stats.
ile (0.75) - gap_stats.
ile (0.25);
923 pitch_iqr = pitch_stats.
ile (0.75) - pitch_stats.
ile (0.25);
924 pitch = pitch_stats.
ile (0.5);
925 used_dm_model =
FALSE;
930 (
"Choosing dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n",
931 pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
932 gap_iqr = dm_gap_iqr;
933 pitch_iqr = dm_pitch_iqr;
935 used_dm_model =
TRUE;
938 tprintf (
"rev_p_iqr=%g:rev_g_iqr=%g:pitch=%g:",
939 pitch_iqr, gap_iqr, pitch);
940 tprintf (
"p_iqr/g=%g:p_iqr/x=%g:iqr_res=%c:",
941 pitch_iqr / gap_iqr, pitch_iqr / block->
xheight,
978 const char *res_string;
999 if (pitch_sd < textord_words_pitchsd_threshold * row->fixed_pitch
1000 && ((pitsync_linear_version & 3) < 3
1001 || ((pitsync_linear_version & 3) >= 3 && (row->
used_dm_model
1003 || (pitch_sd == 0 && sp_sd > 10))))) {
1004 if (pitch_sd < textord_words_def_fixed * row->fixed_pitch
1006 && ((pitsync_linear_version & 3) < 3 || sp_sd > 20))
1011 else if ((pitsync_linear_version & 3) < 3
1015 if (pitch_sd < textord_words_def_prop * row->fixed_pitch)
1042 tprintf (
":sd/p=%g:occ=%g:init_res=%s\n",
1062 float initial_pitch,
1064 BOOL8 ignore_outsize,
1065 BOOL8 split_outsize,
1071 BLOBNBOX_IT blob_it = row->
blob_list ();
1081 gap_stats->
clear ();
1082 pitch_stats->
clear ();
1083 if (blob_it.empty ())
1088 joined_box = blob_it.data ()->bounding_box ();
1091 blob = blob_it.data ();
1094 if ((blob_box.
left () - joined_box.
right () < dm_gap
1095 && !blob_it.at_first ())
1097 joined_box += blob_box;
1099 blob_width = joined_box.
width ();
1100 if (split_outsize) {
1102 (
inT32) floor ((
float) blob_width / initial_pitch + 0.5);
1103 if (width_units < 1)
1107 else if (ignore_outsize) {
1108 width = (float) blob_width / initial_pitch;
1116 width_units * initial_pitch) / 2);
1117 if (prev_valid && width_units >= 0) {
1123 gap_stats->
add (joined_box.
left () - prev_right, 1);
1124 pitch_stats->
add (x_centre - prev_centre, 1);
1126 prev_centre = (
inT32) (x_centre + width_units * initial_pitch);
1127 prev_right = joined_box.
right ();
1128 prev_valid = blob_box.
left () - joined_box.
right () < min_space;
1129 prev_valid = prev_valid && width_units >= 0;
1130 joined_box = blob_box;
1134 while (!blob_it.at_first ());
1149 inT16 projection_left,
1150 inT16 projection_right,
1152 float &initial_pitch,
1154 inT16 &best_mid_cuts,
1155 ICOORDELT_LIST *best_cells,
1165 ICOORDELT_LIST test_cells;
1166 ICOORDELT_IT best_it;
1170 projection_right, space_size, initial_pitch,
1173 best_mid_cuts, best_cells, testing_on);
1175 best_sp_sd = initial_pitch;
1176 return initial_pitch;
1189 best_sd = initial_sd;
1190 best_pitch = initial_pitch;
1192 tprintf (
"tune_row_pitch:start pitch=%g, sd=%g\n", best_pitch, best_sd);
1196 space_size, initial_pitch + pitch_delta, sp_sd,
1197 mid_cuts, &test_cells, testing_on);
1199 tprintf (
"testing pitch at %g, sd=%g\n", initial_pitch + pitch_delta,
1201 if (pitch_sd < best_sd) {
1203 best_mid_cuts = mid_cuts;
1205 best_pitch = initial_pitch + pitch_delta;
1206 best_cells->clear ();
1207 best_it.set_to_list (best_cells);
1208 best_it.add_list_after (&test_cells);
1211 test_cells.clear ();
1212 if (pitch_sd > initial_sd)
1218 space_size, initial_pitch - pitch_delta, sp_sd,
1219 mid_cuts, &test_cells, testing_on);
1221 tprintf (
"testing pitch at %g, sd=%g\n", initial_pitch - pitch_delta,
1223 if (pitch_sd < best_sd) {
1225 best_mid_cuts = mid_cuts;
1227 best_pitch = initial_pitch - pitch_delta;
1228 best_cells->clear ();
1229 best_it.set_to_list (best_cells);
1230 best_it.add_list_after (&test_cells);
1233 test_cells.clear ();
1234 if (pitch_sd > initial_sd)
1237 initial_pitch = best_pitch;
1261 inT16 projection_left,
1262 inT16 projection_right,
1264 float &initial_pitch,
1266 inT16 &best_mid_cuts,
1267 ICOORDELT_LIST *best_cells,
1281 best_sp_sd = initial_pitch;
1283 best_pitch =
static_cast<int>(initial_pitch);
1285 return initial_pitch;
1288 if (sum_proj ==
NULL)
1289 return initial_pitch;
1296 for (pixel = projection_left; pixel <= projection_right; pixel++) {
1300 (pixel - projection_left) % (best_pitch + pitch_delta),
1309 for (pixel = 0; pixel < best_pitch + pitch_delta; pixel++) {
1315 best_delta = pitch_delta;
1321 tprintf (
"tune_row_pitch:start pitch=%g, best_delta=%d, count=%d\n",
1322 initial_pitch, best_delta, best_count);
1323 best_pitch += best_delta;
1324 initial_pitch = best_pitch;
1326 best_count += best_count;
1327 for (start = best_pixel - 2; start > best_pixel - best_pitch
1329 best_delta].
pile_count (start % best_pitch) <= best_count;
1331 for (end = best_pixel + 2;
1332 end < best_pixel + best_pitch
1334 best_delta].
pile_count (end % best_pitch) <= best_count;
1351 tprintf (
"tune_row_pitch:output pitch=%g, sd=%g\n", initial_pitch,
1378 inT16 projection_left,
1379 inT16 projection_right,
1381 float initial_pitch,
1384 ICOORDELT_LIST *row_cells,
1391 BLOBNBOX_IT blob_it = row->
blob_list ();
1392 BLOBNBOX_IT start_it;
1393 BLOBNBOX_IT plot_it;
1400 FPSEGPT_LIST seg_list;
1405 ICOORDELT_IT cell_it = row_cells;
1413 if ((pitsync_linear_version & 3) > 1) {
1415 projection_right, initial_pitch,
1416 occupation, mid_cuts, row_cells,
1417 testing_on, start, end);
1428 if (blob_it.empty ())
1429 return space_size * 10;
1430 #ifndef GRAPHICS_DISABLED
1432 blob_box = blob_it.data ()->bounding_box ();
1440 blob_it.mark_cycle_pt ();
1442 for (; blob_count > 0; blob_count--)
1445 prev_box = blob_box;
1449 while (!blob_it.cycled_list ()
1450 && blob_box.
left () - prev_box.
right () < space_size);
1452 if (pitsync_linear_version & 3)
1455 projection, projection_left, projection_right,
1457 occupation, &seg_list, start, end);
1461 projection, &seg_list);
1463 tprintf (
"Word ending at (%d,%d), len=%d, sync rating=%g, ",
1464 prev_box.
right (), prev_box.
top (),
1465 seg_list.length () - 1, word_sync);
1466 seg_it.set_to_list (&seg_list);
1467 for (seg_it.mark_cycle_pt (); !seg_it.cycled_list ();
1468 seg_it.forward ()) {
1469 if (seg_it.data ()->faked)
1471 tprintf (
"%d, ", seg_it.data ()->position ());
1479 #ifndef GRAPHICS_DISABLED
1483 seg_it.set_to_list (&seg_list);
1484 if (prev_right >= 0) {
1485 sp_var = seg_it.data ()->position () - prev_right;
1486 sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch;
1491 for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
1492 segpos = seg_it.data ()->position ();
1493 if (cell_it.empty () || segpos > cellpos + initial_pitch / 2) {
1495 while (!cell_it.empty () && segpos > cellpos + initial_pitch * 3 / 2) {
1497 cell_it.add_after_then_move (cell);
1498 cellpos += (
inT16) initial_pitch;
1502 cell_it.add_after_then_move (cell);
1505 else if (segpos > cellpos - initial_pitch / 2) {
1506 cell = cell_it.data ();
1508 cell->
set_x ((cellpos + segpos) / 2);
1509 cellpos = cell->
x ();
1512 seg_it.move_to_last ();
1513 prev_right = seg_it.data ()->position ();
1515 scale_factor = (seg_list.length () - 2) / 2;
1516 if (scale_factor < 1)
1521 sqsum += word_sync * scale_factor;
1522 total_count += (seg_list.length () - 1) * scale_factor;
1525 while (!blob_it.cycled_list ());
1526 sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0;
1527 return total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10;
1541 inT16 projection_left,
1542 inT16 projection_right,
1543 float initial_pitch,
1546 ICOORDELT_LIST *row_cells,
1552 BLOBNBOX_IT blob_it = row->
blob_list ();
1553 BLOBNBOX_IT plot_it;
1556 FPSEGPT_LIST seg_list;
1560 ICOORDELT_IT cell_it = row_cells;
1565 if (blob_it.empty ()) {
1567 return initial_pitch * 10;
1569 #ifndef GRAPHICS_DISABLED
1576 blob_it.mark_cycle_pt ();
1582 while (!blob_it.cycled_list ());
1585 2, projection, projection_left,
1588 occupation, &seg_list, start, end);
1590 tprintf (
"Row ending at (%d,%d), len=%d, sync rating=%g, ",
1591 blob_box.
right (), blob_box.
top (),
1592 seg_list.length () - 1, word_sync);
1593 seg_it.set_to_list (&seg_list);
1594 for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
1595 if (seg_it.data ()->faked)
1597 tprintf (
"%d, ", seg_it.data ()->position ());
1605 #ifndef GRAPHICS_DISABLED
1609 seg_it.set_to_list (&seg_list);
1610 for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
1611 segpos = seg_it.data ()->position ();
1614 cell_it.add_after_then_move (cell);
1615 if (seg_it.at_last ())
1616 mid_cuts = seg_it.data ()->cheap_cuts ();
1619 return occupation > 0 ? sqrt (word_sync / occupation) : initial_pitch * 10;
1633 inT16 projection_left,
1634 inT16 projection_right,
1642 BLOBNBOX_IT blob_it = row->
blob_list ();
1643 BLOBNBOX_IT start_it;
1644 BLOBNBOX_IT row_start;
1646 inT16 total_blob_count;
1652 FPSEGPT_LIST seg_list;
1660 if (blob_it.empty ())
1662 row_start = blob_it;
1663 total_blob_count = 0;
1670 blob_it = row_start;
1674 blob_it.mark_cycle_pt ();
1676 for (; blob_count > 0; blob_count--)
1679 prev_box = blob_box;
1683 while (!blob_it.cycled_list ()
1684 && blob_box.
left () - prev_box.
right () < space_size);
1687 projection, projection_left, projection_right,
1689 occupation, &seg_list, 0, 0);
1690 total_blob_count += blob_count;
1691 seg_it.set_to_list (&seg_list);
1692 if (prev_right >= 0) {
1693 sp_var = seg_it.data ()->position () - prev_right;
1694 sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch;
1699 seg_it.move_to_last ();
1700 prev_right = seg_it.data ()->position ();
1702 scale_factor = (seg_list.length () - 2) / 2;
1703 if (scale_factor < 1)
1708 sqsum += word_sync * scale_factor;
1709 total_count += (seg_list.length () - 1) * scale_factor;
1712 while (!blob_it.cycled_list ());
1713 sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0;
1714 word_sync = total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10;
1715 tprintf (
"new_sd=%g:sd/p=%g:new_sp_sd=%g:res=%c:",
1716 word_sync, word_sync / initial_pitch, sp_sd,
1720 start_it = row_start;
1721 blob_it = row_start;
1724 projection, projection_left, projection_right,
1728 word_sync /= occupation;
1729 word_sync = sqrt (word_sync);
1731 #ifndef GRAPHICS_DISABLED
1746 (
"row_sd=%g:sd/p=%g:res=%c:N=%d:res2=%s,init pitch=%g, row_pitch=%g, all_caps=%d\n",
1747 word_sync, word_sync / initial_pitch,
1766 BLOBNBOX_IT search_it;
1770 int blobcount, repeated_set;
1772 TO_ROW_IT row_it = block->
get_rows();
1773 if (row_it.empty())
return;
1774 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1775 row = row_it.data();
1777 if (box_it.empty())
continue;
1784 if (box_it.data()->repeated_set() != 0 &&
1785 !box_it.data()->joined_to_prev()) {
1787 repeated_set = box_it.data()->repeated_set();
1789 search_it.forward();
1790 while (!search_it.at_first() &&
1791 search_it.data()->repeated_set() == repeated_set) {
1793 search_it.forward();
1799 if (!box_it.empty() && box_it.data()->joined_to_prev()) {
1800 tprintf(
"Bad box joined to prev at");
1801 box_it.data()->bounding_box().print();
1802 tprintf(
"After repeated word:");
1805 ASSERT_HOST(box_it.empty() || !box_it.data()->joined_to_prev());
1808 word_it.add_after_then_move(word);
1812 }
while (!box_it.at_first());
1823 #ifndef GRAPHICS_DISABLED
1830 TO_ROW_IT row_it = block->
get_rows ();
1832 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1833 row = row_it.data ();
void set_x(inT16 xin)
rewrite function
EXTERN double textord_pitch_rowsimilarity
EXTERN double textord_words_def_fixed
inT32 cluster(float lower, float upper, float multiple, inT32 max_clusters, STATS *clusters)
EXTERN double words_initial_upper
EXTERN double textord_wordstats_smooth_factor
ScrollView * create_to_win(ICOORD page_tr)
PITCH_TYPE pitch_decision
WERD * make_real_word(BLOBNBOX_IT *box_it, inT32 blobcount, BOOL8 bol, uinT8 blanks)
#define BLOCK_STATS_CLUSTERS
#define double_VAR(name, val, comment)
EXTERN bool textord_pitch_scalebigwords
float tune_row_pitch2(TO_ROW *row, STATS *projection, inT16 projection_left, inT16 projection_right, float space_size, float &initial_pitch, float &best_sp_sd, inT16 &best_mid_cuts, ICOORDELT_LIST *best_cells, BOOL8 testing_on)
EXTERN double words_default_prop_nonspace
bool joined_to_prev() const
EXTERN double textord_words_default_maxspace
EXTERN double textord_words_default_minspace
void fix_row_pitch(TO_ROW *bad_row, TO_BLOCK *bad_block, TO_BLOCK_LIST *blocks, inT32 row_target, inT32 block_target)
EXTERN double textord_words_default_nonspace
int num_repeated_sets() const
BOOL8 row_pitch_stats(TO_ROW *row, inT32 maxwidth, BOOL8 testing_on)
void add(inT32 value, inT32 count)
#define BOOL_VAR(name, val, comment)
BOOL8 try_rows_fixed(TO_BLOCK *block, inT32 block_index, BOOL8 testing_on)
TBOX bounding_box() const
EXTERN int textord_pitch_range
EXTERN bool textord_show_row_cuts
BOOL8 try_doc_fixed(ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient)
BLOBNBOX_LIST * blob_list()
EXTERN double textord_projection_scale
EXTERN bool textord_debug_pitch_metric
EXTERN bool textord_debug_pitch_test
void plot_row_cells(ScrollView *win, ScrollView::Color colour, TO_ROW *row, float xshift, ICOORDELT_LIST *cells)
EXTERN int textord_dotmatrix_gap
EXTERN double textord_fpiqr_ratio
int sort_floats(const void *arg1, const void *arg2)
EXTERN bool textord_blocksall_fixed
EXTERN double textord_max_pitch_iqr
double check_pitch_sync(BLOBNBOX_IT *blob_it, inT16 blob_count, inT16 pitch, inT16 pitch_error, STATS *projection, FPSEGPT_LIST *seg_list)
EXTERN bool textord_pitch_cheat
EXTERN ScrollView * to_win
EXTERN bool textord_blockndoc_fixed
double ile(double frac) const
EXTERN double textord_spacesize_ratioprop
EXTERN double textord_words_maxspace
#define MAX_ALLOWED_PITCH
EXTERN bool textord_fast_pitch_test
EXTERN double textord_balance_factor
BOOL8 find_row_pitch(TO_ROW *row, inT32 maxwidth, inT32 dm_gap, TO_BLOCK *block, inT32 block_index, inT32 row_index, BOOL8 testing_on)
ICOORDELT_LIST char_cells
float compute_pitch_sd(TO_ROW *row, STATS *projection, inT16 projection_left, inT16 projection_right, float space_size, float initial_pitch, float &sp_sd, inT16 &mid_cuts, ICOORDELT_LIST *row_cells, BOOL8 testing_on, inT16 start, inT16 end)
void plot_fp_word(TO_BLOCK *block, float pitch, float nonspace)
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
EXTERN double textord_words_pitchsd_threshold
EXTERN bool textord_show_fixed_cuts
BOOL8 count_pitch_stats(TO_ROW *row, STATS *gap_stats, STATS *pitch_stats, float initial_pitch, float min_space, BOOL8 ignore_outsize, BOOL8 split_outsize, inT32 dm_gap)
bool rep_chars_marked() const
EXTERN double textord_words_min_minspace
void compute_vertical_projection()
void smooth(inT32 factor)
double check_pitch_sync2(BLOBNBOX_IT *blob_it, inT16 blob_count, inT16 pitch, inT16 pitch_error, STATS *projection, inT16 projection_left, inT16 projection_right, float projection_scale, inT16 &occupation_count, FPSEGPT_LIST *seg_list, inT16 start, inT16 end)
EXTERN bool textord_show_initial_words
float tune_row_pitch(TO_ROW *row, STATS *projection, inT16 projection_left, inT16 projection_right, float space_size, float &initial_pitch, float &best_sp_sd, inT16 &best_mid_cuts, ICOORDELT_LIST *best_cells, BOOL8 testing_on)
TBOX box_next(BLOBNBOX_IT *it)
float compute_pitch_sd2(TO_ROW *row, STATS *projection, inT16 projection_left, inT16 projection_right, float initial_pitch, inT16 &occupation, inT16 &mid_cuts, ICOORDELT_LIST *row_cells, BOOL8 testing_on, inT16 start, inT16 end)
BOOL8 try_block_fixed(TO_BLOCK *block, inT32 block_index)
void print_pitch_sd(TO_ROW *row, STATS *projection, inT16 projection_left, inT16 projection_right, float space_size, float initial_pitch)
void compute_fixed_pitch(ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient, FCOORD rotation, BOOL8 testing_on)
void plot_word_decisions(ScrollView *win, inT16 pitch, TO_ROW *row)
PITCH_TYPE pitch_decision
inT16 x() const
access function
void print_block_counts(TO_BLOCK *block, inT32 block_index)
void count_block_votes(TO_BLOCK *block, inT32 &def_fixed, inT32 &def_prop, inT32 &maybe_fixed, inT32 &maybe_prop, inT32 &corr_fixed, inT32 &corr_prop, inT32 &dunno)
EXTERN bool textord_all_prop
void find_repeated_chars(TO_BLOCK *block, BOOL8 testing_on)
EXTERN bool textord_disable_pitch_test
void mark_repeated_chars(TO_ROW *row)
bool set_range(inT32 min_bucket_value, inT32 max_bucket_value_plus_1)
BOOL8 compute_rows_pitch(TO_BLOCK *block, inT32 block_index, BOOL8 testing_on)
EXTERN double words_default_fixed_space
EXTERN int textord_words_veto_power
EXTERN bool textord_show_page_cuts
EXTERN bool textord_blocksall_prop
inT32 pile_count(inT32 value) const
EXTERN double words_default_fixed_limit
void compute_block_pitch(TO_BLOCK *block, FCOORD rotation, inT32 block_index, BOOL8 testing_on)
const TBOX & bounding_box() const
EXTERN int textord_debug_block
EXTERN double words_initial_lower
void plot(ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const
POLY_BLOCK * poly_block() const
void set_flag(WERD_FLAGS mask, BOOL8 value)
EXTERN double textord_words_def_prop
void plot_fp_cells2(ScrollView *win, ScrollView::Color colour, TO_ROW *row, FPSEGPT_LIST *seg_list)
BOOL8 fixed_pitch_row(TO_ROW *row, BLOCK *block, inT32 block_index)