33 #include "config_auto.h" 42 "Debug on fixed pitch test");
44 "Turn off dp fixed pitch algorithm");
46 "Do even faster pitch algorithm");
48 "Write full metric stuff");
52 "Use correct answer for fixed/prop");
54 "Attempt whole doc/block fixed pitch");
57 "Ding rate for unbalanced char cells");
59 #define FIXED_WIDTH_MULTIPLE 5 60 #define BLOCK_STATS_CLUSTERS 10 61 #define MAX_ALLOWED_PITCH 100 //max pixel pitch. 64 static int sort_floats(
const void *arg1,
const void *arg2) {
65 float diff = *
reinterpret_cast<const float*
>(arg1) -
66 *reinterpret_cast<const float*>(arg2);
69 }
else if (diff < 0) {
85 TO_BLOCK_LIST* port_blocks,
95 #ifndef GRAPHICS_DISABLED 102 block_it.set_to_list (port_blocks);
104 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
105 block_it.forward ()) {
106 block = block_it.data ();
113 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
114 block_it.forward ()) {
115 block = block_it.data ();
123 for (block_it.mark_cycle_pt(); !block_it.cycled_list();
124 block_it.forward()) {
125 block = block_it.data ();
127 if (pb !=
nullptr && !pb->
IsText())
continue;
129 TO_ROW_IT row_it(block->
get_rows());
131 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
132 row = row_it.data ();
133 fix_row_pitch(row, block, port_blocks, row_index, block_index);
138 #ifndef GRAPHICS_DISABLED 155 TO_BLOCK_LIST *blocks,
157 int32_t block_target) {
165 TO_BLOCK_IT block_it = blocks;
172 block_votes = like_votes = other_votes = 0;
179 for (block_it.mark_cycle_pt(); !block_it.cycled_list();
180 block_it.forward()) {
181 block = block_it.data();
183 if (pb !=
nullptr && !pb->
IsText())
continue;
185 TO_ROW_IT row_it(block->
get_rows());
186 for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
188 row = row_it.data ();
202 if (block_index == block_target) {
257 else if (block_votes <= textord_words_veto_power && like_votes > 0) {
263 if (block_votes == 0 && like_votes == 0 && other_votes > 0
266 (
"Warning:row %d of block %d set prop with no like rows against trend\n",
267 row_target, block_target);
271 tprintf(
":b_votes=%d:l_votes=%d:o_votes=%d",
272 block_votes, like_votes, other_votes);
279 else if (block_votes == 0 && like_votes > 0)
283 (
"Warning:guessing pitch as xheight on row %d, block %d\n",
284 row_target, block_target);
326 tprintf (
"Block %d at (%d,%d)->(%d,%d)\n",
329 block_box.
right (), block_box.
top ());
340 if (!block->
get_rows ()->empty ()) {
343 #ifndef GRAPHICS_DISABLED 370 TO_ROW_IT row_it = block->
get_rows ();
373 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
374 row = row_it.data ();
381 row_index, testing_on)) {
407 TO_BLOCK_LIST* port_blocks,
417 TO_BLOCK_IT block_it = port_blocks;
420 int16_t projection_left;
421 int16_t projection_right;
424 ICOORDELT_LIST *master_cells;
437 if (block_it.empty ()
441 shift_factor = gradient / (gradient * gradient + 1);
443 TO_ROW_IT row_it(block_it.data ()->get_rows());
444 master_x = row_it.data ()->projection_left;
445 master_y = row_it.data ()->baseline.y (master_x);
446 projection_left = INT16_MAX;
447 projection_right = -INT16_MAX;
452 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
453 block_it.forward ()) {
454 block = block_it.data ();
455 row_it.set_to_list (block->
get_rows ());
456 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
457 row = row_it.data ();
465 shift_factor * (master_y - row_y));
468 shift_factor * (master_y - row_y));
469 if (row_left < projection_left)
470 projection_left = row_left;
471 if (row_right > projection_right)
472 projection_right = row_right;
477 projection.
set_range (projection_left, projection_right);
479 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
480 block_it.forward ()) {
481 block = block_it.data ();
482 row_it.set_to_list (block->
get_rows ());
483 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
484 row = row_it.data ();
488 shift_factor * (master_y - row_y));
496 row_it.set_to_list (block_it.data ()->get_rows ());
497 row = row_it.data ();
498 #ifndef GRAPHICS_DISABLED 503 final_pitch = pitches.
ile (0.5);
504 pitch = (int16_t) final_pitch;
506 tune_row_pitch (row, &projection, projection_left, projection_right,
507 pitch * 0.75, final_pitch, sp_sd, mid_cuts,
512 (
"try_doc:props=%d:fixed=%d:pitch=%d:final_pitch=%g:pitch_sd=%g:sp_sd=%g:sd/trc=%g:sd/p=%g:sd/trc/p=%g\n",
513 prop_blocks, fixed_blocks, pitch, final_pitch, pitch_sd, sp_sd,
514 pitch_sd / total_row_count, pitch_sd / pitch,
515 pitch_sd / total_row_count / pitch);
517 #ifndef GRAPHICS_DISABLED 520 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
521 block_it.forward ()) {
522 block = block_it.data ();
523 row_it.set_to_list (block->
get_rows ());
524 for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
526 row = row_it.data ();
528 row_shift = shift_factor * (master_y - row_y);
566 int32_t def_fixed = 0;
567 int32_t def_prop = 0;
568 int32_t maybe_fixed = 0;
569 int32_t maybe_prop = 0;
571 int32_t corr_fixed = 0;
572 int32_t corr_prop = 0;
574 TO_ROW_IT row_it = block->
get_rows ();
577 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
578 row = row_it.data ();
609 else if (def_fixed > 0 || def_prop > 0)
631 int32_t def_fixed = 0;
632 int32_t def_prop = 0;
633 int32_t maybe_fixed = 0;
634 int32_t maybe_prop = 0;
636 int32_t corr_fixed = 0;
637 int32_t corr_prop = 0;
647 tprintf (
"Block %d has (%d,%d,%d)",
648 block_index, def_fixed, maybe_fixed, corr_fixed);
651 tprintf (
" fixed, (%d,%d,%d)", def_prop, maybe_prop, corr_prop);
654 tprintf (
" prop, %d dunno\n", dunno);
668 int32_t &maybe_fixed,
674 TO_ROW_IT row_it = block->
get_rows ();
676 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
677 row = row_it.data ();
719 int32_t cluster_count;
721 int32_t smooth_factor;
728 STATS gap_stats (0, maxwidth);
734 if (!blob_it.empty ()) {
735 prev_x = blob_it.data ()->bounding_box ().right ();
737 while (!blob_it.at_first ()) {
738 blob = blob_it.data ();
741 if (blob_box.
left () - prev_x < maxwidth)
742 gap_stats.
add (blob_box.
left () - prev_x, 1);
743 prev_x = blob_box.
right ();
754 gap_stats.
smooth (smooth_factor);
756 prev_count = cluster_count;
757 cluster_count = gap_stats.
cluster (lower, upper,
762 if (cluster_count < 1) {
765 for (gap_index = 0; gap_index < cluster_count; gap_index++)
766 gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
769 tprintf (
"cluster_count=%d:", cluster_count);
770 for (gap_index = 0; gap_index < cluster_count; gap_index++)
771 tprintf (
" %g(%d)", gaps[gap_index],
772 cluster_stats[gap_index + 1].get_total ());
775 qsort (gaps, cluster_count,
sizeof (
float), sort_floats);
780 for (gap_index = 0; gap_index < cluster_count
781 && gaps[gap_index] < lower; gap_index++);
782 if (gap_index == 0) {
784 tprintf (
"No clusters below nonspace threshold!!\n");
785 if (cluster_count > 1) {
795 row->
pr_nonsp = gaps[gap_index - 1];
796 while (gap_index < cluster_count && gaps[gap_index] < upper)
798 if (gap_index == cluster_count) {
800 tprintf (
"No clusters above nonspace threshold!!\n");
809 for (gap_index = 0; gap_index < cluster_count
810 && gaps[gap_index] < upper; gap_index++);
811 if (gap_index == 0) {
813 tprintf (
"No clusters below space threshold!!\n");
818 row->
fp_nonsp = gaps[gap_index - 1];
819 if (gap_index == cluster_count) {
821 tprintf (
"No clusters above space threshold!!\n");
829 (
"Initial estimates:pr_nonsp=%g, pr_space=%g, fp_nonsp=%g, fp_space=%g\n",
863 STATS gap_stats (0, maxwidth);
865 STATS pitch_stats (0, maxwidth);
872 if (non_space > initial_pitch)
873 non_space = initial_pitch;
874 min_space = (initial_pitch + non_space) / 2;
877 initial_pitch, min_space,
TRUE,
FALSE, dm_gap)) {
879 dm_pitch_iqr = maxwidth * 2.0f;
880 dm_pitch = initial_pitch;
883 dm_gap_iqr = gap_stats.
ile (0.75) - gap_stats.
ile (0.25);
884 dm_pitch_iqr = pitch_stats.
ile (0.75) - pitch_stats.
ile (0.25);
885 dm_pitch = pitch_stats.
ile (0.5);
888 pitch_stats.
clear ();
890 initial_pitch, min_space,
TRUE,
FALSE, 0)) {
892 pitch_iqr = maxwidth * 3.0f;
895 gap_iqr = gap_stats.
ile (0.75) - gap_stats.
ile (0.25);
896 pitch_iqr = pitch_stats.
ile (0.75) - pitch_stats.
ile (0.25);
899 (
"First fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n",
900 initial_pitch, gap_iqr, pitch_iqr, pitch_stats.
ile (0.5));
901 initial_pitch = pitch_stats.
ile (0.5);
902 if (min_space > initial_pitch
904 initial_pitch, initial_pitch,
TRUE,
FALSE, 0)) {
905 min_space = initial_pitch;
906 gap_iqr = gap_stats.
ile (0.75) - gap_stats.
ile (0.25);
907 pitch_iqr = pitch_stats.
ile (0.75) - pitch_stats.
ile (0.25);
910 (
"Revised fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n",
911 initial_pitch, gap_iqr, pitch_iqr, pitch_stats.
ile (0.5));
912 initial_pitch = pitch_stats.
ile (0.5);
916 tprintf(
"Blk=%d:Row=%d:%c:p_iqr=%g:g_iqr=%g:dm_p_iqr=%g:dm_g_iqr=%g:%c:",
917 block_index, row_index,
'X',
918 pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr,
919 pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth ?
'D' :
920 (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr ?
'S' :
'M'));
921 if (pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth) {
927 if (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr) {
930 (
"Choosing non dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n",
931 pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
932 gap_iqr = gap_stats.
ile (0.75) - gap_stats.
ile (0.25);
933 pitch_iqr = pitch_stats.
ile (0.75) - pitch_stats.
ile (0.25);
934 pitch = pitch_stats.
ile (0.5);
935 used_dm_model =
false;
940 (
"Choosing dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n",
941 pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
942 gap_iqr = dm_gap_iqr;
943 pitch_iqr = dm_pitch_iqr;
945 used_dm_model =
true;
948 tprintf (
"rev_p_iqr=%g:rev_g_iqr=%g:pitch=%g:",
949 pitch_iqr, gap_iqr, pitch);
950 tprintf (
"p_iqr/g=%g:p_iqr/x=%g:iqr_res=%c:",
951 pitch_iqr / gap_iqr, pitch_iqr / block->
xheight,
988 const char *res_string;
1009 if (pitch_sd < textord_words_pitchsd_threshold * row->fixed_pitch
1010 && ((pitsync_linear_version & 3) < 3
1011 || ((pitsync_linear_version & 3) >= 3 && (row->
used_dm_model 1013 || (pitch_sd == 0 && sp_sd > 10))))) {
1014 if (pitch_sd < textord_words_def_fixed * row->fixed_pitch
1016 && ((pitsync_linear_version & 3) < 3 || sp_sd > 20))
1021 else if ((pitsync_linear_version & 3) < 3
1025 if (pitch_sd < textord_words_def_prop * row->fixed_pitch)
1052 tprintf (
":sd/p=%g:occ=%g:init_res=%s\n",
1072 float initial_pitch,
1074 bool ignore_outsize,
1081 BLOBNBOX_IT blob_it = row->
blob_list ();
1083 int32_t prev_centre;
1086 int32_t width_units;
1091 gap_stats->
clear ();
1092 pitch_stats->
clear ();
1093 if (blob_it.empty ())
1098 joined_box = blob_it.data ()->bounding_box ();
1101 blob = blob_it.data ();
1104 if ((blob_box.
left () - joined_box.
right () < dm_gap
1105 && !blob_it.at_first ())
1106 || blob->
cblob() ==
nullptr)
1107 joined_box += blob_box;
1109 blob_width = joined_box.
width ();
1110 if (split_outsize) {
1112 (int32_t) floor ((
float) blob_width / initial_pitch + 0.5);
1113 if (width_units < 1)
1117 else if (ignore_outsize) {
1118 width = (float) blob_width / initial_pitch;
1124 x_centre = (int32_t) (joined_box.
left ()
1126 width_units * initial_pitch) / 2);
1127 if (prev_valid && width_units >= 0) {
1133 gap_stats->
add (joined_box.
left () - prev_right, 1);
1134 pitch_stats->
add (x_centre - prev_centre, 1);
1136 prev_centre = (int32_t) (x_centre + width_units * initial_pitch);
1137 prev_right = joined_box.
right ();
1138 prev_valid = blob_box.
left () - joined_box.
right () < min_space;
1139 prev_valid = prev_valid && width_units >= 0;
1140 joined_box = blob_box;
1144 while (!blob_it.at_first ());
1159 int16_t projection_left,
1160 int16_t projection_right,
1162 float& initial_pitch,
1164 int16_t& best_mid_cuts,
1165 ICOORDELT_LIST* best_cells,
1175 ICOORDELT_LIST test_cells;
1176 ICOORDELT_IT best_it;
1180 projection_right, space_size, initial_pitch,
1183 best_mid_cuts, best_cells, testing_on);
1185 best_sp_sd = initial_pitch;
1186 return initial_pitch;
1199 best_sd = initial_sd;
1200 best_pitch = initial_pitch;
1202 tprintf (
"tune_row_pitch:start pitch=%g, sd=%g\n", best_pitch, best_sd);
1206 space_size, initial_pitch + pitch_delta, sp_sd,
1207 mid_cuts, &test_cells, testing_on);
1209 tprintf (
"testing pitch at %g, sd=%g\n", initial_pitch + pitch_delta,
1211 if (pitch_sd < best_sd) {
1213 best_mid_cuts = mid_cuts;
1215 best_pitch = initial_pitch + pitch_delta;
1216 best_cells->clear ();
1217 best_it.set_to_list (best_cells);
1218 best_it.add_list_after (&test_cells);
1221 test_cells.clear ();
1222 if (pitch_sd > initial_sd)
1228 space_size, initial_pitch - pitch_delta, sp_sd,
1229 mid_cuts, &test_cells, testing_on);
1231 tprintf (
"testing pitch at %g, sd=%g\n", initial_pitch - pitch_delta,
1233 if (pitch_sd < best_sd) {
1235 best_mid_cuts = mid_cuts;
1237 best_pitch = initial_pitch - pitch_delta;
1238 best_cells->clear ();
1239 best_it.set_to_list (best_cells);
1240 best_it.add_list_after (&test_cells);
1243 test_cells.clear ();
1244 if (pitch_sd > initial_sd)
1247 initial_pitch = best_pitch;
1271 int16_t projection_left,
1272 int16_t projection_right,
1274 float& initial_pitch,
1276 int16_t& best_mid_cuts,
1277 ICOORDELT_LIST* best_cells,
1290 best_sp_sd = initial_pitch;
1292 best_pitch =
static_cast<int>(initial_pitch);
1294 return initial_pitch;
1303 for (pixel = projection_left; pixel <= projection_right; pixel++) {
1307 (pixel - projection_left) % (best_pitch + pitch_delta),
1316 for (pixel = 0; pixel < best_pitch + pitch_delta; pixel++) {
1321 pitch_delta].pile_count (pixel);
1322 best_delta = pitch_delta;
1328 tprintf (
"tune_row_pitch:start pitch=%g, best_delta=%d, count=%d\n",
1329 initial_pitch, best_delta, best_count);
1330 best_pitch += best_delta;
1331 initial_pitch = best_pitch;
1333 best_count += best_count;
1334 for (start = best_pixel - 2; start > best_pixel - best_pitch
1336 best_delta].pile_count (start % best_pitch) <= best_count;
1338 for (end = best_pixel + 2;
1339 end < best_pixel + best_pitch
1341 best_delta].pile_count (end % best_pitch) <= best_count;
1358 tprintf (
"tune_row_pitch:output pitch=%g, sd=%g\n", initial_pitch,
1383 int16_t projection_left,
1384 int16_t projection_right,
1386 float initial_pitch,
1389 ICOORDELT_LIST* row_cells,
1396 BLOBNBOX_IT blob_it = row->
blob_list ();
1397 BLOBNBOX_IT start_it;
1398 BLOBNBOX_IT plot_it;
1405 FPSEGPT_LIST seg_list;
1410 ICOORDELT_IT cell_it = row_cells;
1416 int32_t total_count;
1418 if ((pitsync_linear_version & 3) > 1) {
1420 projection_right, initial_pitch,
1421 occupation, mid_cuts, row_cells,
1422 testing_on, start, end);
1433 if (blob_it.empty ())
1434 return space_size * 10;
1435 #ifndef GRAPHICS_DISABLED 1436 if (testing_on &&
to_win !=
nullptr) {
1437 blob_box = blob_it.data ()->bounding_box ();
1445 blob_it.mark_cycle_pt ();
1447 for (; blob_count > 0; blob_count--)
1450 prev_box = blob_box;
1454 while (!blob_it.cycled_list ()
1455 && blob_box.
left () - prev_box.
right () < space_size);
1457 if (pitsync_linear_version & 3)
1460 projection, projection_left, projection_right,
1462 occupation, &seg_list, start, end);
1466 projection, &seg_list);
1468 tprintf (
"Word ending at (%d,%d), len=%d, sync rating=%g, ",
1469 prev_box.
right (), prev_box.
top (),
1470 seg_list.length () - 1, word_sync);
1471 seg_it.set_to_list (&seg_list);
1472 for (seg_it.mark_cycle_pt (); !seg_it.cycled_list ();
1473 seg_it.forward ()) {
1474 if (seg_it.data ()->faked)
1476 tprintf (
"%d, ", seg_it.data ()->position ());
1484 #ifndef GRAPHICS_DISABLED 1488 seg_it.set_to_list (&seg_list);
1489 if (prev_right >= 0) {
1490 sp_var = seg_it.data ()->position () - prev_right;
1491 sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch;
1496 for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
1497 segpos = seg_it.data ()->position ();
1498 if (cell_it.empty () || segpos > cellpos + initial_pitch / 2) {
1500 while (!cell_it.empty () && segpos > cellpos + initial_pitch * 3 / 2) {
1501 cell =
new ICOORDELT (cellpos + (int16_t) initial_pitch, 0);
1502 cell_it.add_after_then_move (cell);
1503 cellpos += (int16_t) initial_pitch;
1507 cell_it.add_after_then_move (cell);
1510 else if (segpos > cellpos - initial_pitch / 2) {
1511 cell = cell_it.data ();
1513 cell->
set_x ((cellpos + segpos) / 2);
1514 cellpos = cell->
x ();
1517 seg_it.move_to_last ();
1518 prev_right = seg_it.data ()->position ();
1520 scale_factor = (seg_list.length () - 2) / 2;
1521 if (scale_factor < 1)
1526 sqsum += word_sync * scale_factor;
1527 total_count += (seg_list.length () - 1) * scale_factor;
1530 while (!blob_it.cycled_list ());
1531 sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0;
1532 return total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10;
1546 int16_t projection_left,
1547 int16_t projection_right,
1548 float initial_pitch,
1549 int16_t& occupation,
1551 ICOORDELT_LIST* row_cells,
1557 BLOBNBOX_IT blob_it = row->
blob_list ();
1558 BLOBNBOX_IT plot_it;
1561 FPSEGPT_LIST seg_list;
1565 ICOORDELT_IT cell_it = row_cells;
1570 if (blob_it.empty ()) {
1572 return initial_pitch * 10;
1574 #ifndef GRAPHICS_DISABLED 1575 if (testing_on &&
to_win !=
nullptr) {
1581 blob_it.mark_cycle_pt ();
1587 while (!blob_it.cycled_list ());
1590 2, projection, projection_left,
1593 occupation, &seg_list, start, end);
1595 tprintf (
"Row ending at (%d,%d), len=%d, sync rating=%g, ",
1596 blob_box.
right (), blob_box.
top (),
1597 seg_list.length () - 1, word_sync);
1598 seg_it.set_to_list (&seg_list);
1599 for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
1600 if (seg_it.data ()->faked)
1602 tprintf (
"%d, ", seg_it.data ()->position ());
1610 #ifndef GRAPHICS_DISABLED 1614 seg_it.set_to_list (&seg_list);
1615 for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
1616 segpos = seg_it.data ()->position ();
1619 cell_it.add_after_then_move (cell);
1620 if (seg_it.at_last ())
1621 mid_cuts = seg_it.data ()->cheap_cuts ();
1624 return occupation > 0 ? sqrt (word_sync / occupation) : initial_pitch * 10;
1638 int16_t projection_left,
1639 int16_t projection_right,
1647 BLOBNBOX_IT blob_it = row->
blob_list ();
1648 BLOBNBOX_IT start_it;
1649 BLOBNBOX_IT row_start;
1651 int16_t total_blob_count;
1657 FPSEGPT_LIST seg_list;
1665 if (blob_it.empty ())
1667 row_start = blob_it;
1668 total_blob_count = 0;
1675 blob_it = row_start;
1679 blob_it.mark_cycle_pt ();
1681 for (; blob_count > 0; blob_count--)
1684 prev_box = blob_box;
1688 while (!blob_it.cycled_list ()
1689 && blob_box.
left () - prev_box.
right () < space_size);
1692 projection, projection_left, projection_right,
1694 occupation, &seg_list, 0, 0);
1695 total_blob_count += blob_count;
1696 seg_it.set_to_list (&seg_list);
1697 if (prev_right >= 0) {
1698 sp_var = seg_it.data ()->position () - prev_right;
1699 sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch;
1704 seg_it.move_to_last ();
1705 prev_right = seg_it.data ()->position ();
1707 scale_factor = (seg_list.length () - 2) / 2;
1708 if (scale_factor < 1)
1713 sqsum += word_sync * scale_factor;
1714 total_count += (seg_list.length () - 1) * scale_factor;
1717 while (!blob_it.cycled_list ());
1718 sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0;
1719 word_sync = total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10;
1720 tprintf (
"new_sd=%g:sd/p=%g:new_sp_sd=%g:res=%c:",
1721 word_sync, word_sync / initial_pitch, sp_sd,
1725 start_it = row_start;
1726 blob_it = row_start;
1729 projection, projection_left, projection_right,
1733 word_sync /= occupation;
1734 word_sync = sqrt (word_sync);
1736 #ifndef GRAPHICS_DISABLED 1751 (
"row_sd=%g:sd/p=%g:res=%c:N=%d:res2=%s,init pitch=%g, row_pitch=%g, all_caps=%d\n",
1752 word_sync, word_sync / initial_pitch,
1766 if (pb !=
nullptr && !pb->
IsText())
1771 BLOBNBOX_IT search_it;
1774 int blobcount, repeated_set;
1776 TO_ROW_IT row_it = block->
get_rows();
1777 if (row_it.empty())
return;
1778 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1779 row = row_it.data();
1781 if (box_it.empty())
continue;
1789 if (box_it.data()->repeated_set() != 0 &&
1790 !box_it.data()->joined_to_prev()) {
1792 repeated_set = box_it.data()->repeated_set();
1794 search_it.forward();
1795 while (!search_it.at_first() &&
1796 search_it.data()->repeated_set() == repeated_set) {
1798 search_it.forward();
1804 if (!box_it.empty() && box_it.data()->joined_to_prev()) {
1805 tprintf(
"Bad box joined to prev at");
1806 box_it.data()->bounding_box().print();
1807 tprintf(
"After repeated word:");
1810 ASSERT_HOST(box_it.empty() || !box_it.data()->joined_to_prev());
1813 word_it.add_after_then_move(word);
1817 }
while (!box_it.at_first());
1828 #ifndef GRAPHICS_DISABLED 1835 TO_ROW_IT row_it = block->
get_rows ();
1837 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1838 row = row_it.data ();
1839 row->
min_space = (int32_t) ((pitch + nonspace) / 2);
float tune_row_pitch2(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float &initial_pitch, float &best_sp_sd, int16_t &best_mid_cuts, ICOORDELT_LIST *best_cells, bool testing_on)
bool compute_rows_pitch(TO_BLOCK *block, int32_t block_index, bool testing_on)
EXTERN double textord_pitch_rowsimilarity
bool row_pitch_stats(TO_ROW *row, int32_t maxwidth, bool testing_on)
WERD * make_real_word(BLOBNBOX_IT *box_it, int32_t blobcount, bool bol, uint8_t blanks)
EXTERN double words_default_fixed_limit
EXTERN bool textord_fast_pitch_test
int32_t pile_count(int32_t value) const
void print_block_counts(TO_BLOCK *block, int32_t block_index)
EXTERN double textord_words_min_minspace
void compute_vertical_projection()
#define BOOL_VAR(name, val, comment)
ICOORDELT_LIST char_cells
EXTERN bool textord_show_page_cuts
void set_x(int16_t xin)
rewrite function
#define double_VAR(name, val, comment)
int num_repeated_sets() const
TBOX bounding_box() const
EXTERN double textord_words_default_maxspace
bool try_block_fixed(TO_BLOCK *block, int32_t block_index)
EXTERN double words_default_fixed_space
void plot_word_decisions(ScrollView *win, int16_t pitch, TO_ROW *row)
void mark_repeated_chars(TO_ROW *row)
bool fixed_pitch_row(TO_ROW *row, BLOCK *block, int32_t block_index)
bool try_rows_fixed(TO_BLOCK *block, int32_t block_index, bool testing_on)
EXTERN double textord_max_pitch_iqr
double check_pitch_sync(BLOBNBOX_IT *blob_it, int16_t blob_count, int16_t pitch, int16_t pitch_error, STATS *projection, FPSEGPT_LIST *seg_list)
void set_flag(WERD_FLAGS mask, bool value)
EXTERN double textord_balance_factor
EXTERN bool textord_debug_pitch_metric
TBOX box_next(BLOBNBOX_IT *it)
EXTERN double textord_words_default_nonspace
EXTERN double textord_projection_scale
EXTERN double textord_spacesize_ratioprop
void find_repeated_chars(TO_BLOCK *block, bool testing_on)
void plot_row_cells(ScrollView *win, ScrollView::Color colour, TO_ROW *row, float xshift, ICOORDELT_LIST *cells)
#define MAX_ALLOWED_PITCH
void plot_fp_word(TO_BLOCK *block, float pitch, float nonspace)
bool find_row_pitch(TO_ROW *row, int32_t maxwidth, int32_t dm_gap, TO_BLOCK *block, int32_t block_index, int32_t row_index, bool testing_on)
EXTERN bool textord_blocksall_fixed
float compute_pitch_sd2(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float initial_pitch, int16_t &occupation, int16_t &mid_cuts, ICOORDELT_LIST *row_cells, bool testing_on, int16_t start, int16_t end)
double check_pitch_sync2(BLOBNBOX_IT *blob_it, int16_t blob_count, int16_t pitch, int16_t pitch_error, STATS *projection, int16_t projection_left, int16_t projection_right, float projection_scale, int16_t &occupation_count, FPSEGPT_LIST *seg_list, int16_t start, int16_t end)
EXTERN int textord_words_veto_power
EXTERN bool textord_blocksall_prop
EXTERN double words_initial_upper
EXTERN double textord_words_def_fixed
PITCH_TYPE pitch_decision
EXTERN bool textord_pitch_scalebigwords
int16_t x() const
access function
EXTERN int textord_pitch_range
bool rep_chars_marked() const
EXTERN double textord_words_maxspace
double ile(double frac) const
bool set_range(int32_t min_bucket_value, int32_t max_bucket_value_plus_1)
bool joined_to_prev() const
void smooth(int32_t factor)
void compute_fixed_pitch(ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient, FCOORD rotation, bool testing_on)
EXTERN bool textord_disable_pitch_test
POLY_BLOCK * poly_block() const
EXTERN bool textord_show_fixed_cuts
EXTERN double words_default_prop_nonspace
DLLSYM void tprintf(const char *format,...)
EXTERN double textord_words_pitchsd_threshold
EXTERN double textord_wordstats_smooth_factor
EXTERN bool textord_all_prop
void add(int32_t value, int32_t count)
float tune_row_pitch(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float &initial_pitch, float &best_sp_sd, int16_t &best_mid_cuts, ICOORDELT_LIST *best_cells, bool testing_on)
EXTERN ScrollView * to_win
EXTERN bool textord_show_initial_words
#define BLOCK_STATS_CLUSTERS
void fix_row_pitch(TO_ROW *bad_row, TO_BLOCK *bad_block, TO_BLOCK_LIST *blocks, int32_t row_target, int32_t block_target)
int32_t cluster(float lower, float upper, float multiple, int32_t max_clusters, STATS *clusters)
bool count_pitch_stats(TO_ROW *row, STATS *gap_stats, STATS *pitch_stats, float initial_pitch, float min_space, bool ignore_outsize, bool split_outsize, int32_t dm_gap)
EXTERN bool textord_show_row_cuts
void print_pitch_sd(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float initial_pitch)
EXTERN bool textord_pitch_cheat
EXTERN double textord_fpiqr_ratio
EXTERN double textord_words_default_minspace
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
float compute_pitch_sd(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float initial_pitch, float &sp_sd, int16_t &mid_cuts, ICOORDELT_LIST *row_cells, bool testing_on, int16_t start, int16_t end)
const TBOX & bounding_box() const
void count_block_votes(TO_BLOCK *block, int32_t &def_fixed, int32_t &def_prop, int32_t &maybe_fixed, int32_t &maybe_prop, int32_t &corr_fixed, int32_t &corr_prop, int32_t &dunno)
EXTERN bool textord_blockndoc_fixed
EXTERN int textord_debug_block
ScrollView * create_to_win(ICOORD page_tr)
void plot(ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const
bool try_doc_fixed(ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient)
EXTERN double words_initial_lower
EXTERN double textord_words_def_prop
EXTERN int textord_dotmatrix_gap
void compute_block_pitch(TO_BLOCK *block, FCOORD rotation, int32_t block_index, bool testing_on)
void plot_fp_cells2(ScrollView *win, ScrollView::Color colour, TO_ROW *row, FPSEGPT_LIST *seg_list)
BLOBNBOX_LIST * blob_list()
int32_t get_total() const
EXTERN bool textord_debug_pitch_test
PITCH_TYPE pitch_decision