27 #include "config_auto.h"
30 #define MAXSPACING 128
44 inT16 block_space_gap_width;
46 inT16 block_non_space_gap_width;
47 BOOL8 old_text_ord_proportional;
50 block_it.set_to_list (blocks);
52 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
53 block_it.forward ()) {
54 block = block_it.data ();
55 gapmap =
new GAPMAP (block);
56 block_spacing_stats(block,
58 old_text_ord_proportional,
59 block_space_gap_width,
60 block_non_space_gap_width);
68 (
float) block_space_gap_width / block_non_space_gap_width < 3.0) {
69 block_non_space_gap_width = (
inT16) floor (block_space_gap_width / 3.0);
71 row_it.set_to_list (block->
get_rows ());
73 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
78 tprintf (
"Block %d Row %d: Now Proportional\n",
79 block_index, row_index);
80 row_spacing_stats(row,
84 block_space_gap_width,
85 block_non_space_gap_width);
90 (
"Block %d Row %d: Now Fixed Pitch Decision:%d fp flag:%f\n",
94 #ifndef GRAPHICS_DISABLED
110 void Textord::block_spacing_stats(
113 BOOL8 &old_text_ord_proportional,
114 inT16 &block_space_gap_width,
115 inT16 &block_non_space_gap_width
128 inT16 centre_to_centre;
130 float real_space_threshold;
131 float iqr_centre_to_centre;
132 float iqr_all_gap_stats;
136 row_it.set_to_list (block->
get_rows ());
137 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
138 row = row_it.data ();
144 blob_it.mark_cycle_pt ();
145 end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();
149 blob_box = reduced_box_next (row, &blob_it);
152 row_length = end_of_row - blob_box.
left ();
153 if (blob_box.
width () < minwidth)
154 minwidth = blob_box.
width ();
155 prev_blob_box = blob_box;
156 while (!blob_it.cycled_list ()) {
160 blob_box = reduced_box_next (row, &blob_it);
163 if (blob_box.
width () < minwidth)
164 minwidth = blob_box.
width ();
165 gap_width = blob_box.
left () - prev_blob_box.
right ();
166 if (!ignore_big_gap (row, row_length, gapmap,
167 prev_blob_box.
right (), blob_box.
left ())) {
168 all_gap_stats.add (gap_width, 1);
170 centre_to_centre = (blob_box.
left () + blob_box.
right () -
171 (prev_blob_box.
left () +
172 prev_blob_box.
right ())) / 2;
174 centre_to_centre_stats.add (centre_to_centre, 1);
177 prev_blob_box = blob_box;
183 if (all_gap_stats.get_total () <= 1) {
184 block_non_space_gap_width = minwidth;
185 block_space_gap_width = -1;
187 old_text_ord_proportional =
TRUE;
191 iqr_centre_to_centre = centre_to_centre_stats.ile (0.75) -
192 centre_to_centre_stats.ile (0.25);
193 iqr_all_gap_stats = all_gap_stats.ile (0.75) - all_gap_stats.ile (0.25);
194 old_text_ord_proportional =
195 iqr_centre_to_centre * 2 > iqr_all_gap_stats;
207 block_non_space_gap_width = (
inT16) floor (all_gap_stats.median ());
210 row_it.set_to_list (block->
get_rows ());
211 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
212 row = row_it.data ();
217 real_space_threshold =
221 blob_it.mark_cycle_pt ();
223 blob_it.data_relative (-1)->bounding_box ().right ();
227 blob_box = reduced_box_next (row, &blob_it);
230 row_length = blob_box.
left () - end_of_row;
231 prev_blob_box = blob_box;
232 while (!blob_it.cycled_list ()) {
236 blob_box = reduced_box_next (row, &blob_it);
239 gap_width = blob_box.
left () - prev_blob_box.
right ();
240 if ((gap_width > real_space_threshold) &&
241 !ignore_big_gap (row, row_length, gapmap,
242 prev_blob_box.
right (),
257 || (!narrow_blob (row, prev_blob_box)
258 && !narrow_blob (row, blob_box))))
259 || (wide_blob (row, prev_blob_box)
260 && wide_blob (row, blob_box)))
261 space_gap_stats.add (gap_width, 1);
263 prev_blob_box = blob_box;
268 if (space_gap_stats.get_total () <= 2)
269 block_space_gap_width = -1;
271 block_space_gap_width =
272 MAX ((
inT16) floor (space_gap_stats.median ()),
273 3 * block_non_space_gap_width);
283 void Textord::row_spacing_stats(
288 inT16 block_space_gap_width,
289 inT16 block_non_space_gap_width
300 inT16 real_space_threshold = 0;
303 inT16 large_gap_count = 0;
304 BOOL8 suspected_table;
305 inT32 max_max_nonspace;
306 BOOL8 good_block_space_estimate = block_space_gap_width > 0;
308 inT32 row_length = 0;
310 inT32 sane_threshold;
314 if (!good_block_space_estimate)
315 block_space_gap_width =
inT16 (floor (row->
xheight / 2));
318 real_space_threshold =
319 block_non_space_gap_width +
322 block_non_space_gap_width)));
324 real_space_threshold =
325 (block_space_gap_width + block_non_space_gap_width) / 2;
327 blob_it.mark_cycle_pt ();
328 end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();
332 blob_box = reduced_box_next (row, &blob_it);
335 row_length = end_of_row - blob_box.
left ();
336 prev_blob_box = blob_box;
337 while (!blob_it.cycled_list ()) {
341 blob_box = reduced_box_next (row, &blob_it);
344 gap_width = blob_box.
left () - prev_blob_box.
right ();
345 if (ignore_big_gap (row, row_length, gapmap,
346 prev_blob_box.
right (), blob_box.
left ()))
349 if (gap_width >= real_space_threshold) {
354 || (!narrow_blob (row, prev_blob_box)
355 && !narrow_blob (row, blob_box))))
356 || (wide_blob (row, prev_blob_box)
357 && wide_blob (row, blob_box)))
358 cert_space_gap_stats.add (gap_width, 1);
359 all_space_gap_stats.add (gap_width, 1);
362 small_gap_stats.add (gap_width, 1);
363 all_gap_stats.add (gap_width, 1);
365 prev_blob_box = blob_box;
368 suspected_table = (large_gap_count > 1) ||
369 ((large_gap_count > 0) &&
374 if ((cert_space_gap_stats.get_total () >=
378 cert_space_gap_stats.get_total () > 0)) {
381 &cert_space_gap_stats,
383 block_space_gap_width,
384 block_non_space_gap_width);
387 !isolated_row_stats (row, gapmap, &all_gap_stats, suspected_table,
388 block_idx, row_idx)) {
390 tprintf (
"B:%d R:%d -- Inadequate certain spaces.\n",
396 row->
kern_size = all_gap_stats.median ();
398 row->
kern_size = block_non_space_gap_width;
406 &all_space_gap_stats,
408 block_space_gap_width,
409 block_non_space_gap_width);
414 improve_row_threshold(row, &all_gap_stats);
419 if (suspected_table &&
422 tprintf (
"B:%d R:%d -- DONT BELIEVE SPACE %3.2f %d %3.2f.\n",
436 if (good_block_space_estimate &&
438 sane_space = block_space_gap_width;
445 (
"B:%d R:%d -- DONT BELIEVE SPACE %3.2f %d %3.2f -> %3.2f.\n",
458 tprintf (
"B:%d R:%d -- DONT BELIEVE THRESH %3.2f %d %3.2f->%d.\n",
467 if (suspected_table) {
470 sane_threshold =
inT32 (floor ((sane_space + row->
kern_size) / 2));
475 tprintf (
"B:%d R:%d -- SUSPECT NO SPACES %3.2f %d %3.2f.\n",
520 for (index = 0; index <= max_max_nonspace; index++) {
521 if (all_gap_stats.pile_count (index) > max)
522 max = all_gap_stats.pile_count (index);
524 (all_gap_stats.pile_count (index) < 0.1 * max)) {
568 (
"B:%d R:%d L:%d-- Kn:%d Sp:%d Thr:%d -- Kn:%3.2f (%d) Thr:%d (%d) Sp:%3.2f\n",
569 block_idx, row_idx, row_length, block_non_space_gap_width,
570 block_space_gap_width, real_space_threshold, row->
kern_size,
574 tprintf(
"row->kern_size = %3.2f, row->space_size = %3.2f, "
575 "row->space_threshold = %d\n",
579 void Textord::old_to_method(
581 STATS *all_gap_stats,
582 STATS *space_gap_stats,
583 STATS *small_gap_stats,
584 inT16 block_space_gap_width,
585 inT16 block_non_space_gap_width
593 if (row->
space_size > block_space_gap_width * 1.5) {
595 row->
space_size = block_space_gap_width * 1.5;
600 if (row->
space_size < (block_non_space_gap_width * 2) + 1)
601 row->
space_size = (block_non_space_gap_width * 2) + 1;
604 else if (space_gap_stats->
get_total () >= 1) {
607 if (row->
space_size > block_space_gap_width * 1.5) {
609 row->
space_size = block_space_gap_width * 1.5;
614 if (row->
space_size < (block_non_space_gap_width * 3) + 1)
615 row->
space_size = (block_non_space_gap_width * 3) + 1;
629 row->
kern_size = block_non_space_gap_width;
672 STATS *all_gap_stats,
673 BOOL8 suspected_table,
677 float crude_threshold_estimate;
678 inT16 small_gaps_count;
691 kern_estimate = all_gap_stats->
median ();
694 small_gaps_count = stats_count_under (all_gap_stats,
696 ceil (crude_threshold_estimate));
701 (total - small_gaps_count < 1)) {
703 tprintf (
"B:%d R:%d -- Cant do isolated row stats.\n",
708 blob_it.mark_cycle_pt ();
709 end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();
713 blob_box = reduced_box_next (row, &blob_it);
716 row_length = end_of_row - blob_box.
left ();
717 prev_blob_box = blob_box;
718 while (!blob_it.cycled_list ()) {
722 blob_box = reduced_box_next (row, &blob_it);
725 gap_width = blob_box.
left () - prev_blob_box.
right ();
726 if (!ignore_big_gap (row, row_length, gapmap,
727 prev_blob_box.
right (), blob_box.
left ()) &&
728 (gap_width > crude_threshold_estimate)) {
732 (!narrow_blob (row, prev_blob_box) &&
733 !narrow_blob (row, blob_box)))) ||
734 (wide_blob (row, prev_blob_box) && wide_blob (row, blob_box)))
735 cert_space_gap_stats.add (gap_width, 1);
736 all_space_gap_stats.add (gap_width, 1);
738 if (gap_width < crude_threshold_estimate)
739 small_gap_stats.
add (gap_width, 1);
741 prev_blob_box = blob_box;
743 if (cert_space_gap_stats.get_total () >=
746 row->
space_size = cert_space_gap_stats.median ();
747 else if (suspected_table && (cert_space_gap_stats.get_total () > 0))
749 row->
space_size = cert_space_gap_stats.mean ();
751 else if (all_space_gap_stats.get_total () >=
754 row->
space_size = all_space_gap_stats.median ();
756 row->
space_size = all_space_gap_stats.mean ();
769 tprintf (
"B:%d R:%d -- Isolated row stats SANITY FAILURE: %f %d %f\n",
779 tprintf (
"B:%d R:%d -- Isolated row stats: %f %d %f\n",
789 for (index = 0; index < threshold; index++)
810 void Textord::improve_row_threshold(
TO_ROW *row,
STATS *all_gap_stats) {
813 inT16 reqd_zero_width = 0;
814 inT16 zero_width = 0;
815 inT16 zero_start = 0;
819 tprintf (
"Improve row threshold 0");
820 if ((all_gap_stats->
get_total () <= 25) ||
823 (stats_count_under (all_gap_stats,
824 (
inT16) ceil (kn + (sp - kn) / 3 + 0.5)) <
834 reqd_zero_width = (
inT16) floor ((sp - kn) / 3 + 0.5);
835 if (reqd_zero_width < 3)
838 for (index =
inT16 (ceil (kn)); index <
inT16 (floor (sp)); index++) {
845 if (zero_width >= reqd_zero_width)
854 tprintf (
" reqd_z_width: %d found %d 0's, starting %d; thresh: %d/n",
856 if ((zero_width < reqd_zero_width) ||
865 (
"Improve row kn:%5.2f sp:%5.2f 0's: %d -> %d thresh:%d -> %d\n",
872 (
"Improve row kn:%5.2f sp:%5.2f 0's: %d -> %d thresh:%d -> %d\n",
892 BOOL8 prev_fuzzy_non;
894 BOOL8 fuzzy_sp =
false;
895 BOOL8 fuzzy_non =
false;
900 C_OUTLINE_IT cout_it;
902 C_BLOB_IT cblob_it = &cblobs;
908 float repetition_spacing;
922 inT16 word_count = 0;
924 rep_char_it.set_to_list (&(row->
rep_words));
925 if (!rep_char_it.empty ()) {
926 next_rep_char_word_right =
927 rep_char_it.data ()->bounding_box ().right ();
931 cblob_it.set_to_list (&cblobs);
933 word_it.set_to_list (&words);
936 prev_fuzzy_sp =
FALSE;
937 prev_fuzzy_non =
FALSE;
938 if (!box_it.empty ()) {
939 xstarts[0] = box_it.data ()->bounding_box ().left ();
940 if (xstarts[0] > next_rep_char_word_right) {
942 word = rep_char_it.extract ();
943 word_it.add_after_then_move (word);
953 repetition_spacing = find_mean_blob_spacing (word);
954 current_gap = box_it.data ()->bounding_box ().left () -
955 next_rep_char_word_right;
956 current_within_xht_gap = current_gap;
965 tprintf (
"Repch wd at BOL(%d, %d). rep spacing %5.2f; Rgap:%d ",
966 box_it.data ()->bounding_box ().left (),
967 box_it.data ()->bounding_box ().bottom (),
968 repetition_spacing, current_gap);
969 prev_fuzzy_sp =
FALSE;
970 prev_fuzzy_non =
FALSE;
971 if (rep_char_it.empty ()) {
975 rep_char_it.forward ();
976 next_rep_char_word_right =
977 rep_char_it.data ()->bounding_box ().right ();
981 peek_at_next_gap(row,
985 next_within_xht_gap);
987 bblob = box_it.data ();
991 cout_it.set_to_list (cblob_it.data ()->out_list ());
992 cout_it.move_to_last ();
994 delete bblob->
cblob ();
998 cblob_it.add_after_then_move (bblob->
cblob ());
999 prev_x = blob_box.
right ();
1002 bblob = box_it.data ();
1007 prev_gap = current_gap;
1008 prev_within_xht_gap = current_within_xht_gap;
1009 prev_blob_box = next_blob_box;
1010 current_gap = next_gap;
1011 current_within_xht_gap = next_within_xht_gap;
1012 peek_at_next_gap(row,
1016 next_within_xht_gap);
1018 inT16 prev_gap_arg = prev_gap;
1019 inT16 next_gap_arg = next_gap;
1021 prev_gap_arg = prev_within_xht_gap;
1022 next_gap_arg = next_within_xht_gap;
1025 if (blob_box.
left () > next_rep_char_word_right ||
1026 make_a_word_break(row, blob_box, prev_gap_arg, prev_blob_box,
1027 current_gap, current_within_xht_gap,
1028 next_blob_box, next_gap_arg,
1029 blanks, fuzzy_sp, fuzzy_non,
1030 prev_gap_was_a_space,
1031 break_at_next_gap) ||
1032 box_it.at_first()) {
1034 word =
new WERD (&cblobs, prev_blanks,
NULL);
1036 word_it.add_after_then_move (word);
1044 else if (prev_fuzzy_non)
1048 if (blob_box.
left () > next_rep_char_word_right) {
1050 word = rep_char_it.extract ();
1051 word_it.add_after_then_move (word);
1054 repetition_spacing = find_mean_blob_spacing (word);
1056 current_within_xht_gap = current_gap;
1067 (
"Repch wd (%d,%d) rep gap %5.2f; Lgap:%d (%d blanks);",
1070 repetition_spacing, current_gap, blanks);
1078 blob_box.
left () - next_rep_char_word_right;
1087 tprintf (
" Rgap:%d (%d blanks)\n",
1088 current_gap, blanks);
1092 if (rep_char_it.empty ()) {
1096 rep_char_it.forward ();
1097 next_rep_char_word_right =
1098 rep_char_it.data ()->bounding_box ().right ();
1102 if (box_it.at_first () && rep_char_it.empty ()) {
1105 xstarts[1] = prev_x;
1108 prev_blanks = blanks;
1109 prev_fuzzy_sp = fuzzy_sp;
1110 prev_fuzzy_non = fuzzy_non;
1115 while (!box_it.at_first ());
1118 while (!rep_char_it.empty ()) {
1119 word = rep_char_it.extract ();
1120 word_it.add_after_then_move (word);
1123 repetition_spacing = find_mean_blob_spacing (word);
1134 (
"Repch wd at EOL (%d,%d). rep spacing %d; Lgap:%d (%d blanks)\n",
1136 repetition_spacing, current_gap, blanks);
1142 if (rep_char_it.empty ()) {
1145 xstarts[1] = prev_x;
1148 rep_char_it.forward ();
1151 real_row =
new ROW (row,
1153 word_it.set_to_list (real_row->
word_list ());
1155 word_it.add_list_after (&words);
1159 tprintf (
"Row: Made %d words in row ((%d,%d)(%d,%d))\n",
1183 C_OUTLINE_IT cout_it;
1185 C_BLOB_IT cblob_it = &cblobs;
1192 inT16 word_count = 0;
1194 cblob_it.set_to_list(&cblobs);
1196 word_it.set_to_list(&words);
1198 if (!box_it.empty()) {
1201 bblob = box_it.data();
1205 cout_it.set_to_list(cblob_it.data()->out_list());
1206 cout_it.move_to_last();
1208 delete bblob->
cblob();
1212 cblob_it.add_after_then_move(bblob->
cblob());
1215 bblob = box_it.data();
1221 word_it.add_after_then_move(word);
1226 if (box_it.at_first()) {
1231 while (!box_it.at_first());
1234 word_it.set_to_list(real_row->
word_list());
1236 word_it.add_list_after(&words);
1239 tprintf (
"Row:Made %d words in row ((%d,%d)(%d,%d))\n",
1251 BOOL8 Textord::make_a_word_break(
1256 inT16 real_current_gap,
1257 inT16 within_xht_current_gap,
1263 BOOL8& prev_gap_was_a_space,
1264 BOOL8& break_at_next_gap) {
1267 float fuzzy_sp_to_kn_limit;
1269 if (break_at_next_gap) {
1270 break_at_next_gap =
FALSE;
1281 (real_current_gap < tosp_dont_fool_with_small_kerns * row->kern_size)))
1283 within_xht_current_gap = real_current_gap;
1286 current_gap = within_xht_current_gap;
1288 current_gap = real_current_gap;
1293 if (space && (current_gap <
MAX_INT16)) {
1294 if (current_gap < row->min_space) {
1319 prev_gap_was_a_space =
TRUE;
1340 (real_current_gap <= row->max_nonspace) &&
1344 #ifndef GRAPHICS_DISABLED
1345 mark_gap (blob_box, 20,
1346 prev_gap, prev_blob_box.
width (),
1347 current_gap, next_blob_box.
width (), next_gap);
1351 (real_current_gap <= row->space_threshold) &&
1358 #ifndef GRAPHICS_DISABLED
1359 mark_gap (blob_box, 21,
1360 prev_gap, prev_blob_box.
width (),
1361 current_gap, next_blob_box.
width (), next_gap);
1365 (real_current_gap < row->min_space) &&
1366 (within_xht_current_gap >= row->
min_space)) {
1368 #ifndef GRAPHICS_DISABLED
1369 mark_gap (blob_box, 22,
1370 prev_gap, prev_blob_box.
width (),
1371 current_gap, next_blob_box.
width (), next_gap);
1375 !suspected_punct_blob(row, prev_blob_box) &&
1376 suspected_punct_blob(row, blob_box)) {
1377 break_at_next_gap =
TRUE;
1380 else if ((current_gap < row->min_space) &&
1388 fuzzy_sp_to_kn_limit = 99999.0f;
1392 if ((prev_blob_box.
width () > 0) &&
1393 narrow_blob (row, prev_blob_box) &&
1394 prev_gap_was_a_space &&
1397 (current_gap > fuzzy_sp_to_kn_limit)) {
1405 #ifndef GRAPHICS_DISABLED
1406 mark_gap (blob_box, 1,
1407 prev_gap, prev_blob_box.
width (),
1408 current_gap, next_blob_box.
width (), next_gap);
1413 else if ((prev_blob_box.
width () > 0) &&
1414 narrow_blob (row, prev_blob_box) &&
1415 !prev_gap_was_a_space &&
1418 (current_gap > fuzzy_sp_to_kn_limit)) {
1426 #ifndef GRAPHICS_DISABLED
1427 mark_gap (blob_box, 2,
1428 prev_gap, prev_blob_box.
width (),
1429 current_gap, next_blob_box.
width (), next_gap);
1432 else if ((next_blob_box.
width () > 0) &&
1433 narrow_blob (row, next_blob_box) &&
1435 (current_gap <= tosp_gap_factor * next_gap)) {
1437 (current_gap > fuzzy_sp_to_kn_limit)) {
1445 #ifndef GRAPHICS_DISABLED
1446 mark_gap (blob_box, 3,
1447 prev_gap, prev_blob_box.
width (),
1448 current_gap, next_blob_box.
width (), next_gap);
1451 else if ((next_blob_box.
width () > 0) &&
1452 narrow_blob (row, next_blob_box) &&
1454 (current_gap * tosp_gap_factor <= next_gap)) {
1456 (current_gap > fuzzy_sp_to_kn_limit)) {
1464 #ifndef GRAPHICS_DISABLED
1465 mark_gap (blob_box, 4,
1466 prev_gap, prev_blob_box.
width (),
1467 current_gap, next_blob_box.
width (), next_gap);
1470 else if ((((next_blob_box.
width () > 0) &&
1471 narrow_blob (row, next_blob_box)) ||
1472 ((prev_blob_box.
width () > 0) &&
1473 narrow_blob (row, prev_blob_box)))) {
1475 #ifndef GRAPHICS_DISABLED
1476 mark_gap (blob_box, 6,
1477 prev_gap, prev_blob_box.
width (),
1478 current_gap, next_blob_box.
width (), next_gap);
1492 if ((prev_blob_box.
width () > 0) &&
1493 (next_blob_box.
width () > 0) &&
1496 wide_blob (row, prev_blob_box) &&
1497 wide_blob (row, next_blob_box)) {
1511 #ifndef GRAPHICS_DISABLED
1512 mark_gap (blob_box, 7,
1513 prev_gap, prev_blob_box.
width (),
1514 current_gap, next_blob_box.
width (), next_gap);
1516 }
else if (prev_blob_box.
width() > 0 &&
1517 next_blob_box.
width() > 0 &&
1521 !(narrow_blob(row, prev_blob_box) ||
1522 suspected_punct_blob(row, prev_blob_box)) &&
1523 !(narrow_blob(row, next_blob_box) ||
1524 suspected_punct_blob(row, next_blob_box))) {
1527 #ifndef GRAPHICS_DISABLED
1528 mark_gap (blob_box, 8,
1529 prev_gap, prev_blob_box.
width (),
1530 current_gap, next_blob_box.
width (), next_gap);
1534 (prev_blob_box.
width () > 0) &&
1535 (next_blob_box.
width () > 0) &&
1538 (!suspected_punct_blob (row, prev_blob_box) &&
1539 !suspected_punct_blob (row, next_blob_box)))) {
1542 #ifndef GRAPHICS_DISABLED
1543 mark_gap (blob_box, 9,
1544 prev_gap, prev_blob_box.
width (),
1545 current_gap, next_blob_box.
width (), next_gap);
1550 tprintf(
"word break = %d current_gap = %d, prev_gap = %d, "
1551 "next_gap = %d\n", space ? 1 : 0, current_gap,
1552 prev_gap, next_gap);
1553 prev_gap_was_a_space = space && !(fuzzy_non);
1561 (((
float) blob_box.
width () / blob_box.
height ()) <=
1571 (((
float) blob_box.
width () / blob_box.
height ()) >
1577 result = !narrow_blob (row, blob_box);
1584 float blob_x_centre;
1586 blob_x_centre = (box.
right () + box.
left ()) / 2.0;
1587 baseline = row->
baseline.
y (blob_x_centre);
1590 (box.
top () < baseline + row->
xheight / 2.0) ||
1596 void Textord::peek_at_next_gap(
TO_ROW *row,
1598 TBOX &next_blob_box,
1600 inT16 &next_within_xht_gap) {
1601 TBOX next_reduced_blob_box;
1603 BLOBNBOX_IT reduced_box_it = box_it;
1605 next_blob_box =
box_next (&box_it);
1606 next_reduced_blob_box = reduced_box_next (row, &reduced_box_it);
1607 if (box_it.at_first ()) {
1612 bit_beyond = box_it.data ()->bounding_box ();
1613 next_gap = bit_beyond.
left () - next_blob_box.
right ();
1614 bit_beyond = reduced_box_next (row, &reduced_box_it);
1615 next_within_xht_gap =
1616 bit_beyond.
left () - next_reduced_blob_box.
right ();
1621 #ifndef GRAPHICS_DISABLED
1622 void Textord::mark_gap(
1626 inT16 prev_blob_width,
1628 inT16 next_blob_width,
1684 blob.
left () - current_gap / 2.0f,
1689 tprintf (
" (%d,%d) Sp<->Kn Rule %d %d %d %d %d\n",
1690 blob.
left () - current_gap / 2, blob.
bottom (), rule,
1691 prev_gap, prev_blob_width, current_gap,
1692 next_blob_width, next_gap);
1696 float Textord::find_mean_blob_spacing(
WERD *word) {
1700 inT16 gap_count = 0;
1704 if (!cblob_it.empty ()) {
1705 cblob_it.mark_cycle_pt ();
1706 prev_right = cblob_it.data ()->bounding_box ().
right ();
1708 cblob_it.forward ();
1709 for (; !cblob_it.cycled_list (); cblob_it.forward ()) {
1710 blob_box = cblob_it.data ()->bounding_box ();
1711 gap_sum += blob_box.
left () - prev_right;
1713 prev_right = blob_box.
right ();
1717 return (gap_sum / (
float) gap_count);
1728 inT16 gap = right - left + 1;
1737 if ((gap > 2.1 * row->
xheight) && (row_length > 20 * row->
xheight))
1739 if ((gap > 1.75 * row->
xheight) &&
1740 ((row_length > 35 * row->
xheight) ||
1762 TBOX Textord::reduced_box_next(
1770 inT16 left_above_xht;
1771 inT16 new_left_above_xht;
1785 reduced_box = reduced_box_for_blob (blob, row, &left_above_xht);
1794 reduced_box_for_blob(blob, row, &new_left_above_xht);
1795 left_above_xht =
MIN (left_above_xht, new_left_above_xht);
1801 if ((reduced_box.
width () > 0) &&
1803 < left_above_xht) && (reduced_box.
height () > 0.7 * row->
xheight)) {
1804 #ifndef GRAPHICS_DISABLED
1810 reduced_box = full_box;
1836 TBOX Textord::reduced_box_for_blob(
1839 inT16 *left_above_xht) {
1841 float blob_x_centre;
1850 blob_x_centre = (blob_box.
left () + blob_box.
right ()) / 2.0;
1851 baseline = row->
baseline.
y (blob_x_centre);
1860 static_cast<float>(
MAX_INT16), left_limit, junk);
1861 if (left_limit > junk)
1864 *left_above_xht = (
inT16) floor (left_limit);
1874 if (left_limit > junk)
1882 (baseline + row->
xheight), junk, right_limit);
1883 if (junk > right_limit)
1887 ICOORD ((
inT16) ceil (right_limit), blob_box.top ()));
ROW * make_blob_words(TO_ROW *row, FCOORD rotation)
void set_reduced_box(TBOX new_box)
double tosp_old_sp_kn_th_factor
double tosp_fuzzy_space_factor1
bool tosp_fuzzy_limit_all
double tosp_min_sane_kn_sp
const TBOX & reduced_box() const
bool tosp_stats_use_xht_gaps
bool tosp_only_use_xht_gaps
bool joined_to_prev() const
double tosp_pass_wide_fuzz_sp_to_context
bool tosp_use_pre_chopping
bool tosp_old_to_constrain_sp_kn
int tosp_enough_space_samples_for_median
bool tosp_row_use_cert_spaces1
void add(inT32 value, inT32 count)
bool tosp_flip_fuzz_sp_to_kn
double tosp_init_guess_kn_mult
bool tosp_only_small_gaps_for_kern
TBOX bounding_box() const
bool tosp_block_use_cert_spaces
double tosp_ignore_big_gaps
BOOL8 table_gap(inT16 left, inT16 right)
double tosp_table_kn_sp_ratio
bool tosp_only_use_prop_rows
BLOBNBOX_LIST * blob_list()
EXTERN double gapmap_big_gaps
double tosp_kern_gap_factor3
double tosp_ignore_very_big_gaps
double tosp_enough_small_gaps
void find_cblob_hlimits(C_BLOB *blob, float bottomy, float topy, float &xmin, float &xmax)
C_OUTLINE_LIST * out_list()
double tosp_threshold_bias1
double tosp_silly_kn_sp_gap
EXTERN ScrollView * to_win
double tosp_fuzzy_sp_fraction
double tosp_wide_fraction
bool tosp_narrow_blobs_not_cert
bool tosp_all_flips_fuzzy
TBOX bounding_box() const
void Ellipse(int x, int y, int width, int height)
bool tosp_flip_fuzz_kn_to_sp
EXTERN bool textord_show_initial_words
double tosp_fuzzy_kn_fraction
TBOX box_next(BLOBNBOX_IT *it)
bool tosp_force_wordbreak_on_punct
double tosp_table_fuzzy_kn_sp_ratio
bool tosp_row_use_cert_spaces
double tosp_init_guess_xht_mult
TBOX box_next_pre_chopped(BLOBNBOX_IT *it)
void plot_word_decisions(ScrollView *win, inT16 pitch, TO_ROW *row)
PITCH_TYPE pitch_decision
double tosp_fuzzy_space_factor
void to_spacing(ICOORD page_tr, TO_BLOCK_LIST *blocks)
double tosp_large_kerning
ROW * make_prop_words(TO_ROW *row, FCOORD rotation)
double tosp_kern_gap_factor2
inT32 pile_count(inT32 value) const
double tosp_table_xht_sp_ratio
double tosp_fuzzy_space_factor2
bool tosp_rule_9_test_punct
const TBOX & bounding_box() const
double tosp_threshold_bias2
double tosp_wide_aspect_ratio
double tosp_narrow_fraction
double tosp_kern_gap_factor1
void set_flag(WERD_FLAGS mask, BOOL8 value)
double tosp_max_sane_kn_thresh
bool tosp_recovery_isolated_row_stats
double tosp_narrow_aspect_ratio
void set_blanks(uinT8 new_blanks)
double tosp_dont_fool_with_small_kerns
C_BLOB_LIST * cblob_list()
void recalc_bounding_box()
void plot(ScrollView *fd) const