35 #include "config_auto.h" 41 #define MAXSPACING 128 54 int16_t block_space_gap_width;
56 int16_t block_non_space_gap_width;
57 bool old_text_ord_proportional;
59 block_it.set_to_list (blocks);
61 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
62 block_it.forward ()) {
63 block = block_it.data ();
64 std::unique_ptr<GAPMAP> gapmap(
new GAPMAP (block));
65 block_spacing_stats(block,
67 old_text_ord_proportional,
68 block_space_gap_width,
69 block_non_space_gap_width);
77 (
float) block_space_gap_width / block_non_space_gap_width < 3.0) {
78 block_non_space_gap_width = (int16_t) floor (block_space_gap_width / 3.0);
83 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
88 tprintf (
"Block %d Row %d: Now Proportional\n",
89 block_index, row_index);
90 row_spacing_stats(row,
94 block_space_gap_width,
95 block_non_space_gap_width);
100 (
"Block %d Row %d: Now Fixed Pitch Decision:%d fp flag:%f\n",
104 #ifndef GRAPHICS_DISABLED 119 void Textord::block_spacing_stats(
122 bool& old_text_ord_proportional,
123 int16_t& block_space_gap_width,
124 int16_t& block_non_space_gap_width
136 int16_t centre_to_centre;
138 float real_space_threshold;
139 float iqr_centre_to_centre;
140 float iqr_all_gap_stats;
145 TO_ROW_IT row_it(block->
get_rows());
146 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
147 row = row_it.data ();
153 blob_it.mark_cycle_pt ();
154 end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();
158 blob_box = reduced_box_next (row, &blob_it);
161 row_length = end_of_row - blob_box.
left ();
162 if (blob_box.
width () < minwidth)
163 minwidth = blob_box.
width ();
164 prev_blob_box = blob_box;
165 while (!blob_it.cycled_list ()) {
169 blob_box = reduced_box_next (row, &blob_it);
172 if (blob_box.
width () < minwidth)
173 minwidth = blob_box.
width ();
174 int16_t left = prev_blob_box.
right();
175 int16_t right = blob_box.
left();
176 gap_width = right - left;
177 if (!ignore_big_gap(row, row_length, gapmap, left, right)) {
178 all_gap_stats.add (gap_width, 1);
180 centre_to_centre = (right + blob_box.
right () -
181 (prev_blob_box.
left () + left)) / 2;
183 centre_to_centre_stats.add (centre_to_centre, 1);
186 prev_blob_box = blob_box;
192 if (all_gap_stats.get_total () <= 1) {
193 block_non_space_gap_width = minwidth;
194 block_space_gap_width = -1;
196 old_text_ord_proportional =
true;
200 iqr_centre_to_centre = centre_to_centre_stats.ile (0.75) -
201 centre_to_centre_stats.ile (0.25);
202 iqr_all_gap_stats = all_gap_stats.ile (0.75) - all_gap_stats.ile (0.25);
203 old_text_ord_proportional =
204 iqr_centre_to_centre * 2 > iqr_all_gap_stats;
216 block_non_space_gap_width = (int16_t) floor (all_gap_stats.median ());
219 row_it.set_to_list (block->
get_rows ());
220 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
221 row = row_it.data ();
226 real_space_threshold =
230 blob_it.mark_cycle_pt ();
232 blob_it.data_relative (-1)->bounding_box ().right ();
236 blob_box = reduced_box_next (row, &blob_it);
239 row_length = blob_box.
left () - end_of_row;
240 prev_blob_box = blob_box;
241 while (!blob_it.cycled_list ()) {
245 blob_box = reduced_box_next (row, &blob_it);
248 int16_t left = prev_blob_box.
right();
249 int16_t right = blob_box.
left();
250 gap_width = right - left;
251 if ((gap_width > real_space_threshold) &&
252 !ignore_big_gap(row, row_length, gapmap, left, right)) {
266 || (!narrow_blob (row, prev_blob_box)
267 && !narrow_blob (row, blob_box))))
268 || (wide_blob (row, prev_blob_box)
269 && wide_blob (row, blob_box)))
270 space_gap_stats.add (gap_width, 1);
272 prev_blob_box = blob_box;
277 if (space_gap_stats.get_total () <= 2)
278 block_space_gap_width = -1;
280 block_space_gap_width =
281 std::max(static_cast<int16_t>(floor(space_gap_stats.median())),
282 static_cast<int16_t
>(3 * block_non_space_gap_width));
292 void Textord::row_spacing_stats(
297 int16_t block_space_gap_width,
298 int16_t block_non_space_gap_width
309 int16_t real_space_threshold = 0;
312 int16_t large_gap_count = 0;
313 bool suspected_table;
314 int32_t max_max_nonspace;
315 bool good_block_space_estimate = block_space_gap_width > 0;
317 int32_t row_length = 0;
319 int32_t sane_threshold;
323 if (!good_block_space_estimate)
324 block_space_gap_width = int16_t (floor (row->
xheight / 2));
327 real_space_threshold =
328 block_non_space_gap_width +
329 int16_t (floor (0.5 +
331 block_non_space_gap_width)));
333 real_space_threshold =
334 (block_space_gap_width + block_non_space_gap_width) / 2;
336 blob_it.mark_cycle_pt ();
337 end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();
341 blob_box = reduced_box_next (row, &blob_it);
344 row_length = end_of_row - blob_box.
left ();
345 prev_blob_box = blob_box;
346 while (!blob_it.cycled_list ()) {
350 blob_box = reduced_box_next (row, &blob_it);
353 int16_t left = prev_blob_box.
right();
354 int16_t right = blob_box.
left();
355 gap_width = right - left;
356 if (ignore_big_gap(row, row_length, gapmap, left, right)) {
359 if (gap_width >= real_space_threshold) {
364 || (!narrow_blob (row, prev_blob_box)
365 && !narrow_blob (row, blob_box))))
366 || (wide_blob (row, prev_blob_box)
367 && wide_blob (row, blob_box)))
368 cert_space_gap_stats.add (gap_width, 1);
369 all_space_gap_stats.add (gap_width, 1);
372 small_gap_stats.add (gap_width, 1);
373 all_gap_stats.add (gap_width, 1);
375 prev_blob_box = blob_box;
378 suspected_table = (large_gap_count > 1) ||
379 ((large_gap_count > 0) &&
384 if ((cert_space_gap_stats.get_total () >=
388 cert_space_gap_stats.get_total () > 0)) {
391 &cert_space_gap_stats,
393 block_space_gap_width,
394 block_non_space_gap_width);
397 !isolated_row_stats (row, gapmap, &all_gap_stats, suspected_table,
398 block_idx, row_idx)) {
400 tprintf (
"B:%d R:%d -- Inadequate certain spaces.\n",
406 row->
kern_size = all_gap_stats.median ();
408 row->
kern_size = block_non_space_gap_width;
416 &all_space_gap_stats,
418 block_space_gap_width,
419 block_non_space_gap_width);
424 improve_row_threshold(row, &all_gap_stats);
429 if (suspected_table &&
432 tprintf(
"B:%d R:%d -- DON'T BELIEVE SPACE %3.2f %d %3.2f.\n", block_idx,
445 if (good_block_space_estimate &&
447 sane_space = block_space_gap_width;
453 tprintf(
"B:%d R:%d -- DON'T BELIEVE SPACE %3.2f %d %3.2f -> %3.2f.\n",
466 tprintf(
"B:%d R:%d -- DON'T BELIEVE THRESH %3.2f %d %3.2f->%d.\n",
474 if (suspected_table) {
477 sane_threshold = int32_t (floor ((sane_space + row->
kern_size) / 2));
482 tprintf (
"B:%d R:%d -- SUSPECT NO SPACES %3.2f %d %3.2f.\n",
527 for (index = 0; index <= max_max_nonspace; index++) {
528 if (all_gap_stats.pile_count (index) > max)
529 max = all_gap_stats.pile_count (index);
531 (all_gap_stats.pile_count (index) < 0.1 * max)) {
575 (
"B:%d R:%d L:%d-- Kn:%d Sp:%d Thr:%d -- Kn:%3.2f (%d) Thr:%d (%d) Sp:%3.2f\n",
576 block_idx, row_idx, row_length, block_non_space_gap_width,
577 block_space_gap_width, real_space_threshold, row->
kern_size,
581 tprintf(
"row->kern_size = %3.2f, row->space_size = %3.2f, " 582 "row->space_threshold = %d\n",
586 void Textord::old_to_method(
588 STATS *all_gap_stats,
589 STATS *space_gap_stats,
590 STATS *small_gap_stats,
591 int16_t block_space_gap_width,
592 int16_t block_non_space_gap_width
600 if (row->
space_size > block_space_gap_width * 1.5) {
602 row->
space_size = block_space_gap_width * 1.5;
607 if (row->
space_size < (block_non_space_gap_width * 2) + 1)
608 row->
space_size = (block_non_space_gap_width * 2) + 1;
611 else if (space_gap_stats->
get_total () >= 1) {
614 if (row->
space_size > block_space_gap_width * 1.5) {
616 row->
space_size = block_space_gap_width * 1.5;
621 if (row->
space_size < (block_non_space_gap_width * 3) + 1)
622 row->
space_size = (block_non_space_gap_width * 3) + 1;
636 row->
kern_size = block_non_space_gap_width;
677 bool Textord::isolated_row_stats(
TO_ROW* row,
679 STATS* all_gap_stats,
680 bool suspected_table,
684 float crude_threshold_estimate;
685 int16_t small_gaps_count;
698 kern_estimate = all_gap_stats->
median ();
701 small_gaps_count = stats_count_under (all_gap_stats,
703 ceil (crude_threshold_estimate));
708 (total - small_gaps_count < 1)) {
710 tprintf(
"B:%d R:%d -- Can't do isolated row stats.\n", block_idx,
715 blob_it.mark_cycle_pt ();
716 end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();
720 blob_box = reduced_box_next (row, &blob_it);
723 row_length = end_of_row - blob_box.
left ();
724 prev_blob_box = blob_box;
725 while (!blob_it.cycled_list ()) {
729 blob_box = reduced_box_next (row, &blob_it);
732 int16_t left = prev_blob_box.
right();
733 int16_t right = blob_box.
left();
734 gap_width = right - left;
735 if (!ignore_big_gap(row, row_length, gapmap, left, right) &&
736 (gap_width > crude_threshold_estimate)) {
740 (!narrow_blob (row, prev_blob_box) &&
741 !narrow_blob (row, blob_box)))) ||
742 (wide_blob (row, prev_blob_box) && wide_blob (row, blob_box)))
743 cert_space_gap_stats.add (gap_width, 1);
744 all_space_gap_stats.add (gap_width, 1);
746 if (gap_width < crude_threshold_estimate)
747 small_gap_stats.
add (gap_width, 1);
749 prev_blob_box = blob_box;
751 if (cert_space_gap_stats.get_total () >=
754 row->
space_size = cert_space_gap_stats.median ();
755 else if (suspected_table && (cert_space_gap_stats.get_total () > 0))
757 row->
space_size = cert_space_gap_stats.mean ();
759 else if (all_space_gap_stats.get_total () >=
762 row->
space_size = all_space_gap_stats.median ();
764 row->
space_size = all_space_gap_stats.mean ();
777 tprintf (
"B:%d R:%d -- Isolated row stats SANITY FAILURE: %f %d %f\n",
787 tprintf (
"B:%d R:%d -- Isolated row stats: %f %d %f\n",
793 int16_t Textord::stats_count_under(
STATS *stats, int16_t threshold) {
797 for (index = 0; index < threshold; index++)
818 void Textord::improve_row_threshold(
TO_ROW *row,
STATS *all_gap_stats) {
821 int16_t reqd_zero_width = 0;
822 int16_t zero_width = 0;
823 int16_t zero_start = 0;
827 tprintf (
"Improve row threshold 0");
828 if ((all_gap_stats->
get_total () <= 25) ||
831 (stats_count_under (all_gap_stats,
832 (int16_t) ceil (kn + (sp - kn) / 3 + 0.5)) <
842 reqd_zero_width = (int16_t) floor ((sp - kn) / 3 + 0.5);
843 if (reqd_zero_width < 3)
846 for (index = int16_t (ceil (kn)); index < int16_t (floor (sp)); index++) {
853 if (zero_width >= reqd_zero_width)
862 tprintf (
" reqd_z_width: %d found %d 0's, starting %d; thresh: %d/n",
864 if ((zero_width < reqd_zero_width) ||
873 (
"Improve row kn:%5.2f sp:%5.2f 0's: %d -> %d thresh:%d -> %d\n",
880 (
"Improve row kn:%5.2f sp:%5.2f 0's: %d -> %d thresh:%d -> %d\n",
902 bool fuzzy_sp =
false;
903 bool fuzzy_non =
false;
905 bool prev_gap_was_a_space =
false;
906 bool break_at_next_gap =
false;
908 C_OUTLINE_IT cout_it;
910 C_BLOB_IT cblob_it = &cblobs;
913 int32_t next_rep_char_word_right = INT32_MAX;
914 float repetition_spacing;
922 int16_t prev_gap = INT16_MAX;
923 int16_t current_gap = INT16_MAX;
924 int16_t next_gap = INT16_MAX;
925 int16_t prev_within_xht_gap = INT16_MAX;
926 int16_t current_within_xht_gap = INT16_MAX;
927 int16_t next_within_xht_gap = INT16_MAX;
928 int16_t word_count = 0;
932 if (!rep_char_it.empty ()) {
933 next_rep_char_word_right =
934 rep_char_it.data ()->bounding_box ().right ();
938 cblob_it.set_to_list (&cblobs);
941 WERD_IT word_it(&words);
944 prev_fuzzy_sp =
false;
945 prev_fuzzy_non =
false;
946 if (!box_it.empty ()) {
947 xstarts[0] = box_it.data ()->bounding_box ().left ();
948 if (xstarts[0] > next_rep_char_word_right) {
950 word = rep_char_it.extract ();
951 word_it.add_after_then_move (word);
961 repetition_spacing = find_mean_blob_spacing (word);
962 current_gap = box_it.data ()->bounding_box ().left () -
963 next_rep_char_word_right;
964 current_within_xht_gap = current_gap;
966 prev_blanks = (uint8_t) floor (current_gap / row->
space_size);
973 tprintf (
"Repch wd at BOL(%d, %d). rep spacing %5.2f; Rgap:%d ",
974 box_it.data ()->bounding_box ().left (),
975 box_it.data ()->bounding_box ().bottom (),
976 repetition_spacing, current_gap);
977 prev_fuzzy_sp =
false;
978 prev_fuzzy_non =
false;
979 if (rep_char_it.empty ()) {
980 next_rep_char_word_right = INT32_MAX;
983 rep_char_it.forward ();
984 next_rep_char_word_right =
985 rep_char_it.data ()->bounding_box ().right ();
989 peek_at_next_gap(row,
993 next_within_xht_gap);
995 bblob = box_it.data ();
998 if (bblob->
cblob () !=
nullptr) {
999 cout_it.set_to_list (cblob_it.data ()->out_list ());
1000 cout_it.move_to_last ();
1002 delete bblob->
cblob ();
1005 if (bblob->
cblob() !=
nullptr)
1006 cblob_it.add_after_then_move (bblob->
cblob ());
1007 prev_x = blob_box.
right ();
1010 bblob = box_it.data ();
1015 prev_gap = current_gap;
1016 prev_within_xht_gap = current_within_xht_gap;
1017 prev_blob_box = next_blob_box;
1018 current_gap = next_gap;
1019 current_within_xht_gap = next_within_xht_gap;
1020 peek_at_next_gap(row,
1024 next_within_xht_gap);
1026 int16_t prev_gap_arg = prev_gap;
1027 int16_t next_gap_arg = next_gap;
1029 prev_gap_arg = prev_within_xht_gap;
1030 next_gap_arg = next_within_xht_gap;
1033 if (blob_box.
left () > next_rep_char_word_right ||
1034 make_a_word_break(row, blob_box, prev_gap_arg, prev_blob_box,
1035 current_gap, current_within_xht_gap,
1036 next_blob_box, next_gap_arg,
1037 blanks, fuzzy_sp, fuzzy_non,
1038 prev_gap_was_a_space,
1039 break_at_next_gap) ||
1040 box_it.at_first()) {
1042 word =
new WERD (&cblobs, prev_blanks,
nullptr);
1044 word_it.add_after_then_move (word);
1052 else if (prev_fuzzy_non)
1056 if (blob_box.
left () > next_rep_char_word_right) {
1058 word = rep_char_it.extract ();
1059 word_it.add_after_then_move (word);
1062 repetition_spacing = find_mean_blob_spacing (word);
1064 current_within_xht_gap = current_gap;
1067 (uint8_t) floor (current_gap / row->
space_size);
1075 (
"Repch wd (%d,%d) rep gap %5.2f; Lgap:%d (%d blanks);",
1078 repetition_spacing, current_gap, blanks);
1086 blob_box.
left () - next_rep_char_word_right;
1088 blanks = (uint8_t) (current_gap / row->
space_size);
1095 tprintf (
" Rgap:%d (%d blanks)\n",
1096 current_gap, blanks);
1100 if (rep_char_it.empty ()) {
1101 next_rep_char_word_right = INT32_MAX;
1104 rep_char_it.forward ();
1105 next_rep_char_word_right =
1106 rep_char_it.data ()->bounding_box ().right ();
1110 if (box_it.at_first () && rep_char_it.empty ()) {
1113 xstarts[1] = prev_x;
1116 prev_blanks = blanks;
1117 prev_fuzzy_sp = fuzzy_sp;
1118 prev_fuzzy_non = fuzzy_non;
1123 while (!box_it.at_first ());
1126 while (!rep_char_it.empty ()) {
1127 word = rep_char_it.extract ();
1128 word_it.add_after_then_move (word);
1131 repetition_spacing = find_mean_blob_spacing (word);
1134 blanks = (uint8_t) floor (current_gap / row->
space_size);
1142 "Repch wd at EOL (%d,%d). rep spacing %5.2f; Lgap:%d (%d blanks)\n",
1144 repetition_spacing, current_gap, blanks);
1150 if (rep_char_it.empty ()) {
1153 xstarts[1] = prev_x;
1156 rep_char_it.forward ();
1159 real_row =
new ROW (row,
1161 word_it.set_to_list (real_row->
word_list ());
1163 word_it.add_list_after (&words);
1167 tprintf (
"Row: Made %d words in row ((%d,%d)(%d,%d))\n",
1191 C_OUTLINE_IT cout_it;
1193 C_BLOB_IT cblob_it = &cblobs;
1199 int16_t word_count = 0;
1201 cblob_it.set_to_list(&cblobs);
1204 WERD_IT word_it(&words);
1206 if (!box_it.empty()) {
1209 bblob = box_it.data();
1212 if (bblob->
cblob() !=
nullptr) {
1213 cout_it.set_to_list(cblob_it.data()->out_list());
1214 cout_it.move_to_last();
1216 delete bblob->
cblob();
1219 if (bblob->
cblob() !=
nullptr)
1220 cblob_it.add_after_then_move(bblob->
cblob());
1223 bblob = box_it.data();
1227 word =
new WERD(&cblobs, 1,
nullptr);
1229 word_it.add_after_then_move(word);
1234 if (box_it.at_first()) {
1239 while (!box_it.at_first());
1242 word_it.set_to_list(real_row->
word_list());
1244 word_it.add_list_after(&words);
1247 tprintf (
"Row:Made %d words in row ((%d,%d)(%d,%d))\n",
1259 bool Textord::make_a_word_break(
1264 int16_t real_current_gap,
1265 int16_t within_xht_current_gap,
1271 bool& prev_gap_was_a_space,
1272 bool& break_at_next_gap) {
1274 int16_t current_gap;
1275 float fuzzy_sp_to_kn_limit;
1277 if (break_at_next_gap) {
1278 break_at_next_gap =
false;
1289 (real_current_gap < tosp_dont_fool_with_small_kerns * row->kern_size)))
1291 within_xht_current_gap = real_current_gap;
1294 current_gap = within_xht_current_gap;
1296 current_gap = real_current_gap;
1301 if (space && (current_gap < INT16_MAX)) {
1302 if (current_gap < row->min_space) {
1315 blanks = (uint8_t) (current_gap / row->
space_size);
1327 prev_gap_was_a_space =
true;
1337 int num_blanks = current_gap;
1340 blanks =
static_cast<uint8_t
>(ClipToRange<int>(num_blanks, 1, UINT8_MAX));
1349 (real_current_gap <= row->max_nonspace) &&
1353 #ifndef GRAPHICS_DISABLED 1354 mark_gap (blob_box, 20,
1355 prev_gap, prev_blob_box.
width (),
1356 current_gap, next_blob_box.
width (), next_gap);
1360 (real_current_gap <= row->space_threshold) &&
1367 #ifndef GRAPHICS_DISABLED 1368 mark_gap (blob_box, 21,
1369 prev_gap, prev_blob_box.
width (),
1370 current_gap, next_blob_box.
width (), next_gap);
1374 (real_current_gap < row->min_space) &&
1375 (within_xht_current_gap >= row->
min_space)) {
1377 #ifndef GRAPHICS_DISABLED 1378 mark_gap (blob_box, 22,
1379 prev_gap, prev_blob_box.
width (),
1380 current_gap, next_blob_box.
width (), next_gap);
1384 !suspected_punct_blob(row, prev_blob_box) &&
1385 suspected_punct_blob(row, blob_box)) {
1386 break_at_next_gap =
true;
1389 else if ((current_gap < row->min_space) &&
1397 fuzzy_sp_to_kn_limit = 99999.0f;
1401 if ((prev_blob_box.
width () > 0) &&
1402 narrow_blob (row, prev_blob_box) &&
1403 prev_gap_was_a_space &&
1406 (current_gap > fuzzy_sp_to_kn_limit)) {
1414 #ifndef GRAPHICS_DISABLED 1415 mark_gap (blob_box, 1,
1416 prev_gap, prev_blob_box.
width (),
1417 current_gap, next_blob_box.
width (), next_gap);
1422 else if ((prev_blob_box.
width () > 0) &&
1423 narrow_blob (row, prev_blob_box) &&
1424 !prev_gap_was_a_space &&
1427 (current_gap > fuzzy_sp_to_kn_limit)) {
1435 #ifndef GRAPHICS_DISABLED 1436 mark_gap (blob_box, 2,
1437 prev_gap, prev_blob_box.
width (),
1438 current_gap, next_blob_box.
width (), next_gap);
1441 else if ((next_blob_box.
width () > 0) &&
1442 narrow_blob (row, next_blob_box) &&
1446 (current_gap > fuzzy_sp_to_kn_limit)) {
1454 #ifndef GRAPHICS_DISABLED 1455 mark_gap (blob_box, 3,
1456 prev_gap, prev_blob_box.
width (),
1457 current_gap, next_blob_box.
width (), next_gap);
1460 else if ((next_blob_box.
width () > 0) &&
1461 narrow_blob (row, next_blob_box) &&
1465 (current_gap > fuzzy_sp_to_kn_limit)) {
1473 #ifndef GRAPHICS_DISABLED 1474 mark_gap (blob_box, 4,
1475 prev_gap, prev_blob_box.
width (),
1476 current_gap, next_blob_box.
width (), next_gap);
1479 else if ((((next_blob_box.
width () > 0) &&
1480 narrow_blob (row, next_blob_box)) ||
1481 ((prev_blob_box.
width () > 0) &&
1482 narrow_blob (row, prev_blob_box)))) {
1484 #ifndef GRAPHICS_DISABLED 1485 mark_gap (blob_box, 6,
1486 prev_gap, prev_blob_box.
width (),
1487 current_gap, next_blob_box.
width (), next_gap);
1501 if ((prev_blob_box.
width () > 0) &&
1502 (next_blob_box.
width () > 0) &&
1505 wide_blob (row, prev_blob_box) &&
1506 wide_blob (row, next_blob_box)) {
1520 #ifndef GRAPHICS_DISABLED 1521 mark_gap (blob_box, 7,
1522 prev_gap, prev_blob_box.
width (),
1523 current_gap, next_blob_box.
width (), next_gap);
1525 }
else if (prev_blob_box.
width() > 0 &&
1526 next_blob_box.
width() > 0 &&
1530 !(narrow_blob(row, prev_blob_box) ||
1531 suspected_punct_blob(row, prev_blob_box)) &&
1532 !(narrow_blob(row, next_blob_box) ||
1533 suspected_punct_blob(row, next_blob_box))) {
1536 #ifndef GRAPHICS_DISABLED 1537 mark_gap (blob_box, 8,
1538 prev_gap, prev_blob_box.
width (),
1539 current_gap, next_blob_box.
width (), next_gap);
1543 (prev_blob_box.
width () > 0) &&
1544 (next_blob_box.
width () > 0) &&
1547 (!suspected_punct_blob (row, prev_blob_box) &&
1548 !suspected_punct_blob (row, next_blob_box)))) {
1551 #ifndef GRAPHICS_DISABLED 1552 mark_gap (blob_box, 9,
1553 prev_gap, prev_blob_box.
width (),
1554 current_gap, next_blob_box.
width (), next_gap);
1559 tprintf(
"word break = %d current_gap = %d, prev_gap = %d, " 1560 "next_gap = %d\n", space ? 1 : 0, current_gap,
1561 prev_gap, next_gap);
1562 prev_gap_was_a_space = space && !(fuzzy_non);
1567 bool Textord::narrow_blob(
TO_ROW* row,
TBOX blob_box) {
1570 (((
float) blob_box.
width () / blob_box.
height ()) <=
1575 bool Textord::wide_blob(
TO_ROW* row,
TBOX blob_box) {
1580 (((
float) blob_box.
width () / blob_box.
height ()) >
1586 result = !narrow_blob (row, blob_box);
1590 bool Textord::suspected_punct_blob(
TO_ROW* row,
TBOX box) {
1593 float blob_x_centre;
1595 blob_x_centre = (box.
right () + box.
left ()) / 2.0;
1605 void Textord::peek_at_next_gap(
TO_ROW *row,
1607 TBOX &next_blob_box,
1609 int16_t &next_within_xht_gap) {
1610 TBOX next_reduced_blob_box;
1612 BLOBNBOX_IT reduced_box_it = box_it;
1614 next_blob_box =
box_next (&box_it);
1615 next_reduced_blob_box = reduced_box_next (row, &reduced_box_it);
1616 if (box_it.at_first ()) {
1617 next_gap = INT16_MAX;
1618 next_within_xht_gap = INT16_MAX;
1621 bit_beyond = box_it.data ()->bounding_box ();
1622 next_gap = bit_beyond.
left () - next_blob_box.
right ();
1623 bit_beyond = reduced_box_next (row, &reduced_box_it);
1624 next_within_xht_gap =
1625 bit_beyond.
left () - next_reduced_blob_box.
right ();
1630 #ifndef GRAPHICS_DISABLED 1631 void Textord::mark_gap(
1635 int16_t prev_blob_width,
1636 int16_t current_gap,
1637 int16_t next_blob_width,
1693 blob.
left () - current_gap / 2.0f,
1698 tprintf(
" (%d,%d) Sp<->Kn Rule %d %d %d %d %d %d\n",
1699 blob.
left() - current_gap / 2, blob.
bottom(), rule, prev_gap,
1700 prev_blob_width, current_gap, next_blob_width, next_gap);
1704 float Textord::find_mean_blob_spacing(
WERD *word) {
1707 int32_t gap_sum = 0;
1708 int16_t gap_count = 0;
1712 if (!cblob_it.empty ()) {
1713 cblob_it.mark_cycle_pt ();
1714 prev_right = cblob_it.data ()->bounding_box ().
right ();
1716 cblob_it.forward ();
1717 for (; !cblob_it.cycled_list (); cblob_it.forward ()) {
1718 blob_box = cblob_it.data ()->bounding_box ();
1719 gap_sum += blob_box.
left () - prev_right;
1721 prev_right = blob_box.
right ();
1725 return (gap_sum / (
float) gap_count);
1731 bool Textord::ignore_big_gap(
TO_ROW* row,
1736 int16_t gap = right - left + 1;
1744 if ((gap > 2.1 * row->
xheight) && (row_length > 20 * row->
xheight))
1746 if ((gap > 1.75 * row->
xheight) &&
1747 ((row_length > 35 * row->
xheight) ||
1768 TBOX Textord::reduced_box_next(
1776 int16_t left_above_xht;
1777 int16_t new_left_above_xht;
1791 reduced_box = reduced_box_for_blob (blob, row, &left_above_xht);
1795 if (blob->
cblob() ==
nullptr)
1800 reduced_box_for_blob(blob, row, &new_left_above_xht);
1801 left_above_xht = std::min(left_above_xht, new_left_above_xht);
1807 if ((reduced_box.
width () > 0) &&
1809 < left_above_xht) && (reduced_box.
height () > 0.7 * row->
xheight)) {
1810 #ifndef GRAPHICS_DISABLED 1816 reduced_box = full_box;
1842 TBOX Textord::reduced_box_for_blob(
1845 int16_t *left_above_xht) {
1847 float blob_x_centre;
1856 blob_x_centre = (blob_box.
left () + blob_box.
right ()) / 2.0;
1863 left_limit = (float) INT32_MAX;
1864 junk = (float) -INT32_MAX;
1866 static_cast<float>(INT16_MAX), left_limit, junk);
1867 if (left_limit > junk)
1868 *left_above_xht = INT16_MAX;
1870 *left_above_xht = (int16_t) floor (left_limit);
1875 left_limit = (float) INT32_MAX;
1876 junk = (float) -INT32_MAX;
1880 if (left_limit > junk)
1885 junk = (float) INT32_MAX;
1886 right_limit = (float) -INT32_MAX;
1889 if (junk > right_limit)
1893 ICOORD ((int16_t) ceil (right_limit), blob_box.
top ()));
TBOX box_next_pre_chopped(BLOBNBOX_IT *it)
int tosp_enough_space_samples_for_median
double tosp_threshold_bias2
bool table_gap(int16_t left, int16_t right)
bool tosp_row_use_cert_spaces1
bool tosp_block_use_cert_spaces
int32_t pile_count(int32_t value) const
double tosp_wide_fraction
bool tosp_only_small_gaps_for_kern
double tosp_fuzzy_sp_fraction
double tosp_ignore_big_gaps
bool tosp_stats_use_xht_gaps
const TBOX & reduced_box() const
double tosp_pass_wide_fuzz_sp_to_context
double tosp_old_sp_kn_th_factor
double tosp_dont_fool_with_small_kerns
double tosp_ignore_very_big_gaps
double tosp_enough_small_gaps
TBOX bounding_box() const
double tosp_large_kerning
double tosp_threshold_bias1
void plot_word_decisions(ScrollView *win, int16_t pitch, TO_ROW *row)
bool tosp_flip_fuzz_kn_to_sp
void set_flag(WERD_FLAGS mask, bool value)
bool tosp_narrow_blobs_not_cert
TBOX box_next(BLOBNBOX_IT *it)
double tosp_kern_gap_factor1
double tosp_min_sane_kn_sp
double tosp_table_kn_sp_ratio
void Ellipse(int x, int y, int width, int height)
void set_reduced_box(TBOX new_box)
void plot(ScrollView *fd) const
bool tosp_rule_9_test_punct
double tosp_fuzzy_kn_fraction
bool tosp_only_use_prop_rows
double tosp_table_fuzzy_kn_sp_ratio
double tosp_kern_gap_factor3
bool joined_to_prev() const
int IntCastRounded(double x)
void find_cblob_hlimits(C_BLOB *blob, float bottomy, float topy, float &xmin, float &xmax)
void set_blanks(uint8_t new_blanks)
double tosp_fuzzy_space_factor
bool tosp_only_use_xht_gaps
TBOX bounding_box() const
DLLSYM void tprintf(const char *format,...)
double tosp_table_xht_sp_ratio
ROW * make_blob_words(TO_ROW *row, FCOORD rotation)
C_BLOB_LIST * cblob_list()
double tosp_fuzzy_space_factor2
double tosp_init_guess_kn_mult
bool tosp_flip_fuzz_sp_to_kn
void add(int32_t value, int32_t count)
EXTERN ScrollView * to_win
EXTERN bool textord_show_initial_words
bool tosp_row_use_cert_spaces
bool tosp_fuzzy_limit_all
void recalc_bounding_box()
bool tosp_recovery_isolated_row_stats
double tosp_narrow_aspect_ratio
double tosp_max_sane_kn_thresh
bool tosp_use_pre_chopping
ROW * make_prop_words(TO_ROW *row, FCOORD rotation)
C_OUTLINE_LIST * out_list()
const TBOX & bounding_box() const
double tosp_kern_gap_factor2
double tosp_narrow_fraction
double tosp_silly_kn_sp_gap
double tosp_fuzzy_space_factor1
bool tosp_force_wordbreak_on_punct
double tosp_wide_aspect_ratio
double tosp_init_guess_xht_mult
BLOBNBOX_LIST * blob_list()
int32_t get_total() const
bool tosp_all_flips_fuzzy
void to_spacing(ICOORD page_tr, TO_BLOCK_LIST *blocks)
PITCH_TYPE pitch_decision
bool tosp_old_to_constrain_sp_kn