35 #include "config_auto.h"
41 #define MAXSPACING 128
54 int16_t block_space_gap_width;
56 int16_t block_non_space_gap_width;
57 bool old_text_ord_proportional;
59 block_it.set_to_list (blocks);
61 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
62 block_it.forward ()) {
63 block = block_it.data ();
64 std::unique_ptr<GAPMAP> gapmap(
new GAPMAP (block));
65 block_spacing_stats(block,
67 old_text_ord_proportional,
68 block_space_gap_width,
69 block_non_space_gap_width);
77 static_cast<float>(block_space_gap_width) / block_non_space_gap_width < 3.0) {
78 block_non_space_gap_width = static_cast<int16_t>(floor (block_space_gap_width / 3.0));
83 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
88 tprintf (
"Block %d Row %d: Now Proportional\n",
89 block_index, row_index);
90 row_spacing_stats(row,
94 block_space_gap_width,
95 block_non_space_gap_width);
100 (
"Block %d Row %d: Now Fixed Pitch Decision:%d fp flag:%f\n",
104 #ifndef GRAPHICS_DISABLED
// Gathers block-wide gap statistics across every row of `block` and reports
// them through the three reference parameters:
//   old_text_ord_proportional - set true outright in one branch, otherwise
//       set to (iqr_centre_to_centre * 2 > iqr_all_gap_stats);
//   block_non_space_gap_width - the narrowest blob width seen (minwidth) in
//       one branch, otherwise floor(median of all_gap_stats);
//   block_space_gap_width     - -1 in the fallback branches, otherwise the
//       larger of floor(median of space_gap_stats) and
//       3 * block_non_space_gap_width.
// Two passes are made over the rows: the first fills all_gap_stats and
// centre_to_centre_stats, the second fills space_gap_stats with gaps wider
// than real_space_threshold.
// NOTE(review): this listing omits lines; the conditions that select between
// the branches above are not visible here - confirm against the full file.
119 void Textord::block_spacing_stats(
122 bool& old_text_ord_proportional,
123 int16_t& block_space_gap_width,
124 int16_t& block_non_space_gap_width
136 int16_t centre_to_centre;
138 float real_space_threshold;
139 float iqr_centre_to_centre;
140 float iqr_all_gap_stats;
// Pass 1: every gap that ignore_big_gap does not reject.
145 TO_ROW_IT row_it(block->
get_rows());
146 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
147 row = row_it.data ();
153 blob_it.mark_cycle_pt ();
// data_relative(-1) from the cycle point is used as the last blob of the row.
154 end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();
158 blob_box = reduced_box_next (row, &blob_it);
161 row_length = end_of_row - blob_box.
left ();
162 if (blob_box.
width () < minwidth)
163 minwidth = blob_box.
width ();
164 prev_blob_box = blob_box;
165 while (!blob_it.cycled_list ()) {
169 blob_box = reduced_box_next (row, &blob_it);
172 if (blob_box.
width () < minwidth)
173 minwidth = blob_box.
width ();
// `left`/`right` are the two edges of the gap, not of a blob.
174 int16_t left = prev_blob_box.
right();
175 int16_t right = blob_box.
left();
176 gap_width = right - left;
177 if (!ignore_big_gap(row, row_length, gapmap, left, right)) {
178 all_gap_stats.
add (gap_width, 1);
// Distance between the horizontal centres of the two neighbouring blobs.
180 centre_to_centre = (right + blob_box.
right () -
181 (prev_blob_box.
left () + left)) / 2;
183 centre_to_centre_stats.
add (centre_to_centre, 1);
186 prev_blob_box = blob_box;
193 block_non_space_gap_width = minwidth;
194 block_space_gap_width = -1;
196 old_text_ord_proportional =
true;
// Compare the interquartile ranges of the two distributions.
200 iqr_centre_to_centre = centre_to_centre_stats.
ile (0.75) -
201 centre_to_centre_stats.
ile (0.25);
202 iqr_all_gap_stats = all_gap_stats.
ile (0.75) - all_gap_stats.
ile (0.25);
203 old_text_ord_proportional =
204 iqr_centre_to_centre * 2 > iqr_all_gap_stats;
216 block_non_space_gap_width = static_cast<int16_t>(floor (all_gap_stats.
median ()));
// Pass 2: only gaps wider than real_space_threshold are candidate spaces.
219 row_it.set_to_list (block->
get_rows ());
220 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
221 row = row_it.data ();
226 real_space_threshold =
230 blob_it.mark_cycle_pt ();
// NOTE(review): assumes the elided line just before this one assigns
// end_of_row, as the first pass does - confirm against the full file.
232 blob_it.data_relative (-1)->bounding_box ().right ();
236 blob_box = reduced_box_next (row, &blob_it);
// Row extent is end minus start, matching the first pass and the other row
// scans in this file. The previous operand order gave a non-positive length,
// which made the row_length tests in ignore_big_gap impossible to satisfy.
239 row_length = end_of_row - blob_box.
left ();
240 prev_blob_box = blob_box;
241 while (!blob_it.cycled_list ()) {
245 blob_box = reduced_box_next (row, &blob_it);
248 int16_t left = prev_blob_box.
right();
249 int16_t right = blob_box.
left();
250 gap_width = right - left;
251 if ((gap_width > real_space_threshold) &&
252 !ignore_big_gap(row, row_length, gapmap, left, right)) {
// The gap counts when neither neighbour is narrow or both are wide
// (the leading terms of this condition are elided from the listing).
266 || (!narrow_blob (row, prev_blob_box)
267 && !narrow_blob (row, blob_box))))
268 || (wide_blob (row, prev_blob_box)
269 && wide_blob (row, blob_box)))
270 space_gap_stats.
add (gap_width, 1);
272 prev_blob_box = blob_box;
278 block_space_gap_width = -1;
// Never report a space width below three times the non-space width.
280 block_space_gap_width =
281 std::max(static_cast<int16_t>(floor(space_gap_stats.
median())),
282 static_cast<int16_t>(3 * block_non_space_gap_width));
// Collects gap statistics for a single row, seeded with the block-level
// estimates block_space_gap_width and block_non_space_gap_width, and derives
// the row's kern/space/threshold values (printed by the final traces as
// row->kern_size, row->space_size and row->space_threshold).
// A block space estimate is treated as usable only when it is > 0; otherwise
// floor(row->xheight / 2) is substituted.
// NOTE(review): this listing omits lines; several conditions and the
// remaining parameters are not visible here - confirm against the full file.
292 void Textord::row_spacing_stats(
297 int16_t block_space_gap_width,
298 int16_t block_non_space_gap_width
309 int16_t real_space_threshold = 0;
312 int16_t large_gap_count = 0;
313 bool suspected_table;
314 int32_t max_max_nonspace;
// Captured before block_space_gap_width is possibly overwritten below.
315 bool good_block_space_estimate = block_space_gap_width > 0;
317 int32_t row_length = 0;
319 int32_t sane_threshold;
323 if (!good_block_space_estimate)
324 block_space_gap_width = int16_t (floor (row->
xheight / 2));
// Two alternative threshold formulas; which one applies depends on a
// condition elided from this listing.
327 real_space_threshold =
328 block_non_space_gap_width +
329 int16_t (floor (0.5 +
331 block_non_space_gap_width)));
333 real_space_threshold =
334 (block_space_gap_width + block_non_space_gap_width) / 2;
336 blob_it.mark_cycle_pt ();
337 end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();
341 blob_box = reduced_box_next (row, &blob_it);
344 row_length = end_of_row - blob_box.
left ();
345 prev_blob_box = blob_box;
346 while (!blob_it.cycled_list ()) {
350 blob_box = reduced_box_next (row, &blob_it);
// `left`/`right` are the two edges of the gap between consecutive blobs.
353 int16_t left = prev_blob_box.
right();
354 int16_t right = blob_box.
left();
355 gap_width = right - left;
356 if (ignore_big_gap(row, row_length, gapmap, left, right)) {
359 if (gap_width >= real_space_threshold) {
// "Certain" spaces: neither neighbour narrow, or both wide (the leading
// terms of this condition are elided from the listing).
364 || (!narrow_blob (row, prev_blob_box)
365 && !narrow_blob (row, blob_box))))
366 || (wide_blob (row, prev_blob_box)
367 && wide_blob (row, blob_box)))
368 cert_space_gap_stats.
add (gap_width, 1);
369 all_space_gap_stats.
add (gap_width, 1);
372 small_gap_stats.
add (gap_width, 1);
373 all_gap_stats.
add (gap_width, 1);
375 prev_blob_box = blob_box;
// More than one large gap always marks the row as a suspected table; a
// single large gap does so only under a further, elided condition.
378 suspected_table = (large_gap_count > 1) ||
379 ((large_gap_count > 0) &&
384 if ((cert_space_gap_stats.
get_total () >=
388 cert_space_gap_stats.
get_total () > 0)) {
391 &cert_space_gap_stats,
393 block_space_gap_width,
394 block_non_space_gap_width);
397 !isolated_row_stats (row, gapmap, &all_gap_stats, suspected_table,
398 block_idx, row_idx)) {
400 tprintf (
"B:%d R:%d -- Inadequate certain spaces.\n",
408 row->
kern_size = block_non_space_gap_width;
416 &all_space_gap_stats,
418 block_space_gap_width,
419 block_non_space_gap_width);
424 improve_row_threshold(row, &all_gap_stats);
// Sanity checks on the row estimates follow; each trace names the value
// that is not being believed.
429 if (suspected_table &&
432 tprintf(
"B:%d R:%d -- DON'T BELIEVE SPACE %3.2f %d %3.2f.\n", block_idx,
445 if (good_block_space_estimate &&
447 sane_space = block_space_gap_width;
453 tprintf(
"B:%d R:%d -- DON'T BELIEVE SPACE %3.2f %d %3.2f -> %3.2f.\n",
466 tprintf(
"B:%d R:%d -- DON'T BELIEVE THRESH %3.2f %d %3.2f->%d.\n",
474 if (suspected_table) {
477 sane_threshold = int32_t (floor ((sane_space + row->
kern_size) / 2));
482 tprintf (
"B:%d R:%d -- SUSPECT NO SPACES %3.2f %d %3.2f.\n",
// Scans histogram buckets 0..max_max_nonspace inclusive.
527 for (index = 0; index <= max_max_nonspace; index++) {
531 (all_gap_stats.
pile_count (index) < 0.1 * max)) {
575 (
"B:%d R:%d L:%d-- Kn:%d Sp:%d Thr:%d -- Kn:%3.2f (%d) Thr:%d (%d) Sp:%3.2f\n",
576 block_idx, row_idx, row_length, block_non_space_gap_width,
577 block_space_gap_width, real_space_threshold, row->
kern_size,
581 tprintf(
"row->kern_size = %3.2f, row->space_size = %3.2f, "
582 "row->space_threshold = %d\n",
// Derives row->space_size and row->kern_size from the supplied gap
// statistics, bounded by the block-level estimates:
//   - space_size is capped at 1.5 * block_space_gap_width;
//   - space_size is raised to at least 2 * block_non_space_gap_width + 1 in
//     the first branch, and 3 * block_non_space_gap_width + 1 in the branch
//     taken when space_gap_stats holds at least one sample;
//   - kern_size can fall back to block_non_space_gap_width.
// NOTE(review): this listing omits lines; the statements that first assign
// space_size and the condition guarding the kern_size fallback are not
// visible here.
586 void Textord::old_to_method(
588 STATS *all_gap_stats,
589 STATS *space_gap_stats,
590 STATS *small_gap_stats,
591 int16_t block_space_gap_width,
592 int16_t block_non_space_gap_width
// Upper bound on the space estimate.
600 if (row->
space_size > block_space_gap_width * 1.5) {
602 row->
space_size = block_space_gap_width * 1.5;
// Lower bound on the space estimate (factor 2 in this branch).
607 if (row->
space_size < (block_non_space_gap_width * 2) + 1)
608 row->
space_size = (block_non_space_gap_width * 2) + 1;
611 else if (space_gap_stats->
get_total () >= 1) {
614 if (row->
space_size > block_space_gap_width * 1.5) {
616 row->
space_size = block_space_gap_width * 1.5;
// Lower bound on the space estimate (factor 3 in this branch).
621 if (row->
space_size < (block_non_space_gap_width * 3) + 1)
622 row->
space_size = (block_non_space_gap_width * 3) + 1;
636 row->
kern_size = block_non_space_gap_width;
// Attempts to estimate a row's spacing from the row's own gaps alone.
// The kern estimate starts from the median of all_gap_stats. Gaps wider than
// crude_threshold_estimate (and not rejected by ignore_big_gap) feed the
// space statistics; gaps narrower than it feed small_gap_stats.
// Traces "Can't do isolated row stats" when, among other elided conditions,
// fewer than one gap lies at or above ceil(crude_threshold_estimate).
// NOTE(review): this listing omits lines; the return statements and the
// derivation of crude_threshold_estimate are not visible here. The caller in
// row_spacing_stats tests the negated result, so presumably true means
// success - confirm.
677 bool Textord::isolated_row_stats(
TO_ROW* row,
679 STATS* all_gap_stats,
680 bool suspected_table,
684 float crude_threshold_estimate;
685 int16_t small_gaps_count;
698 kern_estimate = all_gap_stats->
median ();
// Count of gaps in buckets below ceil(crude_threshold_estimate).
701 small_gaps_count = stats_count_under (all_gap_stats,
702 static_cast<int16_t>(ceil (crude_threshold_estimate)));
707 (total - small_gaps_count < 1)) {
709 tprintf(
"B:%d R:%d -- Can't do isolated row stats.\n", block_idx,
714 blob_it.mark_cycle_pt ();
715 end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();
719 blob_box = reduced_box_next (row, &blob_it);
722 row_length = end_of_row - blob_box.
left ();
723 prev_blob_box = blob_box;
724 while (!blob_it.cycled_list ()) {
728 blob_box = reduced_box_next (row, &blob_it);
// `left`/`right` are the two edges of the gap between consecutive blobs.
731 int16_t left = prev_blob_box.
right();
732 int16_t right = blob_box.
left();
733 gap_width = right - left;
734 if (!ignore_big_gap(row, row_length, gapmap, left, right) &&
735 (gap_width > crude_threshold_estimate)) {
// "Certain" spaces: neither neighbour narrow, or both wide (the leading
// terms of this condition are elided from the listing).
739 (!narrow_blob (row, prev_blob_box) &&
740 !narrow_blob (row, blob_box)))) ||
741 (wide_blob (row, prev_blob_box) && wide_blob (row, blob_box)))
742 cert_space_gap_stats.
add (gap_width, 1);
743 all_space_gap_stats.
add (gap_width, 1);
745 if (gap_width < crude_threshold_estimate)
746 small_gap_stats.
add (gap_width, 1);
748 prev_blob_box = blob_box;
754 else if (suspected_table && (cert_space_gap_stats.
get_total () > 0))
758 else if (all_space_gap_stats.
get_total () >=
776 tprintf (
"B:%d R:%d -- Isolated row stats SANITY FAILURE: %f %d %f\n",
786 tprintf (
"B:%d R:%d -- Isolated row stats: %f %d %f\n",
// Walks the bucket indices 0 .. threshold - 1 of `stats`; the bucket at
// `threshold` itself is excluded.
// NOTE(review): the loop body and the return statement are elided from this
// listing; presumably the per-bucket counts are summed and returned - confirm.
792 int16_t Textord::stats_count_under(
STATS *stats, int16_t threshold) {
796 for (index = 0; index < threshold; index++)
// Tries to refine the row's space threshold by searching the gap histogram
// between the kern estimate (kn) and the space estimate (sp) for a run of
// buckets long enough to satisfy reqd_zero_width.
// The search is abandoned early when all_gap_stats holds 25 samples or fewer,
// or under further conditions elided from this listing.
// NOTE(review): this listing omits lines; the definitions of kn and sp, the
// loop body and the final threshold assignment are not visible here.
817 void Textord::improve_row_threshold(
TO_ROW *row,
STATS *all_gap_stats) {
820 int16_t reqd_zero_width = 0;
821 int16_t zero_width = 0;
822 int16_t zero_start = 0;
826 tprintf (
"Improve row threshold 0");
827 if ((all_gap_stats->
get_total () <= 25) ||
830 (stats_count_under (all_gap_stats,
831 static_cast<int16_t>(ceil (kn + (sp - kn) / 3 + 0.5))) <
// Required run length: a third of the kn..sp distance, rounded to nearest.
841 reqd_zero_width = static_cast<int16_t>(floor ((sp - kn) / 3 + 0.5));
842 if (reqd_zero_width < 3)
// Scan buckets from ceil(kn) up to, but not including, floor(sp).
845 for (index = int16_t (ceil (kn)); index < int16_t (floor (sp)); index++) {
852 if (zero_width >= reqd_zero_width)
// The trace must end in a newline escape; the former "/n" printed a literal
// slash and 'n' and left the output line unterminated.
861 tprintf (
" reqd_z_width: %d found %d 0's, starting %d; thresh: %d\n",
863 if ((zero_width < reqd_zero_width) ||
872 (
"Improve row kn:%5.2f sp:%5.2f 0's: %d -> %d thresh:%d -> %d\n",
879 (
"Improve row kn:%5.2f sp:%5.2f 0's: %d -> %d thresh:%d -> %d\n",
901 bool fuzzy_sp =
false;
902 bool fuzzy_non =
false;
904 bool prev_gap_was_a_space =
false;
905 bool break_at_next_gap =
false;
907 C_OUTLINE_IT cout_it;
909 C_BLOB_IT cblob_it = &cblobs;
912 int32_t next_rep_char_word_right = INT32_MAX;
913 float repetition_spacing;
921 int16_t prev_gap = INT16_MAX;
922 int16_t current_gap = INT16_MAX;
923 int16_t next_gap = INT16_MAX;
924 int16_t prev_within_xht_gap = INT16_MAX;
925 int16_t current_within_xht_gap = INT16_MAX;
926 int16_t next_within_xht_gap = INT16_MAX;
927 int16_t word_count = 0;
931 if (!rep_char_it.empty ()) {
932 next_rep_char_word_right =
933 rep_char_it.data ()->bounding_box ().right ();
937 cblob_it.set_to_list (&cblobs);
940 WERD_IT word_it(&words);
943 prev_fuzzy_sp =
false;
944 prev_fuzzy_non =
false;
945 if (!box_it.empty ()) {
946 xstarts[0] = box_it.data ()->bounding_box ().left ();
947 if (xstarts[0] > next_rep_char_word_right) {
949 word = rep_char_it.extract ();
950 word_it.add_after_then_move (word);
960 repetition_spacing = find_mean_blob_spacing (word);
961 current_gap = box_it.data ()->bounding_box ().left () -
962 next_rep_char_word_right;
963 current_within_xht_gap = current_gap;
965 prev_blanks = static_cast<uint8_t>(floor (current_gap / row->
space_size));
972 tprintf (
"Repch wd at BOL(%d, %d). rep spacing %5.2f; Rgap:%d ",
973 box_it.data ()->bounding_box ().left (),
974 box_it.data ()->bounding_box ().bottom (),
975 repetition_spacing, current_gap);
976 prev_fuzzy_sp =
false;
977 prev_fuzzy_non =
false;
978 if (rep_char_it.empty ()) {
979 next_rep_char_word_right = INT32_MAX;
982 rep_char_it.forward ();
983 next_rep_char_word_right =
984 rep_char_it.data ()->bounding_box ().right ();
988 peek_at_next_gap(row,
992 next_within_xht_gap);
994 bblob = box_it.data ();
997 if (bblob->
cblob () !=
nullptr) {
998 cout_it.set_to_list (cblob_it.data ()->out_list ());
999 cout_it.move_to_last ();
1001 delete bblob->
cblob ();
1004 if (bblob->
cblob() !=
nullptr)
1005 cblob_it.add_after_then_move (bblob->
cblob ());
1006 prev_x = blob_box.
right ();
1009 bblob = box_it.data ();
1014 prev_gap = current_gap;
1015 prev_within_xht_gap = current_within_xht_gap;
1016 prev_blob_box = next_blob_box;
1017 current_gap = next_gap;
1018 current_within_xht_gap = next_within_xht_gap;
1019 peek_at_next_gap(row,
1023 next_within_xht_gap);
1025 int16_t prev_gap_arg = prev_gap;
1026 int16_t next_gap_arg = next_gap;
1028 prev_gap_arg = prev_within_xht_gap;
1029 next_gap_arg = next_within_xht_gap;
1032 if (blob_box.
left () > next_rep_char_word_right ||
1033 make_a_word_break(row, blob_box, prev_gap_arg, prev_blob_box,
1034 current_gap, current_within_xht_gap,
1035 next_blob_box, next_gap_arg,
1036 blanks, fuzzy_sp, fuzzy_non,
1037 prev_gap_was_a_space,
1038 break_at_next_gap) ||
1039 box_it.at_first()) {
1041 word =
new WERD (&cblobs, prev_blanks,
nullptr);
1043 word_it.add_after_then_move (word);
1051 else if (prev_fuzzy_non)
1055 if (blob_box.
left () > next_rep_char_word_right) {
1057 word = rep_char_it.extract ();
1058 word_it.add_after_then_move (word);
1061 repetition_spacing = find_mean_blob_spacing (word);
1063 current_within_xht_gap = current_gap;
1066 static_cast<uint8_t>(floor (current_gap / row->
space_size));
1074 (
"Repch wd (%d,%d) rep gap %5.2f; Lgap:%d (%d blanks);",
1077 repetition_spacing, current_gap, blanks);
1085 blob_box.
left () - next_rep_char_word_right;
1087 blanks = static_cast<uint8_t>(current_gap / row->
space_size);
1094 tprintf (
" Rgap:%d (%d blanks)\n",
1095 current_gap, blanks);
1099 if (rep_char_it.empty ()) {
1100 next_rep_char_word_right = INT32_MAX;
1103 rep_char_it.forward ();
1104 next_rep_char_word_right =
1105 rep_char_it.data ()->bounding_box ().right ();
1109 if (box_it.at_first () && rep_char_it.empty ()) {
1112 xstarts[1] = prev_x;
1115 prev_blanks = blanks;
1116 prev_fuzzy_sp = fuzzy_sp;
1117 prev_fuzzy_non = fuzzy_non;
1122 while (!box_it.at_first ());
1125 while (!rep_char_it.empty ()) {
1126 word = rep_char_it.extract ();
1127 word_it.add_after_then_move (word);
1130 repetition_spacing = find_mean_blob_spacing (word);
1133 blanks = static_cast<uint8_t>(floor (current_gap / row->
space_size));
1141 "Repch wd at EOL (%d,%d). rep spacing %5.2f; Lgap:%d (%d blanks)\n",
1143 repetition_spacing, current_gap, blanks);
1149 if (rep_char_it.empty ()) {
1152 xstarts[1] = prev_x;
1155 rep_char_it.forward ();
1158 real_row =
new ROW (row,
1160 word_it.set_to_list (real_row->
word_list ());
1162 word_it.add_list_after (&words);
1166 tprintf (
"Row: Made %d words in row ((%d,%d)(%d,%d))\n",
1190 C_OUTLINE_IT cout_it;
1192 C_BLOB_IT cblob_it = &cblobs;
1198 int16_t word_count = 0;
1200 cblob_it.set_to_list(&cblobs);
1203 WERD_IT word_it(&words);
1205 if (!box_it.empty()) {
1208 bblob = box_it.data();
1211 if (bblob->
cblob() !=
nullptr) {
1212 cout_it.set_to_list(cblob_it.data()->out_list());
1213 cout_it.move_to_last();
1215 delete bblob->
cblob();
1218 if (bblob->
cblob() !=
nullptr)
1219 cblob_it.add_after_then_move(bblob->
cblob());
1222 bblob = box_it.data();
1226 word =
new WERD(&cblobs, 1,
nullptr);
1228 word_it.add_after_then_move(word);
1233 if (box_it.at_first()) {
1238 while (!box_it.at_first());
1241 word_it.set_to_list(real_row->
word_list());
1243 word_it.add_list_after(&words);
1246 tprintf (
"Row:Made %d words in row ((%d,%d)(%d,%d))\n",
// Decides whether the current gap is a word break; the closing trace prints
// the decision as `space` together with current_gap, prev_gap and next_gap.
// Visible behaviour:
//   - a pending break_at_next_gap request is cleared on entry;
//   - current_gap is taken from either within_xht_current_gap or
//     real_current_gap, and within_xht_current_gap may first be replaced by
//     real_current_gap;
//   - `blanks` is current_gap / row->space_size truncated to uint8_t, or a
//     count clipped to the range [1, UINT8_MAX];
//   - each rule that changes the decision reports itself through mark_gap
//     with a distinct rule number (20, 21, 22, 1, 2, 3, 4, 6, 7, 8, 9);
//   - prev_gap_was_a_space is updated to (space && !fuzzy_non) at the end.
// NOTE(review): this listing omits lines; the remaining parameters, most rule
// conditions and the return statement are not visible here - presumably the
// function returns `space`; confirm against the full file.
1258 bool Textord::make_a_word_break(
1263 int16_t real_current_gap,
1264 int16_t within_xht_current_gap,
1270 bool& prev_gap_was_a_space,
1271 bool& break_at_next_gap) {
1273 int16_t current_gap;
1274 float fuzzy_sp_to_kn_limit;
// Consume a break request left by the previous call.
1276 if (break_at_next_gap) {
1277 break_at_next_gap =
false;
1288 (real_current_gap < tosp_dont_fool_with_small_kerns * row->kern_size)))
1290 within_xht_current_gap = real_current_gap;
1293 current_gap = within_xht_current_gap;
1295 current_gap = real_current_gap;
// INT16_MAX is the "no gap" sentinel written by peek_at_next_gap.
1300 if (space && (current_gap < INT16_MAX)) {
1301 if (current_gap < row->min_space) {
1314 blanks = static_cast<uint8_t>(current_gap / row->
space_size);
1326 prev_gap_was_a_space =
true;
1336 int num_blanks = current_gap;
// Clip so that the value fits in uint8_t and is never zero.
1339 blanks = static_cast<uint8_t>(ClipToRange<int>(num_blanks, 1, UINT8_MAX));
1348 (real_current_gap <= row->max_nonspace) &&
1352 #ifndef GRAPHICS_DISABLED
1353 mark_gap (blob_box, 20,
1354 prev_gap, prev_blob_box.
width (),
1355 current_gap, next_blob_box.
width (), next_gap);
1359 (real_current_gap <= row->space_threshold) &&
1366 #ifndef GRAPHICS_DISABLED
1367 mark_gap (blob_box, 21,
1368 prev_gap, prev_blob_box.
width (),
1369 current_gap, next_blob_box.
width (), next_gap);
1373 (real_current_gap < row->min_space) &&
1374 (within_xht_current_gap >= row->
min_space)) {
1376 #ifndef GRAPHICS_DISABLED
1377 mark_gap (blob_box, 22,
1378 prev_gap, prev_blob_box.
width (),
1379 current_gap, next_blob_box.
width (), next_gap);
// Previous blob is not suspected punctuation but the current one is:
// request a break at the following gap.
1383 !suspected_punct_blob(row, prev_blob_box) &&
1384 suspected_punct_blob(row, blob_box)) {
1385 break_at_next_gap =
true;
1388 else if ((current_gap < row->min_space) &&
1396 fuzzy_sp_to_kn_limit = 99999.0f;
// Rules 1-6: a neighbouring blob is narrow. A width of 0 is excluded by the
// explicit width () > 0 tests.
1400 if ((prev_blob_box.
width () > 0) &&
1401 narrow_blob (row, prev_blob_box) &&
1402 prev_gap_was_a_space &&
1405 (current_gap > fuzzy_sp_to_kn_limit)) {
1413 #ifndef GRAPHICS_DISABLED
1414 mark_gap (blob_box, 1,
1415 prev_gap, prev_blob_box.
width (),
1416 current_gap, next_blob_box.
width (), next_gap);
1421 else if ((prev_blob_box.
width () > 0) &&
1422 narrow_blob (row, prev_blob_box) &&
1423 !prev_gap_was_a_space &&
1426 (current_gap > fuzzy_sp_to_kn_limit)) {
1434 #ifndef GRAPHICS_DISABLED
1435 mark_gap (blob_box, 2,
1436 prev_gap, prev_blob_box.
width (),
1437 current_gap, next_blob_box.
width (), next_gap);
1440 else if ((next_blob_box.
width () > 0) &&
1441 narrow_blob (row, next_blob_box) &&
1445 (current_gap > fuzzy_sp_to_kn_limit)) {
1453 #ifndef GRAPHICS_DISABLED
1454 mark_gap (blob_box, 3,
1455 prev_gap, prev_blob_box.
width (),
1456 current_gap, next_blob_box.
width (), next_gap);
1459 else if ((next_blob_box.
width () > 0) &&
1460 narrow_blob (row, next_blob_box) &&
1464 (current_gap > fuzzy_sp_to_kn_limit)) {
1472 #ifndef GRAPHICS_DISABLED
1473 mark_gap (blob_box, 4,
1474 prev_gap, prev_blob_box.
width (),
1475 current_gap, next_blob_box.
width (), next_gap);
1478 else if ((((next_blob_box.
width () > 0) &&
1479 narrow_blob (row, next_blob_box)) ||
1480 ((prev_blob_box.
width () > 0) &&
1481 narrow_blob (row, prev_blob_box)))) {
1483 #ifndef GRAPHICS_DISABLED
1484 mark_gap (blob_box, 6,
1485 prev_gap, prev_blob_box.
width (),
1486 current_gap, next_blob_box.
width (), next_gap);
// Rules 7-9: both neighbours have non-zero width.
1500 if ((prev_blob_box.
width () > 0) &&
1501 (next_blob_box.
width () > 0) &&
1504 wide_blob (row, prev_blob_box) &&
1505 wide_blob (row, next_blob_box)) {
1519 #ifndef GRAPHICS_DISABLED
1520 mark_gap (blob_box, 7,
1521 prev_gap, prev_blob_box.
width (),
1522 current_gap, next_blob_box.
width (), next_gap);
1524 }
else if (prev_blob_box.
width() > 0 &&
1525 next_blob_box.
width() > 0 &&
1529 !(narrow_blob(row, prev_blob_box) ||
1530 suspected_punct_blob(row, prev_blob_box)) &&
1531 !(narrow_blob(row, next_blob_box) ||
1532 suspected_punct_blob(row, next_blob_box))) {
1535 #ifndef GRAPHICS_DISABLED
1536 mark_gap (blob_box, 8,
1537 prev_gap, prev_blob_box.
width (),
1538 current_gap, next_blob_box.
width (), next_gap);
1542 (prev_blob_box.
width () > 0) &&
1543 (next_blob_box.
width () > 0) &&
1546 (!suspected_punct_blob (row, prev_blob_box) &&
1547 !suspected_punct_blob (row, next_blob_box)))) {
1550 #ifndef GRAPHICS_DISABLED
1551 mark_gap (blob_box, 9,
1552 prev_gap, prev_blob_box.
width (),
1553 current_gap, next_blob_box.
width (), next_gap);
1558 tprintf(
"word break = %d current_gap = %d, prev_gap = %d, "
1559 "next_gap = %d\n", space ? 1 : 0, current_gap,
1560 prev_gap, next_gap);
// A fuzzy non-space does not count as a space for the next call.
1561 prev_gap_was_a_space = space && !(fuzzy_non);
// Classifies a blob as narrow. One visible test compares the blob's
// width-to-height ratio (computed in float) with an upper limit using <=.
// NOTE(review): the limit itself, any other tests and the return statement
// are elided from this listing.
1566 bool Textord::narrow_blob(
TO_ROW* row,
TBOX blob_box) {
// Cast first so that the division is done in floating point.
1569 ((static_cast<float>(blob_box.
width ()) / blob_box.
height ()) <=
// Classifies a blob as wide. One visible test compares the blob's
// width-to-height ratio (computed in float) with a lower limit using >; in
// another branch the result is simply the negation of narrow_blob.
// NOTE(review): the limit, the branch condition and the return statement are
// elided from this listing.
1574 bool Textord::wide_blob(
TO_ROW* row,
TBOX blob_box) {
// Cast first so that the division is done in floating point.
1579 ((static_cast<float>(blob_box.
width ()) / blob_box.
height ()) >
// Alternative definition: wide means "not narrow".
1585 result = !narrow_blob (row, blob_box);
// Tests whether a blob looks like punctuation. The visible part computes the
// horizontal centre of `box`.
// NOTE(review): how the centre is used and what is returned are elided from
// this listing.
1589 bool Textord::suspected_punct_blob(
TO_ROW* row,
TBOX box) {
1592 float blob_x_centre;
// Dividing by 2.0 keeps the half-pixel when right + left is odd.
1594 blob_x_centre = (box.
right () + box.
left ()) / 2.0;
// Looks ahead from the current iterator position and reports, through the
// reference parameters, the next blob box and the gap that follows it, both
// for full boxes (next_gap) and for reduced boxes (next_within_xht_gap).
// Both gaps are set to INT16_MAX when box_it.at_first () holds after the
// box_next call, so no following gap is measured.
// NOTE(review): this listing omits lines; the box_it parameter declaration
// (by value or by reference) and the `else` separating the two assignments
// groups are not visible here.
1604 void Textord::peek_at_next_gap(
TO_ROW *row,
1606 TBOX &next_blob_box,
1608 int16_t &next_within_xht_gap) {
1609 TBOX next_reduced_blob_box;
// A second iterator is advanced separately to obtain the reduced boxes.
1611 BLOBNBOX_IT reduced_box_it = box_it;
1613 next_blob_box =
box_next (&box_it);
1614 next_reduced_blob_box = reduced_box_next (row, &reduced_box_it);
1615 if (box_it.at_first ()) {
1616 next_gap = INT16_MAX;
1617 next_within_xht_gap = INT16_MAX;
// Gap = left edge of the blob after next minus right edge of the next blob.
1620 bit_beyond = box_it.data ()->bounding_box ();
1621 next_gap = bit_beyond.
left () - next_blob_box.
right ();
1622 bit_beyond = reduced_box_next (row, &reduced_box_it);
1623 next_within_xht_gap =
1624 bit_beyond.
left () - next_reduced_blob_box.
right ();
1629 #ifndef GRAPHICS_DISABLED
// Reports a space/kern rule decision for the gap next to `blob`. The
// reported x position is blob.left () minus half of current_gap, and the
// trace prints that position, blob.bottom (), the rule number and the
// surrounding gap and blob widths.
// NOTE(review): this listing omits lines; the remaining parameters and the
// call that receives the float position are not visible here.
1630 void Textord::mark_gap(
1634 int16_t prev_blob_width,
1635 int16_t current_gap,
1636 int16_t next_blob_width,
// Float midpoint, passed to a call elided from this listing.
1692 blob.
left () - current_gap / 2.0f,
// The trace uses integer division for the same midpoint (matches %d).
1697 tprintf(
" (%d,%d) Sp<->Kn Rule %d %d %d %d %d %d\n",
1698 blob.
left() - current_gap / 2, blob.
bottom(), rule, prev_gap,
1699 prev_blob_width, current_gap, next_blob_width, next_gap);
// Returns the mean horizontal gap between consecutive blobs of `word`:
// gap_sum accumulates, for every blob after the first, its left edge minus
// the previous blob's right edge, and the result is gap_sum / gap_count.
// NOTE(review): this listing omits lines; the increment of gap_count and any
// guard against gap_count == 0 before the division are not visible here -
// confirm the division is protected for words with fewer than two blobs.
1703 float Textord::find_mean_blob_spacing(
WERD *word) {
1706 int32_t gap_sum = 0;
1707 int16_t gap_count = 0;
1711 if (!cblob_it.empty ()) {
1712 cblob_it.mark_cycle_pt ();
// Seed with the first blob, then step past it before the loop starts.
1713 prev_right = cblob_it.data ()->bounding_box ().
right ();
1715 cblob_it.forward ();
1716 for (; !cblob_it.cycled_list (); cblob_it.forward ()) {
1717 blob_box = cblob_it.data ()->bounding_box ();
1718 gap_sum += blob_box.
left () - prev_right;
1720 prev_right = blob_box.
right ();
// Cast so that the division is done in floating point.
1724 return (gap_sum / static_cast<float>(gap_count));
// Decides whether a gap is too large to be used in the spacing statistics.
// The gap is measured inclusively (right - left + 1) and compared with
// multiples of row->xheight; the visible tests also require the row to be
// long (row_length > 20 or 35 times xheight), so they can only succeed for a
// positive row_length.
// NOTE(review): this listing omits lines; the remaining parameters, further
// conditions and the return statements are not visible here.
1730 bool Textord::ignore_big_gap(
TO_ROW* row,
1735 int16_t gap = right - left + 1;
1743 if ((gap > 2.1 * row->
xheight) && (row_length > 20 * row->
xheight))
// A smaller gap qualifies only on a longer row, or under a further condition
// elided from this listing.
1745 if ((gap > 1.75 * row->
xheight) &&
1746 ((row_length > 35 * row->
xheight) ||
// Produces the "reduced" box for the next blob in the iteration, built from
// reduced_box_for_blob. left_above_xht keeps the smallest value reported by
// the reduced_box_for_blob calls. One visible branch replaces the reduced box
// with full_box.
// NOTE(review): this listing omits lines; the parameters, the loop structure
// and the return statement are not visible here.
1767 TBOX Textord::reduced_box_next(
1775 int16_t left_above_xht;
1776 int16_t new_left_above_xht;
1790 reduced_box = reduced_box_for_blob (blob, row, &left_above_xht);
// Blobs without a cblob get separate handling (elided from this listing).
1794 if (blob->
cblob() ==
nullptr)
1799 reduced_box_for_blob(blob, row, &new_left_above_xht);
1800 left_above_xht = std::min(left_above_xht, new_left_above_xht);
// Requires a non-empty reduced box taller than 0.7 * xheight, together with a
// comparison against left_above_xht whose left operand is elided.
1806 if ((reduced_box.
width () > 0) &&
1808 < left_above_xht) && (reduced_box.
height () > 0.7 * row->
xheight)) {
1809 #ifndef GRAPHICS_DISABLED
1815 reduced_box = full_box;
// Computes a horizontally reduced bounding box for one blob. The returned
// TBOX keeps the blob's bottom and top and uses floor(left_limit) and
// ceil(right_limit) as its horizontal extent. *left_above_xht receives
// floor(left_limit) from the first search, or INT16_MAX when that search
// leaves left_limit > junk.
// left_limit/right_limit/junk are seeded with +/-INT32_MAX sentinels before
// each search call, so "min still greater than max" means nothing was found.
// NOTE(review): this listing omits lines; the search calls themselves, the
// remaining parameters and the not-found handling are not visible here.
1841 TBOX Textord::reduced_box_for_blob(
1844 int16_t *left_above_xht) {
1846 float blob_x_centre;
// Dividing by 2.0 keeps the half-pixel when left + right is odd.
1855 blob_x_centre = (blob_box.
left () + blob_box.
right ()) / 2.0;
// First search: result reported through *left_above_xht.
1862 left_limit = static_cast<float>(INT32_MAX);
1863 junk = static_cast<float>(-INT32_MAX);
1865 static_cast<float>(INT16_MAX), left_limit, junk);
1866 if (left_limit > junk)
1867 *left_above_xht = INT16_MAX;
1869 *left_above_xht = static_cast<int16_t>(floor (left_limit));
// Second search: left edge of the reduced box.
1874 left_limit = static_cast<float>(INT32_MAX);
1875 junk = static_cast<float>(-INT32_MAX);
1879 if (left_limit > junk)
// Third search: right edge of the reduced box.
1884 junk = static_cast<float>(INT32_MAX);
1885 right_limit = static_cast<float>(-INT32_MAX);
1888 if (junk > right_limit)
1891 return TBOX (
ICOORD (static_cast<int16_t>(floor (left_limit)), blob_box.
bottom ()),
1892 ICOORD (static_cast<int16_t>(ceil (right_limit)), blob_box.
top ()));