44 #define PERFECT_WERDS 999
55 static int c_blob_comparator(
59 const C_BLOB *blob1 = *reinterpret_cast<const C_BLOB* const*>(blob1p);
60 const C_BLOB *blob2 = *reinterpret_cast<const C_BLOB* const*>(blob2p);
78 BLOCK_RES_IT block_res_it;
79 ROW_RES_IT row_res_it;
80 WERD_RES_IT word_res_it_from;
81 WERD_RES_IT word_res_it_to;
83 WERD_RES_LIST fuzzy_space_words;
85 bool prevent_null_wd_fixsp;
90 for (block_res_it.mark_cycle_pt(); !block_res_it.cycled_list();
91 block_res_it.forward()) {
92 row_res_it.set_to_list(&block_res_it.data()->row_res_list);
93 for (row_res_it.mark_cycle_pt(); !row_res_it.cycled_list();
94 row_res_it.forward()) {
95 word_res_it_from.set_to_list(&row_res_it.data()->word_res_list);
96 while (!word_res_it_from.at_last()) {
97 word_res = word_res_it_from.data();
98 while (!word_res_it_from.at_last() &&
100 word_res_it_from.data_relative(1)->word->flag(
W_FUZZY_NON) ||
101 word_res_it_from.data_relative(1)->word->flag(
W_FUZZY_SP))) {
103 block_res_it.data()->block);
104 word_res = word_res_it_from.forward();
106 if (monitor !=
nullptr) {
108 monitor->
progress = 90 + 5 * word_index / word_count;
110 (monitor->
cancel !=
nullptr &&
116 if (!word_res_it_from.at_last()) {
117 word_res_it_to = word_res_it_from;
118 prevent_null_wd_fixsp =
122 word_res_it_to.forward();
124 if (monitor !=
nullptr) {
126 monitor->
progress = 90 + 5 * word_index / word_count;
128 (monitor->
cancel !=
nullptr &&
132 while (!word_res_it_to.at_last () &&
133 (word_res_it_to.data_relative(1)->word->flag(
W_FUZZY_NON) ||
134 word_res_it_to.data_relative(1)->word->flag(
W_FUZZY_SP))) {
138 prevent_null_wd_fixsp =
true;
139 word_res = word_res_it_to.forward();
144 prevent_null_wd_fixsp =
true;
145 if (prevent_null_wd_fixsp) {
146 word_res_it_from = word_res_it_to;
148 fuzzy_space_words.assign_to_sublist(&word_res_it_from,
151 row_res_it.data()->row,
152 block_res_it.data()->block);
153 new_length = fuzzy_space_words.length();
154 word_res_it_from.add_list_before(&fuzzy_space_words);
156 !word_res_it_from.at_last() && new_length > 0;
158 word_res_it_from.forward();
165 block_res_it.data()->block);
176 WERD_RES_LIST current_perm;
177 int16_t current_score;
178 bool improved =
false;
181 dump_words(best_perm, best_score, 1, improved);
186 while ((best_score !=
PERFECT_WERDS) && !current_perm.empty()) {
189 dump_words(current_perm, current_score, 2, improved);
190 if (current_score > best_score) {
193 best_score = current_score;
199 dump_words(best_perm, best_score, 3, improved);
205 WERD_RES_IT src_it(&src_list);
206 WERD_RES_IT new_it(&new_list);
210 for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
211 src_wd = src_it.data();
216 new_it.add_after_then_move(new_wd);
225 WERD_RES_IT word_it(&words);
230 for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
231 word = word_it.data();
233 WordData word_data(block, row, word);
267 WERD_RES_IT word_res_it(&word_res_list);
268 int16_t total_score = 0;
269 int16_t word_count = 0;
270 int16_t done_word_count = 0;
275 int16_t prev_word_score = 0;
276 bool prev_word_done =
false;
277 bool prev_char_1 =
false;
278 bool prev_char_digit =
false;
279 bool current_char_1 =
false;
280 bool current_word_ok_so_far;
281 STRING punct_chars =
"!\"`',.:;";
282 bool prev_char_punct =
false;
283 bool current_char_punct =
false;
284 bool word_done =
false;
287 word = word_res_it.data();
291 total_score += prev_word_score;
296 prev_char_digit =
false;
297 prev_word_done =
false;
305 current_word_ok_so_far =
false;
307 (prev_char_digit && (
313 total_score += prev_word_score;
316 current_word_ok_so_far = word_done;
319 if (current_word_ok_so_far) {
320 prev_word_done =
true;
321 prev_word_score = word_len;
323 prev_word_done =
false;
329 for (i = 0, prev_char_1 =
false; i < word_len; i++) {
331 if (prev_char_1 || (current_char_1 && (i > 0)))
333 prev_char_1 = current_char_1;
339 for (i = 0, offset = 0, prev_char_punct =
false; i < word_len;
343 if (prev_char_punct || (current_char_punct && i > 0))
345 prev_char_punct = current_char_punct;
349 for (i = 0, offset = 0; i < word_len - 1;
358 word_res_it.forward();
359 }
while (word_res_it.data()->part_of_combo);
360 }
while (!word_res_it.at_first());
361 total_score += prev_word_score;
364 if (done_word_count == word_count)
374 for (i = 0, offset = 0; i < char_position;
400 WERD_RES_IT word_it(&words);
401 WERD_RES_IT prev_word_it(&words);
406 int16_t prev_right = -INT16_MAX;
409 int16_t min_gap = INT16_MAX;
411 for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
412 word = word_it.data();
415 if (prev_right > -INT16_MAX) {
416 gap = box.
left() - prev_right;
420 prev_right = box.
right();
423 if (min_gap < INT16_MAX) {
424 prev_right = -INT16_MAX;
425 word_it.set_to_list(&words);
427 for (; (prev_right == -INT16_MAX) || !word_it.at_first();
429 word = word_it.data();
432 if (prev_right > -INT16_MAX) {
433 gap = box.
left() - prev_right;
434 if (gap <= min_gap) {
435 prev_word = prev_word_it.data();
441 copy_word =
new WERD;
442 *copy_word = *(prev_word->
word);
448 prev_word_it.add_before_then_move(combo);
455 delete word_it.extract();
464 prev_word_it = word_it;
467 prev_right = box.
right();
477 int16_t mode,
bool improved) {
478 WERD_RES_IT word_res_it(&perm);
483 for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list();
484 word_res_it.forward()) {
485 if (!word_res_it.data()->part_of_combo) {
487 word_res_it.data()->best_choice->unichar_string();
496 tprintf(
"EXTRACTED (%d): \"", score);
499 tprintf(
"TESTED (%d): \"", score);
502 tprintf(
"RETURNED (%d): \"", score);
506 for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list();
507 word_res_it.forward()) {
508 if (!word_res_it.data()->part_of_combo) {
510 word_res_it.data()->best_choice->unichar_string().c_str(),
511 static_cast<int>(word_res_it.data()->best_choice->permuter()));
515 }
else if (improved) {
517 for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list();
518 word_res_it.forward()) {
519 if (!word_res_it.data()->part_of_combo) {
521 word_res_it.data()->best_choice->unichar_string().c_str(),
522 static_cast<int>(word_res_it.data()->best_choice->permuter()));
565 WERD_RES_LIST sub_word_list;
566 WERD_RES_IT sub_word_list_it(&sub_word_list);
571 word_res = word_res_it.data();
583 tprintf(
"FP fixspace working on \"%s\"\n",
587 sub_word_list_it.add_after_stay_put(word_res_it.extract());
589 new_length = sub_word_list.length();
590 word_res_it.add_list_before(&sub_word_list);
591 for (; !word_res_it.at_last() && new_length > 1; new_length--) {
592 word_res_it.forward();
599 WERD_RES_IT best_perm_it(&best_perm);
600 WERD_RES_LIST current_perm;
601 WERD_RES_IT current_perm_it(&current_perm);
603 int16_t current_score;
604 bool improved =
false;
608 dump_words(best_perm, best_score, 1, improved);
610 old_word_res = best_perm_it.data();
619 while (best_score !=
PERFECT_WERDS && !current_perm.empty()) {
622 dump_words(current_perm, current_score, 2, improved);
623 if (current_score > best_score) {
626 best_score = current_score;
633 dump_words(best_perm, best_score, 3, improved);
643 WERD_RES_IT word_it(&words);
644 WERD_RES_IT worst_word_it;
645 float worst_noise_score = 9999;
646 int worst_blob_index = -1;
651 C_BLOB_IT rej_cblob_it;
652 C_BLOB_LIST new_blob_list;
653 C_BLOB_IT new_blob_it;
654 C_BLOB_IT new_rej_cblob_it;
656 int16_t start_of_noise_blob;
659 for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
661 if (blob_index > -1 && worst_noise_score > noise_score) {
662 worst_noise_score = noise_score;
663 worst_blob_index = blob_index;
664 worst_word_it = word_it;
667 if (worst_blob_index < 0) {
674 word_res = worst_word_it.data();
678 new_blob_it.set_to_list(&new_blob_list);
680 for (i = 0; i < worst_blob_index; i++, blob_it.forward()) {
681 new_blob_it.add_after_then_move(blob_it.extract());
683 start_of_noise_blob = blob_it.data()->bounding_box().left();
684 delete blob_it.extract();
686 new_word =
new WERD(&new_blob_list, word_res->
word);
694 (!rej_cblob_it.empty() &&
695 (rej_cblob_it.data()->bounding_box().left() < start_of_noise_blob));
696 rej_cblob_it.forward()) {
697 new_rej_cblob_it.add_after_then_move(rej_cblob_it.extract());
700 auto* new_word_res =
new WERD_RES(new_word);
701 new_word_res->combination =
true;
702 worst_word_it.add_before_then_move(new_word_res);
708 float *worst_noise_score) {
709 float noise_score[512];
731 tprintf(
"FP fixspace Noise metrics for \"%s\": ",
738 noise_score[i] = non_noise_limit;
743 tprintf(
"%1.1f ", noise_score[i]);
752 if (noise_score[i] >= non_noise_limit) {
764 if (noise_score[i] >= non_noise_limit) {
773 if (min_noise_blob > max_noise_blob)
776 *worst_noise_score = small_limit;
778 for (i = min_noise_blob; i <= max_noise_blob; i++) {
779 if (noise_score[i] < *worst_noise_score) {
781 *worst_noise_score = noise_score[i];
789 int16_t outline_count = 0;
790 int16_t max_dimension;
791 int16_t largest_outline_dimension = 0;
795 box = ol->bounding_box();
797 max_dimension = box.
height();
799 max_dimension = box.
width();
802 if (largest_outline_dimension < max_dimension)
803 largest_outline_dimension = max_dimension;
806 if (outline_count > 5) {
808 largest_outline_dimension *= 2;
815 largest_outline_dimension /= 2;
818 return largest_outline_dimension;
824 const bool show_map_detail =
false;
829 tprintf(
"Blob count: %d (word); %d/%d (rebuild word)\n",
835 if (show_map_detail) {
844 tprintf(
"Done flag: %s\n\n", word->
done ?
"TRUE" :
"FALSE");
858 WERD_RES_IT word_it(&word_res_list);
864 for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
865 word = word_it.data();