21 #include "config_auto.h"
31 #ifndef DISABLED_LEGACY_ENGINE
40 #ifndef DISABLED_LEGACY_ENGINE
63 TBOX &selection_box) {
79 int16_t good_char_qual;
84 if (lstm_recognizer_ ==
nullptr) {
85 #ifndef DISABLED_LEGACY_ENGINE
87 #endif // ndef DISABLED_LEGACY_ENGINE
91 #ifndef DISABLED_LEGACY_ENGINE
95 tprintf(
"\n%d chars; word_blob_quality: %d; outline_errs: %d; "
96 "char_quality: %d; good_char_quality: %d\n",
101 #endif // ndef DISABLED_LEGACY_ENGINE
121 const TBOX& target_word_box,
122 const char* word_config,
124 if (word_config !=
nullptr) {
126 if (backup_config_file_ ==
nullptr) {
128 FILE* config_fp = fopen(backup_config_file_,
"wb");
129 if (config_fp ==
nullptr) {
130 tprintf(
"Error, failed to open file \"%s\"\n", backup_config_file_);
140 if (backup_config_file_ !=
nullptr) {
144 backup_config_file_ =
nullptr;
147 }
else if (pass > 1 && !word_box.
major_overlap(target_word_box)) {
155 const TBOX* target_word_box,
156 const char* word_config,
163 if (target_word_box ==
nullptr ||
165 *target_word_box, word_config, 1)) {
170 for (
int w = 0; w < words->
size(); ++w) {
172 if (w > 0) (*words)[w].prev_word = &(*words)[w - 1];
178 if (pass_n == 1 || !word->
word->
done) {
186 }
else if (pass_n == 2) {
193 for (
int s = 0; s <= sub_langs_.size(); ++s) {
195 Tesseract* lang_t = s < sub_langs_.size() ? sub_langs_[s] :
this;
201 word_res->SetupForRecognition(
222 for (
int w = 0; w < words->
size(); ++w) {
224 if (w > 0) word->
prev_word = &(*words)[w - 1];
225 if (monitor !=
nullptr) {
241 for (; w < words->
size(); ++w) {
255 while (pr_it->
word() !=
nullptr && pr_it->
word() != word->
word)
258 bool make_next_word_fuzzy =
false;
259 #ifndef DISABLED_LEGACY_ENGINE
265 #endif // ndef DISABLED_LEGACY_ENGINE
269 tprintf(
"Pass%d: %s [%s]\n", pass_n,
274 if (make_next_word_fuzzy && pr_it->
word() !=
nullptr) {
304 const TBOX* target_word_box,
305 const char* word_config,
314 if (dopasses==0 || dopasses==1) {
318 #ifndef DISABLED_LEGACY_ENGINE
329 for (
int i = 0; i < sub_langs_.size(); ++i) {
331 sub_langs_[i]->SwitchAdaptiveClassifier();
333 sub_langs_[i]->StartBackupAdaptiveClassifier();
337 #endif // ndef DISABLED_LEGACY_ENGINE
343 #ifndef DISABLED_LEGACY_ENGINE
347 #endif // ndef DISABLED_LEGACY_ENGINE
358 most_recently_used_ =
this;
383 if (dopasses == 1)
return true;
385 #ifndef DISABLED_LEGACY_ENGINE
396 most_recently_used_ =
this;
427 #endif // ndef DISABLED_LEGACY_ENGINE
434 #ifndef DISABLED_LEGACY_ENGINE
440 #endif //ndef DISABLED_LEGACY_ENGINE
442 const auto pageseg_mode = static_cast<PageSegMode>(
459 if (monitor !=
nullptr) {
465 #ifndef DISABLED_LEGACY_ENGINE
474 while (word_it.
forward() !=
nullptr &&
478 if (!word_it.
word())
break;
485 tprintf(
"Skipping because one of the words is W_REP_CHAR\n");
510 tprintf(
"Top choice \"%s %s\" verified by bigram model.\n",
516 tprintf(
"Examining alt choices for \"%s %s\".\n",
527 float best_rating = 0.0;
530 for (prev_it.mark_cycle_pt(); !prev_it.cycled_list(); prev_it.forward()) {
539 for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
550 if (overrides_word1.
size() == 1 ||
553 best_idx = overrides_word1.
size() - 1;
558 if (!overrides_word1.
empty()) {
561 *overrides_word1[best_idx]) &&
563 *overrides_word2[best_idx])) {
565 tprintf(
"Top choice \"%s %s\" verified (sans case) by bigram "
566 "model.\n", orig_w1_str.
c_str(), orig_w2_str.
c_str());
570 const STRING new_w1_str = overrides_word1[best_idx]->unichar_string();
571 const STRING new_w2_str = overrides_word2[best_idx]->unichar_string();
572 if (new_w1_str != orig_w1_str) {
575 if (new_w2_str != orig_w2_str) {
579 STRING choices_description;
580 int num_bigram_choices
581 = overrides_word1.
size() * overrides_word2.
size();
582 if (num_bigram_choices == 1) {
583 choices_description =
"This was the unique bigram choice.";
587 const int kMaxChoicesToPrint = 20;
588 for (
int i = 0; i < overrides_word1.
size() &&
589 i < kMaxChoicesToPrint; i++) {
590 if (i > 0) { bigrams_list +=
", "; }
595 choices_description =
"There were many choices: {";
596 choices_description += bigrams_list;
597 choices_description +=
"}";
599 choices_description.
add_str_int(
"There were ", num_bigram_choices);
600 choices_description +=
" compatible bigrams.";
603 tprintf(
"Replaced \"%s %s\" with \"%s %s\" with bigram model. %s\n",
606 choices_description.
c_str());
614 const TBOX* target_word_box,
615 const char* word_config) {
624 if (monitor !=
nullptr) {
637 if (target_word_box &&
639 *target_word_box, word_config, 4)) {
653 int16_t all_char_quality;
654 int16_t accepted_all_char_quality;
666 (blob_quality == 0) && (outline_errs >= chars_in_word))
674 (
"QUALITY: num_chs= %d num_rejs= %d %5.3f blob_qual= %d %5.3f"
675 " outline_errs= %d %5.3f char_qual= %d %5.3f good_ch_qual= %d %5.3f\n",
689 bool good_quality_doc =
707 #endif // ndef DISABLED_LEGACY_ENGINE
721 static_cast<IncorrectResultReason>(bl)),
743 float word_x_height = word->
x_height;
744 if (word_x_height < word->best_choice->min_x_height() ||
752 const double small_cap_delta = (x_height - small_cap_xheight) / 2.0;
754 small_cap_xheight - small_cap_delta <= word_x_height &&
755 word_x_height <= small_cap_xheight + small_cap_delta) {
765 if (num_upper > 0 && num_lower == 0)
776 *next_left = INT32_MAX;
777 if (index < words.
size()) {
778 *right = words[index]->word->bounding_box().right();
779 if (index + 1 < words.
size())
780 *next_left = words[index + 1]->word->bounding_box().left();
786 static void EvaluateWordSpan(
const PointerVector<WERD_RES>& words,
787 int first_index,
int end_index,
float* rating,
788 float* certainty,
bool* bad,
789 bool* valid_permuter) {
790 if (end_index <= first_index) {
792 *valid_permuter =
false;
794 for (
int index = first_index; index < end_index && index < words.size();
797 if (choice ==
nullptr) {
800 *rating += choice->
rating();
801 *certainty = std::min(*certainty, choice->
certainty());
803 *valid_permuter =
false;
815 static int SelectBestWords(
double rating_ratio,
816 double certainty_margin,
818 PointerVector<WERD_RES>* new_words,
819 PointerVector<WERD_RES>* best_words) {
825 int num_best = 0, num_new = 0;
826 while (b < best_words->size() || n < new_words->size()) {
828 int start_b = b, start_n = n;
829 while (b < best_words->size() || n < new_words->size()) {
830 int b_right = -INT32_MAX;
831 int next_b_left = INT32_MAX;
832 WordGap(*best_words, b, &b_right, &next_b_left);
833 int n_right = -INT32_MAX;
834 int next_n_left = INT32_MAX;
835 WordGap(*new_words, n, &n_right, &next_n_left);
836 if (std::max(b_right, n_right) < std::min(next_b_left, next_n_left)) {
841 if ((b_right < n_right && b < best_words->size()) ||
842 n == new_words->size())
848 float b_rating = 0.0f, n_rating = 0.0f;
850 float b_certainty = 0.0f, n_certainty = 0.0f;
852 bool b_bad =
false, n_bad =
false;
854 bool b_valid_permuter =
true, n_valid_permuter =
true;
855 const int end_b = b < best_words->size() ? b + 1 : b;
856 const int end_n = n < new_words->size() ? n + 1 : n;
857 EvaluateWordSpan(*best_words, start_b, end_b, &b_rating, &b_certainty,
858 &b_bad, &b_valid_permuter);
859 EvaluateWordSpan(*new_words, start_n, end_n, &n_rating, &n_certainty,
860 &n_bad, &n_valid_permuter);
861 bool new_better =
false;
862 if (!n_bad && (b_bad || (n_certainty > b_certainty &&
863 n_rating < b_rating) ||
864 (!b_valid_permuter && n_valid_permuter &&
865 n_rating < b_rating * rating_ratio &&
866 n_certainty > b_certainty - certainty_margin))) {
868 for (
int i = start_n; i < end_n; ++i) {
870 (*new_words)[i] =
nullptr;
876 for (
int i = start_b; i < end_b; ++i) {
878 (*best_words)[i] =
nullptr;
883 tprintf(
"%d new words %s than %d old words: r: %g v %g c: %g v %g"
884 " valid dict: %d v %d\n",
885 end_n - start_n, new_better ?
"better" :
"worse",
886 end_b - start_b, n_rating, b_rating,
887 n_certainty, b_certainty, n_valid_permuter, b_valid_permuter);
895 for (
int i = 0; i < out_words.
size(); ++i)
896 best_words->push_back(out_words[i]);
897 return num_new - num_best;
908 tprintf(
"Trying word using lang %s, oem %d\n",
913 (this->*recognizer)(word_data, in_word, &new_words);
914 if (new_words.
empty()) {
921 for (
int i = 0; i < new_words.
size(); ++i)
922 new_words[i]->DebugTopChoice(
"Lang result");
928 debug, &new_words, best_words);
933 for (
int w = 0; w < words.
size(); ++w) {
934 if (words[w]->tess_failed || !words[w]->tess_accepted)
return false;
939 #ifndef DISABLED_LEGACY_ENGINE
945 bool* make_next_word_fuzzy) {
946 *make_next_word_fuzzy =
false;
960 &word_wanted, &overlapped_any_blob,
968 int num_overlapped = 0;
969 int num_overlapped_used = 0;
970 for (
int i = 0; i < overlapped_any_blob.
size(); ++i) {
971 if (overlapped_any_blob[i]) {
973 if (word_wanted[i]) ++num_overlapped_used;
977 outlines[i] =
nullptr;
983 int non_overlapped = 0;
984 int non_overlapped_used = 0;
985 for (
int i = 0; i < word_wanted.
size(); ++i) {
986 if (word_wanted[i]) ++non_overlapped_used;
987 if (outlines[i] !=
nullptr) ++non_overlapped_used;
990 tprintf(
"Used %d/%d overlapped %d/%d non-overlaped diacritics on word:",
991 num_overlapped_used, num_overlapped, non_overlapped_used,
997 make_next_word_fuzzy)) {
1002 return num_overlapped_used != 0 || non_overlapped_used != 0;
1024 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1025 C_BLOB* blob = blob_it.data();
1028 int num_blob_outlines = 0;
1029 for (
int i = 0; i < outlines.
size(); ++i) {
1031 !(*word_wanted)[i]) {
1032 blob_wanted[i] =
true;
1033 (*overlapped_any_blob)[i] =
true;
1034 ++num_blob_outlines;
1038 tprintf(
"%d noise outlines overlap blob at:", num_blob_outlines);
1047 outlines, num_blob_outlines,
1049 for (
int i = 0; i < blob_wanted.
size(); ++i) {
1050 if (blob_wanted[i]) {
1052 (*word_wanted)[i] =
true;
1053 (*target_blobs)[i] = blob;
1071 for (
int i = 0; i < outlines.
size(); ++i) {
1072 if (outlines[i] ==
nullptr)
continue;
1075 int num_blob_outlines = 0;
1076 TBOX total_ol_box(outlines[i]->bounding_box());
1077 while (i < outlines.
size() && outlines[i] !=
nullptr) {
1078 blob_wanted[i] =
true;
1079 total_ol_box += outlines[i]->bounding_box();
1081 ++num_blob_outlines;
1085 while (!blob_it.at_last() &&
1086 blob_it.data_relative(1)->bounding_box().left() <=
1087 total_ol_box.
left()) {
1093 tprintf(
"Num blobless outlines = %d\n", num_blob_outlines);
1094 C_BLOB* left_blob = blob_it.data();
1096 C_BLOB* right_blob = blob_it.at_last() ? nullptr : blob_it.data_relative(1);
1097 if ((left_box.
x_overlap(total_ol_box) || right_blob ==
nullptr ||
1100 outlines, num_blob_outlines,
1103 for (
int j = 0; j < blob_wanted.
size(); ++j) {
1104 if (blob_wanted[j]) {
1105 (*word_wanted)[j] =
true;
1106 (*target_blobs)[j] = left_blob;
1109 }
else if (right_blob !=
nullptr &&
1113 right_blob, outlines,
1114 num_blob_outlines, &blob_wanted)) {
1116 for (
int j = 0; j < blob_wanted.
size(); ++j) {
1117 if (blob_wanted[j]) {
1118 (*word_wanted)[j] =
true;
1119 (*target_blobs)[j] = right_blob;
1123 outlines, num_blob_outlines,
1126 for (
int j = 0; j < blob_wanted.
size(); ++j) {
1127 if (blob_wanted[j]) {
1128 (*word_wanted)[j] =
true;
1129 (*target_blobs)[j] =
nullptr;
1144 float target_cert = certainty_threshold;
1145 if (blob !=
nullptr) {
1149 tprintf(
"No Noise blob classified as %s=%g(%g) at:", best_str.
c_str(),
1150 target_cert, target_c2);
1160 pr_it, blob, &all_str);
1163 for (
int i = 0; i < test_outlines.
size(); ++i) {
1164 if (test_outlines[i]) ol_box += outlines[i]->bounding_box();
1166 tprintf(
"All Noise blob classified as %s=%g, delta=%g at:",
1167 all_str.
c_str(), best_cert, best_cert - target_cert);
1173 while (num_outlines > 1 && best_index >= 0 &&
1174 (blob ==
nullptr || best_cert < target_cert || blob !=
nullptr)) {
1177 for (
int i = 0; i < outlines.
size(); ++i) {
1178 if (test_outlines[i]) {
1179 test_outlines[i] =
false;
1185 for (
int j = 0; j < outlines.
size(); ++j) {
1186 if (test_outlines[j]) ol_box += outlines[j]->bounding_box();
1187 tprintf(
"%d", test_outlines[j]);
1189 tprintf(
" blob classified as %s=%g, delta=%g) at:", str.
c_str(),
1190 cert, cert - target_cert);
1193 if (cert > best_cert) {
1196 best_outlines = test_outlines;
1198 test_outlines[i] =
true;
1201 if (best_index >= 0) {
1202 test_outlines[best_index] =
false;
1206 if (best_cert >= target_cert) {
1208 *ok_outlines = best_outlines;
1210 tprintf(
"%s noise combination ", blob ?
"Adding" :
"New");
1211 for (
int i = 0; i < best_outlines.
size(); ++i) {
1212 tprintf(
"%d", best_outlines[i]);
1214 tprintf(
" yields certainty %g, beating target of %g\n", best_cert,
1231 C_BLOB* local_blob =
nullptr;
1232 if (blob !=
nullptr) {
1234 ol_it.set_to_list(blob->
out_list());
1235 first_to_keep = ol_it.data();
1237 for (
int i = 0; i < ok_outlines.
size(); ++i) {
1238 if (ok_outlines[i]) {
1240 if (blob ==
nullptr) {
1241 local_blob =
new C_BLOB(outlines[i]);
1243 ol_it.set_to_list(blob->
out_list());
1245 ol_it.add_before_stay_put(outlines[i]);
1251 ol_it.move_to_first();
1252 if (first_to_keep ==
nullptr) {
1254 for (; !ol_it.empty(); ol_it.forward()) ol_it.extract();
1259 for (; ol_it.data() != first_to_keep; ol_it.forward()) {
1284 if (wd.word->raw_choice !=
nullptr) {
1285 tprintf(
"word xheight=%g, row=%g, range=[%g,%g]\n", word_res->
x_height,
1286 wd.row->x_height(), wd.word->raw_choice->min_x_height(),
1287 wd.word->raw_choice->max_x_height());
1289 tprintf(
"Got word with null raw choice xheight=%g, row=%g\n", word_res->
x_height,
1290 wd.row->x_height());
1294 if (wd.word->raw_choice !=
nullptr) {
1295 cert = wd.word->raw_choice->certainty();
1296 float rat = wd.word->raw_choice->rating();
1297 *c2 = rat > 0.0f ? cert * cert / rat : 0.0f;
1298 *best_str = wd.word->raw_choice->unichar_string();
1308 #endif // ndef DISABLED_LEGACY_ENGINE
1320 #ifdef DISABLED_LEGACY_ENGINE
1325 #endif // def DISABLED_LEGACY_ENGINE
1331 clock_t start_t = clock();
1334 tprintf(
"%s word with lang %s at:",
1335 word->
done ?
"Already done" :
"Processing",
1345 int sub = sub_langs_.size();
1346 if (most_recently_used_ !=
this) {
1348 for (sub = 0; sub < sub_langs_.size() &&
1349 most_recently_used_ != sub_langs_[sub]; ++sub) {}
1352 *word_data, recognizer, debug, &word_data->
lang_words[sub], &best_words);
1353 Tesseract* best_lang_tess = most_recently_used_;
1354 if (!WordsAcceptable(best_words)) {
1356 if (most_recently_used_ !=
this &&
1360 best_lang_tess =
this;
1362 for (
int i = 0; !WordsAcceptable(best_words) && i < sub_langs_.size();
1364 if (most_recently_used_ != sub_langs_[i] &&
1368 best_lang_tess = sub_langs_[i];
1372 most_recently_used_ = best_lang_tess;
1373 if (!best_words.
empty()) {
1374 if (best_words.
size() == 1 && !best_words[0]->combination) {
1379 word_data->
word = best_words.
back();
1386 clock_t ocr_t = clock();
1388 tprintf(
"%s (ocr took %.2f sec)\n",
1390 static_cast<double>(ocr_t-start_t)/CLOCKS_PER_SEC);
1403 ROW* row = word_data.
row;
1407 #ifndef ANDROID_BUILD
1408 #ifdef DISABLED_LEGACY_ENGINE
1413 #endif // def DISABLED_LEGACY_ENGINE
1416 if (!out_words->
empty())
1425 #ifndef DISABLED_LEGACY_ENGINE
1432 #endif // ndef DISABLED_LEGACY_ENGINE
1434 #endif // ndef ANDROID_BUILD
1436 #ifndef DISABLED_LEGACY_ENGINE
1457 #endif // ndef DISABLED_LEGACY_ENGINE
1463 tprintf(
"New XHT Match:%s = %s ",
1474 new_x_ht > 0.1 ?
"STILL DOUBT" :
"OK",
1475 accept_new_word ?
"ACCEPTED" :
"");
1478 #ifndef DISABLED_LEGACY_ENGINE
1486 if (original_misfits == 0)
1488 float baseline_shift = 0.0f;
1490 if (baseline_shift != 0.0f) {
1496 if (original_misfits > 0) {
1497 float new_baseline_shift;
1519 float baseline_shift,
float new_x_ht,
1521 bool accept_new_x_ht =
false;
1538 tprintf(
"Old misfits=%d with x-height %f, new=%d with x-height %f\n",
1540 new_misfits, new_x_ht);
1541 tprintf(
"Old rating= %f, certainty=%f, new=%f, %f\n",
1547 accept_new_x_ht = new_misfits < original_misfits &&
1556 if (accept_new_x_ht) {
1563 #endif // ndef DISABLED_LEGACY_ENGINE
1578 #ifndef DISABLED_LEGACY_ENGINE
1579 ROW* row = word_data.
row;
1606 #ifndef GRAPHICS_DISABLED
1620 #endif // ndef DISABLED_LEGACY_ENGINE
1623 #ifndef DISABLED_LEGACY_ENGINE
1641 tprintf(
"POST FIX_QUOTES FAIL String:\"%s\"; Strlen=%d;"
1658 #endif // ndef DISABLED_LEGACY_ENGINE
1669 if (choice !=
nullptr) {
1670 if (best_choice ==
nullptr || choice->
rating() < best_choice->
rating())
1671 best_choice = choice;
1680 static void CorrectRepcharChoices(
BLOB_CHOICE* blob_choice,
1686 if (choice ==
nullptr) {
1688 choice_it.add_before_stay_put(
new BLOB_CHOICE(*blob_choice));
1692 for (
int i = 0; i < word->
length(); ++i) {
1711 for (
int i = 0; i < word.
length(); ++i) {
1717 int max_count = rep_ch.MaxCount(&maxch_id);
1719 BLOB_CHOICE* best_choice = FindBestMatchingChoice(maxch_id, word_res);
1720 if (best_choice ==
nullptr) {
1721 tprintf(
"Failed to find a choice for %s, occurring %d times\n",
1725 word_res->
done =
true;
1731 C_BLOB* prev_blob = blob_it.data();
1732 for (blob_it.forward(); !blob_it.at_first(); blob_it.forward()) {
1733 C_BLOB* blob = blob_it.data();
1735 gap -= prev_blob->bounding_box().right();
1740 CorrectRepcharChoices(best_choice, word_res);
1745 const UNICHARSET& char_set,
const char *s,
const char *lengths) {
1748 int leading_punct_count;
1749 int upper_count = 0;
1750 int hyphen_pos = -1;
1753 if (strlen (lengths) > 20)
1759 offset += lengths[i++];
1760 leading_punct_count = i;
1763 while (s[offset] !=
'\0' && char_set.
get_isupper(s + offset, lengths[i])) {
1764 offset += lengths[i++];
1767 if (upper_count > 1) {
1771 while (s[offset] !=
'\0' && char_set.
get_islower(s + offset, lengths[i])) {
1772 offset += lengths[i++];
1780 if (lengths[i] == 1 && s[offset] ==
'-') {
1782 offset += lengths[i++];
1783 if (s[offset] !=
'\0') {
1784 while ((s[offset] !=
'\0') &&
1786 offset += lengths[i++];
1788 if (i < hyphen_pos + 3)
1793 if (lengths[i] == 1 && (s[offset] ==
'\'') &&
1794 lengths[i + 1] == 1 && (s[offset + lengths[i]] ==
's')) {
1795 offset += lengths[i++];
1796 offset += lengths[i++];
1799 if (upper_count > 0)
1806 if (lengths[i] == 1 && s[offset] !=
'\0' &&
1808 offset += lengths[i++];
1809 if (lengths[i] == 1 && s[offset] !=
'\0' && i > 0 &&
1810 s[offset - lengths[i - 1]] != s[offset] &&
1812 offset += lengths[i++];
1814 if (s[offset] !=
'\0')
1823 if (s[0] !=
'\0' && char_set.
get_isupper(s, lengths[0])) {
1825 while (s[offset] !=
'\0' &&
1827 lengths[i + 1] == 1 && s[offset + lengths[i]] ==
'.') {
1828 offset += lengths[i++];
1829 offset += lengths[i++];
1832 else if (s[0] !=
'\0' && char_set.
get_islower(s, lengths[0])) {
1834 while (s[offset] !=
'\0' &&
1836 lengths[i + 1] == 1 && s[offset + lengths[i]] ==
'.') {
1837 offset += lengths[i++];
1838 offset += lengths[i++];
1841 if (s[offset] !=
'\0')
1849 bool show_map_detail =
false;
1866 tprintf (
"classify_word_pass1 start\n");
1870 tprintf (
"make_reject_map: initial map");
1873 tprintf (
"make_reject_map: after NN");
1876 tprintf (
"classify_word_pass2 - START");
1879 tprintf (
"classify_word_pass2 - Pre Xht");
1882 tprintf (
"classify_word_pass2 - END");
1883 show_map_detail =
true;
1895 tprintf (
"After Poor quality rejection");
1898 tprintf (
"unrej_good_quality_words - START");
1901 tprintf (
"unrej_good_quality_words - END");
1904 tprintf (
"Write results pass");
1905 show_map_detail =
true;
1912 if (show_map_detail) {
1920 tprintf(
"null best choice\n");
1923 tprintf (
"Done flag: %s\n\n", word->
done ?
"TRUE" :
"FALSE");
1935 static void find_modal_font(
1944 font = static_cast<int16_t>(fonts->
mode ());
1947 *font_count =
count < INT8_MAX ?
count : INT8_MAX;
1948 fonts->
add (font, -*font_count);
1967 #ifndef DISABLED_LEGACY_ENGINE
1969 if (fontinfo_size == 0)
return;
1975 tprintf(
"Examining fonts in %s\n",
1980 if (choice ==
nullptr)
continue;
1982 for (
int f = 0; f < fonts.
size(); ++f) {
1983 const int fontinfo_id = fonts[f].fontinfo_id;
1984 if (0 <= fontinfo_id && fontinfo_id < fontinfo_size) {
1985 font_total_score[fontinfo_id] += fonts[f].score;
1990 int score1 = 0, score2 = 0;
1991 int16_t font_id1 = -1, font_id2 = -1;
1992 for (
int f = 0; f < fontinfo_size; ++f) {
1994 tprintf(
"Font %s, total score = %d\n",
1997 if (font_total_score[f] > score1) {
1999 font_id2 = font_id1;
2000 score1 = font_total_score[f];
2002 }
else if (font_total_score[f] > score2) {
2003 score2 = font_total_score[f];
2017 tprintf(
"Word modal font=%s, score=%d, 2nd choice %s/%d\n",
2022 tprintf(
"Word modal font=%s, score=%d. No 2nd choice\n",
2027 #endif // ndef DISABLED_LEGACY_ENGINE
2030 #ifndef DISABLED_LEGACY_ENGINE
2039 STATS doc_fonts(0, font_table_size_);
2044 word = page_res_it.
word();
2053 int8_t doc_font_count;
2054 find_modal_font(&doc_fonts, &doc_font, &doc_font_count);
2055 if (doc_font_count == 0)
2058 const FontInfo* modal_font =
nullptr;
2061 word = page_res_it.
word();
2076 word = page_res_it.
word();
2080 if (!(
count == length || (length > 3 &&
count >= length * 3 / 4))) {
2087 #endif // ndef DISABLED_LEGACY_ENGINE
2096 if (word->best_choices.singleton())
2100 if (word->tesseract->getDict().valid_word(*best) != 0)
2103 WERD_CHOICE_IT choice_it(&word->best_choices);
2104 for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
2105 choice_it.forward()) {
2107 if (word->tesseract->getDict().valid_word(*alternate)) {
2110 tprintf(
"Dictionary correction replaces best choice '%s' with '%s'\n",
2115 word->ReplaceBestChoice(alternate);