24 #ifndef DISABLED_LEGACY_ENGINE
49 if (best_choice.
length() == 0)
return false;
53 bool is_case_ok =
case_ok(best_choice);
56 const char *xht =
"UNKNOWN";
57 switch (xheight_consistency) {
58 case XH_GOOD: xht =
"NORMAL";
break;
61 default: xht =
"UNKNOWN";
63 tprintf(
"\nStopper: %s (word=%c, case=%c, xht_ok=%s=[%g,%g])\n",
65 (is_valid_word ?
'y' :
'n'),
66 (is_case_ok ?
'y' :
'n'),
72 if (reject_offset_ <= 0.0f && !is_valid_word)
return false;
73 if (is_valid_word && is_case_ok) {
82 tprintf(
"Stopper: Rating = %4.1f, Certainty = %4.1f, Threshold = %4.1f\n",
86 best_choice.
certainty() > CertaintyThreshold &&
92 tprintf(
"AcceptableChoice() returned false"
93 " (no_dang_ambig:%d cert:%.4g thresh:%g uniform:%d)\n",
108 tprintf(
"\nRejecter: %s (word=%c, case=%c, unambig=%c, multiple=%c)\n",
127 tprintf(
"Rejecter: Certainty = %4.1f, Threshold = %4.1f ",
142 #if !defined(DISABLED_LEGACY_ENGINE)
146 bool fix_replaceable,
149 tprintf(
"\nRunning NoDangerousAmbig() for %s\n",
157 bool ambigs_found =
false;
173 for (
int pass = 0; pass < (fix_replaceable ? 2 : 1); ++pass) {
174 bool replace = (fix_replaceable && pass == 0);
182 for (i = 0; i < best_choice->
length(); ++i) {
183 auto *lst =
new BLOB_CHOICE_LIST();
184 BLOB_CHOICE_IT lst_it(lst);
192 int wrong_ngram_index;
195 for (i = 0; i < best_choice->
length(); blob_index += best_choice->
state(i),
199 tprintf(
"Looking for %s ngrams starting with %s:\n",
200 replace ?
"replaceable" :
"ambiguous",
203 int num_wrong_blobs = best_choice->
state(i);
204 wrong_ngram_index = 0;
205 wrong_ngram[wrong_ngram_index] = curr_unichar_id;
206 if (curr_unichar_id == INVALID_UNICHAR_ID ||
207 curr_unichar_id >= table.
size() ||
208 table[curr_unichar_id] ==
nullptr) {
211 AmbigSpec_IT spec_it(table[curr_unichar_id]);
212 for (spec_it.mark_cycle_pt(); !spec_it.cycled_list();) {
213 const AmbigSpec *ambig_spec = spec_it.data();
214 wrong_ngram[wrong_ngram_index+1] = INVALID_UNICHAR_ID;
220 tprintf(
"current ngram from spec: ");
222 tprintf(
"comparison result: %d\n", compare);
226 if (fixpt !=
nullptr) {
229 blob_index, blob_index + num_wrong_blobs, replace,
233 tprintf(
"fixpt+=(%d %d %d %d %s)\n", blob_index,
234 blob_index + num_wrong_blobs,
false,
243 tprintf(
"replace ambiguity with %s : ",
251 best_choice, ratings);
260 for (
int tmp_index = 0; tmp_index <= wrong_ngram_index;
269 BLOB_CHOICE_IT bc_it(ambig_blob_choices[i+tmp_index]);
276 }
else if (compare == -1) {
278 ((next_index = wrong_ngram_index+1+i) < best_choice->
length())) {
281 wrong_ngram[++wrong_ngram_index] =
283 num_wrong_blobs += best_choice->
state(next_index);
298 tprintf(
"\nResulting ambig_blob_choices:\n");
299 for (i = 0; i < ambig_blob_choices.
size(); ++i) {
305 ambigs_found = (alt_word->
rating() < 0.0);
308 tprintf (
"Stopper: Possible ambiguous word = %s\n",
311 if (fixpt !=
nullptr) {
317 for (i = 0; i < alt_word->
length(); ++i) {
319 bool replacement_is_ngram =
322 if (replacement_is_ngram) {
325 int step = uchset.
step(str);
328 int end_i = orig_i + alt_word->
state(i);
329 if (alt_word->
state(i) > 1 ||
330 (orig_i + 1 == end_i && replacement_is_ngram)) {
333 for (
int j = 0; j < orig_i; ++j)
334 blob_start += best_choice->
state(j);
335 int blob_end = blob_start;
336 for (
int j = orig_i; j < end_i; ++j)
337 blob_end += best_choice->
state(j);
339 replacement_is_ngram, leftmost_id));
341 tprintf(
"fixpt->dangerous+=(%d %d %d %d %s)\n", orig_i, end_i,
342 true, replacement_is_ngram,
346 orig_i += alt_word->
state(i);
352 if (output_ambig_words_file_ !=
nullptr) {
353 fprintf(output_ambig_words_file_,
"\n");
357 return !ambigs_found;
362 #endif // !defined(DISABLED_LEGACY_ENGINE)
365 reject_offset_ = 0.0;
375 int num_blobs_to_replace = 0;
376 int begin_blob_index = 0;
380 float new_rating = 0.0f;
381 float new_certainty = 0.0f;
383 for (i = 0; i < wrong_ngram_begin_index + wrong_ngram_size; ++i) {
384 if (i >= wrong_ngram_begin_index) {
385 int num_blobs = werd_choice->
state(i);
386 int col = begin_blob_index + num_blobs_to_replace;
387 int row = col + num_blobs - 1;
388 BLOB_CHOICE_LIST* choices = ratings->
get(col, row);
392 new_rating += old_choice->
rating();
393 new_certainty += old_choice->
certainty();
394 num_blobs_to_replace += num_blobs;
396 begin_blob_index += werd_choice->
state(i);
399 new_certainty /= wrong_ngram_size;
402 begin_blob_index + num_blobs_to_replace - 1);
403 if (!coord.Valid(*ratings)) {
406 if (ratings->
get(coord.col, coord.row) ==
nullptr)
407 ratings->
put(coord.col, coord.row,
new BLOB_CHOICE_LIST);
408 BLOB_CHOICE_LIST* new_choices = ratings->
get(coord.col, coord.row);
410 if (choice !=
nullptr) {
412 if (new_rating < choice->rating())
414 if (new_certainty < choice->certainty())
425 BLOB_CHOICE_IT it (new_choices);
426 it.add_to_end(choice);
430 for (
int replaced_count = 0; replaced_count < wrong_ngram_size;
432 if (replaced_count + 1 == wrong_ngram_size) {
434 num_blobs_to_replace, choice);
440 werd_choice->
print(
"ReplaceAmbig() ");
441 tprintf(
"Modified blob_choices: ");
447 int shortest = INT32_MAX;
449 for (
int w = 0; w < WordChoice.
length(); ++w) {
452 }
else if (curr_len > 0) {
453 if (curr_len < shortest) shortest = curr_len;
457 if (curr_len > 0 && curr_len < shortest) {
459 }
else if (shortest == INT32_MAX) {
467 float WorstCertainty = FLT_MAX;
468 float CertaintyThreshold;
469 double TotalCertainty;
470 double TotalCertaintySquared;
473 int word_length = word.
length();
478 TotalCertainty = TotalCertaintySquared = 0.0;
479 for (
int i = 0; i < word_length; ++i) {
481 TotalCertainty += Certainty;
482 TotalCertaintySquared += static_cast<double>(Certainty) * Certainty;
483 if (Certainty < WorstCertainty)
484 WorstCertainty = Certainty;
489 TotalCertainty -= WorstCertainty;
490 TotalCertaintySquared -= static_cast<double>(WorstCertainty) * WorstCertainty;
492 Mean = TotalCertainty / word_length;
493 Variance = ((word_length * TotalCertaintySquared -
494 TotalCertainty * TotalCertainty) /
495 (word_length * (word_length - 1)));
498 StdDev = sqrt(Variance);
504 if (word.
certainty() < CertaintyThreshold) {
506 tprintf(
"Stopper: Non-uniform certainty = %4.1f"
507 " (m=%4.1f, s=%4.1f, t=%4.1f)\n",