tesseract  4.0.0-1-g2a2b
stopper.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: stopper.cpp
3  ** Purpose: Stopping criteria for word classifier.
4  ** Author: Dan Johnson
5  ** History: Mon Apr 29 14:56:49 1991, DSJ, Created.
6  **
7  ** (c) Copyright Hewlett-Packard Company, 1988.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  ******************************************************************************/
18 
19 #include <cstdio>
20 #include <cstring>
21 #include <cctype>
22 #include <cmath>
23 
24 #include "stopper.h"
25 #include "ambigs.h"
26 #include "ccutil.h"
27 #include "dict.h"
28 #include "helpers.h"
29 #include "matchdefs.h"
30 #include "pageres.h"
31 #include "params.h"
32 #include "ratngs.h"
33 #include "unichar.h"
34 
35 /*----------------------------------------------------------------------------
36  Private Code
37 ----------------------------------------------------------------------------*/
38 
39 namespace tesseract {
40 
41 bool Dict::AcceptableChoice(const WERD_CHOICE& best_choice,
42  XHeightConsistencyEnum xheight_consistency) {
43  float CertaintyThreshold = stopper_nondict_certainty_base;
44  int WordSize;
45 
46  if (stopper_no_acceptable_choices) return false;
47 
48  if (best_choice.length() == 0) return false;
49 
50  bool no_dang_ambigs = !best_choice.dangerous_ambig_found();
51  bool is_valid_word = valid_word_permuter(best_choice.permuter(), false);
52  bool is_case_ok = case_ok(best_choice, getUnicharset());
53 
54  if (stopper_debug_level >= 1) {
55  const char *xht = "UNKNOWN";
56  switch (xheight_consistency) {
57  case XH_GOOD: xht = "NORMAL"; break;
58  case XH_SUBNORMAL: xht = "SUBNORMAL"; break;
59  case XH_INCONSISTENT: xht = "INCONSISTENT"; break;
60  default: xht = "UNKNOWN";
61  }
62  tprintf("\nStopper: %s (word=%c, case=%c, xht_ok=%s=[%g,%g])\n",
63  best_choice.unichar_string().string(),
64  (is_valid_word ? 'y' : 'n'),
65  (is_case_ok ? 'y' : 'n'),
66  xht,
67  best_choice.min_x_height(),
68  best_choice.max_x_height());
69  }
70  // Do not accept invalid words in PASS1.
71  if (reject_offset_ <= 0.0f && !is_valid_word) return false;
72  if (is_valid_word && is_case_ok) {
73  WordSize = LengthOfShortestAlphaRun(best_choice);
74  WordSize -= stopper_smallword_size;
75  if (WordSize < 0)
76  WordSize = 0;
77  CertaintyThreshold += WordSize * stopper_certainty_per_char;
78  }
79 
80  if (stopper_debug_level >= 1)
81  tprintf("Stopper: Rating = %4.1f, Certainty = %4.1f, Threshold = %4.1f\n",
82  best_choice.rating(), best_choice.certainty(), CertaintyThreshold);
83 
84  if (no_dang_ambigs &&
85  best_choice.certainty() > CertaintyThreshold &&
86  xheight_consistency < XH_INCONSISTENT &&
87  UniformCertainties(best_choice)) {
88  return true;
89  } else {
90  if (stopper_debug_level >= 1) {
91  tprintf("AcceptableChoice() returned false"
92  " (no_dang_ambig:%d cert:%.4g thresh:%g uniform:%d)\n",
93  no_dang_ambigs, best_choice.certainty(),
94  CertaintyThreshold,
95  UniformCertainties(best_choice));
96  }
97  return false;
98  }
99 }
100 
101 bool Dict::AcceptableResult(WERD_RES *word) const {
102  if (word->best_choice == nullptr) return false;
103  float CertaintyThreshold = stopper_nondict_certainty_base - reject_offset_;
104  int WordSize;
105 
106  if (stopper_debug_level >= 1) {
107  tprintf("\nRejecter: %s (word=%c, case=%c, unambig=%c, multiple=%c)\n",
108  word->best_choice->debug_string().string(),
109  (valid_word(*word->best_choice) ? 'y' : 'n'),
110  (case_ok(*word->best_choice, getUnicharset()) ? 'y' : 'n'),
111  word->best_choice->dangerous_ambig_found() ? 'n' : 'y',
112  word->best_choices.singleton() ? 'n' : 'y');
113  }
114 
115  if (word->best_choice->length() == 0 || !word->best_choices.singleton())
116  return false;
117  if (valid_word(*word->best_choice) &&
118  case_ok(*word->best_choice, getUnicharset())) {
119  WordSize = LengthOfShortestAlphaRun(*word->best_choice);
120  WordSize -= stopper_smallword_size;
121  if (WordSize < 0)
122  WordSize = 0;
123  CertaintyThreshold += WordSize * stopper_certainty_per_char;
124  }
125 
126  if (stopper_debug_level >= 1)
127  tprintf("Rejecter: Certainty = %4.1f, Threshold = %4.1f ",
128  word->best_choice->certainty(), CertaintyThreshold);
129 
130  if (word->best_choice->certainty() > CertaintyThreshold &&
132  if (stopper_debug_level >= 1)
133  tprintf("ACCEPTED\n");
134  return true;
135  } else {
136  if (stopper_debug_level >= 1)
137  tprintf("REJECTED\n");
138  return false;
139  }
140 }
141 
143  DANGERR *fixpt,
144  bool fix_replaceable,
145  MATRIX *ratings) {
146  if (stopper_debug_level > 2) {
147  tprintf("\nRunning NoDangerousAmbig() for %s\n",
148  best_choice->debug_string().string());
149  }
150 
151  // Construct BLOB_CHOICE_LIST_VECTOR with ambiguities
152  // for each unichar id in BestChoice.
153  BLOB_CHOICE_LIST_VECTOR ambig_blob_choices;
154  int i;
155  bool ambigs_found = false;
156  // For each position in best_choice:
157  // -- choose AMBIG_SPEC_LIST that corresponds to unichar_id at best_choice[i]
158  // -- initialize wrong_ngram with a single unichar_id at best_choice[i]
159  // -- look for ambiguities corresponding to wrong_ngram in the list while
160  // adding the following unichar_ids from best_choice to wrong_ngram
161  //
162  // Repeat the above procedure twice: first time look through
163  // ambigs to be replaced and replace all the ambiguities found;
164  // second time look through dangerous ambiguities and construct
165 // ambig_blob_choices with a fake blob choice for each ambiguity
166  // and pass them to dawg_permute_and_select() to search for
167  // ambiguous words in the dictionaries.
168  //
169  // Note that during the execution of the for loop (on the first pass)
170  // if replacements are made the length of best_choice might change.
171  for (int pass = 0; pass < (fix_replaceable ? 2 : 1); ++pass) {
172  bool replace = (fix_replaceable && pass == 0);
173  const UnicharAmbigsVector &table = replace ?
175  if (!replace) {
176  // Initialize ambig_blob_choices with lists containing a single
177  // unichar id for the corresponding position in best_choice.
178  // best_choice consisting from only the original letters will
179  // have a rating of 0.0.
180  for (i = 0; i < best_choice->length(); ++i) {
181  BLOB_CHOICE_LIST *lst = new BLOB_CHOICE_LIST();
182  BLOB_CHOICE_IT lst_it(lst);
183  // TODO(rays/antonova) Put real xheights and y shifts here.
184  lst_it.add_to_end(new BLOB_CHOICE(best_choice->unichar_id(i),
185  0.0, 0.0, -1, 0, 1, 0, BCC_AMBIG));
186  ambig_blob_choices.push_back(lst);
187  }
188  }
189  UNICHAR_ID wrong_ngram[MAX_AMBIG_SIZE + 1];
190  int wrong_ngram_index;
191  int next_index;
192  int blob_index = 0;
193  for (i = 0; i < best_choice->length(); blob_index += best_choice->state(i),
194  ++i) {
195  UNICHAR_ID curr_unichar_id = best_choice->unichar_id(i);
196  if (stopper_debug_level > 2) {
197  tprintf("Looking for %s ngrams starting with %s:\n",
198  replace ? "replaceable" : "ambiguous",
199  getUnicharset().debug_str(curr_unichar_id).string());
200  }
201  int num_wrong_blobs = best_choice->state(i);
202  wrong_ngram_index = 0;
203  wrong_ngram[wrong_ngram_index] = curr_unichar_id;
204  if (curr_unichar_id == INVALID_UNICHAR_ID ||
205  curr_unichar_id >= table.size() ||
206  table[curr_unichar_id] == nullptr) {
207  continue; // there is no ambig spec for this unichar id
208  }
209  AmbigSpec_IT spec_it(table[curr_unichar_id]);
210  for (spec_it.mark_cycle_pt(); !spec_it.cycled_list();) {
211  const AmbigSpec *ambig_spec = spec_it.data();
212  wrong_ngram[wrong_ngram_index+1] = INVALID_UNICHAR_ID;
213  int compare = UnicharIdArrayUtils::compare(wrong_ngram,
214  ambig_spec->wrong_ngram);
215  if (stopper_debug_level > 2) {
216  tprintf("candidate ngram: ");
218  tprintf("current ngram from spec: ");
220  tprintf("comparison result: %d\n", compare);
221  }
222  if (compare == 0) {
223  // Record the place where we found an ambiguity.
224  if (fixpt != nullptr) {
225  UNICHAR_ID leftmost_id = ambig_spec->correct_fragments[0];
226  fixpt->push_back(DANGERR_INFO(
227  blob_index, blob_index + num_wrong_blobs, replace,
228  getUnicharset().get_isngram(ambig_spec->correct_ngram_id),
229  leftmost_id));
230  if (stopper_debug_level > 1) {
231  tprintf("fixpt+=(%d %d %d %d %s)\n", blob_index,
232  blob_index + num_wrong_blobs, false,
233  getUnicharset().get_isngram(
234  ambig_spec->correct_ngram_id),
235  getUnicharset().id_to_unichar(leftmost_id));
236  }
237  }
238 
239  if (replace) {
240  if (stopper_debug_level > 2) {
241  tprintf("replace ambiguity with %s : ",
242  getUnicharset().id_to_unichar(
243  ambig_spec->correct_ngram_id));
245  ambig_spec->correct_fragments, getUnicharset());
246  }
247  ReplaceAmbig(i, ambig_spec->wrong_ngram_size,
248  ambig_spec->correct_ngram_id,
249  best_choice, ratings);
250  } else if (i > 0 || ambig_spec->type != CASE_AMBIG) {
251  // We found dang ambig - update ambig_blob_choices.
252  if (stopper_debug_level > 2) {
253  tprintf("found ambiguity: ");
255  ambig_spec->correct_fragments, getUnicharset());
256  }
257  ambigs_found = true;
258  for (int tmp_index = 0; tmp_index <= wrong_ngram_index;
259  ++tmp_index) {
260  // Add a blob choice for the corresponding fragment of the
261  // ambiguity. These fake blob choices are initialized with
262  // negative ratings (which are not possible for real blob
263  // choices), so that dawg_permute_and_select() considers any
264  // word not consisting of only the original letters a better
265  // choice and stops searching for alternatives once such a
266  // choice is found.
267  BLOB_CHOICE_IT bc_it(ambig_blob_choices[i+tmp_index]);
268  bc_it.add_to_end(new BLOB_CHOICE(
269  ambig_spec->correct_fragments[tmp_index], -1.0, 0.0,
270  -1, 0, 1, 0, BCC_AMBIG));
271  }
272  }
273  spec_it.forward();
274  } else if (compare == -1) {
275  if (wrong_ngram_index+1 < ambig_spec->wrong_ngram_size &&
276  ((next_index = wrong_ngram_index+1+i) < best_choice->length())) {
277  // Add the next unichar id to wrong_ngram and keep looking for
278  // more ambigs starting with curr_unichar_id in AMBIG_SPEC_LIST.
279  wrong_ngram[++wrong_ngram_index] =
280  best_choice->unichar_id(next_index);
281  num_wrong_blobs += best_choice->state(next_index);
282  } else {
283  break; // no more matching ambigs in this AMBIG_SPEC_LIST
284  }
285  } else {
286  spec_it.forward();
287  }
288  } // end searching AmbigSpec_LIST
289  } // end searching best_choice
290  } // end searching replace and dangerous ambigs
291 
292  // If any ambiguities were found permute the constructed ambig_blob_choices
293  // to see if an alternative dictionary word can be found.
294  if (ambigs_found) {
295  if (stopper_debug_level > 2) {
296  tprintf("\nResulting ambig_blob_choices:\n");
297  for (i = 0; i < ambig_blob_choices.length(); ++i) {
298  print_ratings_list("", ambig_blob_choices.get(i), getUnicharset());
299  tprintf("\n");
300  }
301  }
302  WERD_CHOICE *alt_word = dawg_permute_and_select(ambig_blob_choices, 0.0);
303  ambigs_found = (alt_word->rating() < 0.0);
304  if (ambigs_found) {
305  if (stopper_debug_level >= 1) {
306  tprintf ("Stopper: Possible ambiguous word = %s\n",
307  alt_word->debug_string().string());
308  }
309  if (fixpt != nullptr) {
310  // Note: Currently character choices combined from fragments can only
311 // be generated by NoDangerousAmbig(). This code should be updated if
312  // the capability to produce classifications combined from character
313  // fragments is added to other functions.
314  int orig_i = 0;
315  for (i = 0; i < alt_word->length(); ++i) {
316  const UNICHARSET &uchset = getUnicharset();
317  bool replacement_is_ngram =
318  uchset.get_isngram(alt_word->unichar_id(i));
319  UNICHAR_ID leftmost_id = alt_word->unichar_id(i);
320  if (replacement_is_ngram) {
321  // we have to extract the leftmost unichar from the ngram.
322  const char *str = uchset.id_to_unichar(leftmost_id);
323  int step = uchset.step(str);
324  if (step) leftmost_id = uchset.unichar_to_id(str, step);
325  }
326  int end_i = orig_i + alt_word->state(i);
327  if (alt_word->state(i) > 1 ||
328  (orig_i + 1 == end_i && replacement_is_ngram)) {
329  // Compute proper blob indices.
330  int blob_start = 0;
331  for (int j = 0; j < orig_i; ++j)
332  blob_start += best_choice->state(j);
333  int blob_end = blob_start;
334  for (int j = orig_i; j < end_i; ++j)
335  blob_end += best_choice->state(j);
336  fixpt->push_back(DANGERR_INFO(blob_start, blob_end, true,
337  replacement_is_ngram, leftmost_id));
338  if (stopper_debug_level > 1) {
339  tprintf("fixpt->dangerous+=(%d %d %d %d %s)\n", orig_i, end_i,
340  true, replacement_is_ngram,
341  uchset.id_to_unichar(leftmost_id));
342  }
343  }
344  orig_i += alt_word->state(i);
345  }
346  }
347  }
348  delete alt_word;
349  }
350  if (output_ambig_words_file_ != nullptr) {
351  fprintf(output_ambig_words_file_, "\n");
352  }
353 
354  ambig_blob_choices.delete_data_pointers();
355  return !ambigs_found;
356 }
357 
359 
361  reject_offset_ = 0.0;
362 }
363 
366 }
367 
368 void Dict::ReplaceAmbig(int wrong_ngram_begin_index, int wrong_ngram_size,
369  UNICHAR_ID correct_ngram_id, WERD_CHOICE *werd_choice,
370  MATRIX *ratings) {
371  int num_blobs_to_replace = 0;
372  int begin_blob_index = 0;
373  int i;
374  // Rating and certainty for the new BLOB_CHOICE are derived from the
375  // replaced choices.
376  float new_rating = 0.0f;
377  float new_certainty = 0.0f;
378  BLOB_CHOICE* old_choice = nullptr;
379  for (i = 0; i < wrong_ngram_begin_index + wrong_ngram_size; ++i) {
380  if (i >= wrong_ngram_begin_index) {
381  int num_blobs = werd_choice->state(i);
382  int col = begin_blob_index + num_blobs_to_replace;
383  int row = col + num_blobs - 1;
384  BLOB_CHOICE_LIST* choices = ratings->get(col, row);
385  ASSERT_HOST(choices != nullptr);
386  old_choice = FindMatchingChoice(werd_choice->unichar_id(i), choices);
387  ASSERT_HOST(old_choice != nullptr);
388  new_rating += old_choice->rating();
389  new_certainty += old_choice->certainty();
390  num_blobs_to_replace += num_blobs;
391  } else {
392  begin_blob_index += werd_choice->state(i);
393  }
394  }
395  new_certainty /= wrong_ngram_size;
396  // If there is no entry in the ratings matrix, add it.
397  MATRIX_COORD coord(begin_blob_index,
398  begin_blob_index + num_blobs_to_replace - 1);
399  if (!coord.Valid(*ratings)) {
400  ratings->IncreaseBandSize(coord.row - coord.col + 1);
401  }
402  if (ratings->get(coord.col, coord.row) == nullptr)
403  ratings->put(coord.col, coord.row, new BLOB_CHOICE_LIST);
404  BLOB_CHOICE_LIST* new_choices = ratings->get(coord.col, coord.row);
405  BLOB_CHOICE* choice = FindMatchingChoice(correct_ngram_id, new_choices);
406  if (choice != nullptr) {
407  // Already there. Upgrade if new rating better.
408  if (new_rating < choice->rating())
409  choice->set_rating(new_rating);
410  if (new_certainty < choice->certainty())
411  choice->set_certainty(new_certainty);
412  // DO NOT SORT!! It will mess up the iterator in LanguageModel::UpdateState.
413  } else {
414  // Need a new choice with the correct_ngram_id.
415  choice = new BLOB_CHOICE(*old_choice);
416  choice->set_unichar_id(correct_ngram_id);
417  choice->set_rating(new_rating);
418  choice->set_certainty(new_certainty);
419  choice->set_classifier(BCC_AMBIG);
420  choice->set_matrix_cell(coord.col, coord.row);
421  BLOB_CHOICE_IT it (new_choices);
422  it.add_to_end(choice);
423  }
424  // Remove current unichar from werd_choice. On the last iteration
425  // set the correct replacement unichar instead of removing a unichar.
426  for (int replaced_count = 0; replaced_count < wrong_ngram_size;
427  ++replaced_count) {
428  if (replaced_count + 1 == wrong_ngram_size) {
429  werd_choice->set_blob_choice(wrong_ngram_begin_index,
430  num_blobs_to_replace, choice);
431  } else {
432  werd_choice->remove_unichar_id(wrong_ngram_begin_index + 1);
433  }
434  }
435  if (stopper_debug_level >= 1) {
436  werd_choice->print("ReplaceAmbig() ");
437  tprintf("Modified blob_choices: ");
438  print_ratings_list("\n", new_choices, getUnicharset());
439  }
440 }
441 
442 int Dict::LengthOfShortestAlphaRun(const WERD_CHOICE &WordChoice) const {
443  int shortest = INT32_MAX;
444  int curr_len = 0;
445  for (int w = 0; w < WordChoice.length(); ++w) {
446  if (getUnicharset().get_isalpha(WordChoice.unichar_id(w))) {
447  curr_len++;
448  } else if (curr_len > 0) {
449  if (curr_len < shortest) shortest = curr_len;
450  curr_len = 0;
451  }
452  }
453  if (curr_len > 0 && curr_len < shortest) {
454  shortest = curr_len;
455  } else if (shortest == INT32_MAX) {
456  shortest = 0;
457  }
458  return shortest;
459 }
460 
462  float Certainty;
463  float WorstCertainty = FLT_MAX;
464  float CertaintyThreshold;
465  double TotalCertainty;
466  double TotalCertaintySquared;
467  double Variance;
468  float Mean, StdDev;
469  int word_length = word.length();
470 
471  if (word_length < 3)
472  return true;
473 
474  TotalCertainty = TotalCertaintySquared = 0.0;
475  for (int i = 0; i < word_length; ++i) {
476  Certainty = word.certainty(i);
477  TotalCertainty += Certainty;
478  TotalCertaintySquared += static_cast<double>(Certainty) * Certainty;
479  if (Certainty < WorstCertainty)
480  WorstCertainty = Certainty;
481  }
482 
483  // Subtract off worst certainty from statistics.
484  word_length--;
485  TotalCertainty -= WorstCertainty;
486  TotalCertaintySquared -= static_cast<double>(WorstCertainty) * WorstCertainty;
487 
488  Mean = TotalCertainty / word_length;
489  Variance = ((word_length * TotalCertaintySquared -
490  TotalCertainty * TotalCertainty) /
491  (word_length * (word_length - 1)));
492  if (Variance < 0.0)
493  Variance = 0.0;
494  StdDev = sqrt(Variance);
495 
496  CertaintyThreshold = Mean - stopper_allowable_character_badness * StdDev;
497  if (CertaintyThreshold > stopper_nondict_certainty_base)
498  CertaintyThreshold = stopper_nondict_certainty_base;
499 
500  if (word.certainty() < CertaintyThreshold) {
501  if (stopper_debug_level >= 1)
502  tprintf("Stopper: Non-uniform certainty = %4.1f"
503  " (m=%4.1f, s=%4.1f, t=%4.1f)\n",
504  word.certainty(), Mean, StdDev, CertaintyThreshold);
505  return false;
506  } else {
507  return true;
508  }
509 }
510 
511 } // namespace tesseract
WERD_CHOICE_LIST best_choices
Definition: pageres.h:243
int case_ok(const WERD_CHOICE &word, const UNICHARSET &unicharset) const
Check a string to see if it matches a set of lexical rules.
Definition: context.cpp:52
int UniformCertainties(const WERD_CHOICE &word)
Definition: stopper.cpp:461
float certainty() const
Definition: ratngs.h:83
int UNICHAR_ID
Definition: unichar.h:35
void remove_unichar_id(int index)
Definition: ratngs.h:484
int size() const
Definition: genericvector.h:71
int valid_word(const WERD_CHOICE &word, bool numbers_ok) const
Definition: dict.cpp:753
void set_classifier(BlobChoiceClassifier classifier)
Definition: ratngs.h:167
void SettupStopperPass1()
Sets up stopper variables in preparation for the first pass.
Definition: stopper.cpp:360
const UnicharAmbigs & getUnicharAmbigs() const
Definition: dict.h:104
#define MAX_AMBIG_SIZE
Definition: ambigs.h:30
const char * string() const
Definition: strngs.cpp:196
void print() const
Definition: ratngs.h:580
int state(int index) const
Definition: ratngs.h:319
void set_matrix_cell(int col, int row)
Definition: ratngs.h:157
uint8_t permuter() const
Definition: ratngs.h:346
bool AcceptableChoice(const WERD_CHOICE &best_choice, XHeightConsistencyEnum xheight_consistency)
Returns true if the given best_choice is good enough to stop.
Definition: stopper.cpp:41
bool stopper_no_acceptable_choices
Definition: dict.h:625
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:209
float rating() const
Definition: ratngs.h:327
float certainty() const
Definition: ratngs.h:330
int stopper_debug_level
Definition: dict.h:622
bool get_isalpha(UNICHAR_ID unichar_id) const
Definition: unicharset.h:486
void ReplaceAmbig(int wrong_ngram_begin_index, int wrong_ngram_size, UNICHAR_ID correct_ngram_id, WERD_CHOICE *werd_choice, MATRIX *ratings)
Definition: stopper.cpp:368
void EndDangerousAmbigs()
Definition: stopper.cpp:358
static int compare(const UNICHAR_ID *ptr1, const UNICHAR_ID *ptr2)
Definition: ambigs.h:62
bool Valid(const MATRIX &m) const
Definition: matrix.h:615
bool dangerous_ambig_found() const
Definition: ratngs.h:363
float max_x_height() const
Definition: ratngs.h:339
void IncreaseBandSize(int bandwidth)
Definition: matrix.cpp:54
bool get_isngram(UNICHAR_ID unichar_id) const
Definition: unicharset.h:521
void set_certainty(float newrat)
Definition: ratngs.h:151
T & get(int index) const
static void print(const UNICHAR_ID array[], const UNICHARSET &unicharset)
Definition: ambigs.h:98
void set_rating(float newrat)
Definition: ratngs.h:148
void set_unichar_id(UNICHAR_ID newunichar_id)
Definition: ratngs.h:145
void set_blob_choice(int index, int blob_count, const BLOB_CHOICE *blob_choice)
Definition: ratngs.cpp:312
double stopper_allowable_character_badness
Definition: dict.h:621
double stopper_nondict_certainty_base
Definition: dict.h:613
const UnicharAmbigsVector & dang_ambigs() const
Definition: ambigs.h:152
int length() const
Definition: genericvector.h:85
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:315
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
XHeightConsistencyEnum
Definition: dict.h:75
void put(ICOORD pos, const T &thing)
Definition: matrix.h:220
int length() const
Definition: ratngs.h:303
float Mean(PROTOTYPE *Proto, uint16_t Dimension)
Definition: cluster.cpp:628
int step(const char *str) const
Definition: unicharset.cpp:232
float min_x_height() const
Definition: ratngs.h:336
int push_back(T object)
double stopper_certainty_per_char
Definition: dict.h:619
float rating() const
Definition: ratngs.h:80
UNICHAR_ID correct_ngram_id
Definition: ambigs.h:133
UNICHAR_ID correct_fragments[MAX_AMBIG_SIZE+1]
Definition: ambigs.h:132
const STRING debug_string() const
Definition: ratngs.h:505
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:290
void delete_data_pointers()
AmbigType type
Definition: ambigs.h:134
const UNICHARSET & getUnicharset() const
Definition: dict.h:98
const STRING & unichar_string() const
Definition: ratngs.h:541
int stopper_smallword_size
Definition: dict.h:617
bool AcceptableResult(WERD_RES *word) const
Definition: stopper.cpp:101
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
Definition: ratngs.cpp:836
double stopper_phase2_certainty_rejection_offset
Definition: dict.h:615
BLOB_CHOICE * FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list)
Definition: ratngs.cpp:180
Definition: matrix.h:575
void SettupStopperPass2()
Sets up stopper variables in preparation for the second pass.
Definition: stopper.cpp:364
WERD_CHOICE * dawg_permute_and_select(const BLOB_CHOICE_LIST_VECTOR &char_choices, float rating_limit)
Definition: permdawg.cpp:174
static bool valid_word_permuter(uint8_t perm, bool numbers_ok)
Check all the DAWGs to see if this word is in any of them.
Definition: dict.h:459
UNICHAR_ID wrong_ngram[MAX_AMBIG_SIZE+1]
Definition: ambigs.h:131
WERD_CHOICE * best_choice
Definition: pageres.h:235
T get(ICOORD pos) const
Definition: matrix.h:228
int LengthOfShortestAlphaRun(const WERD_CHOICE &WordChoice) const
Returns the length of the shortest alpha run in WordChoice.
Definition: stopper.cpp:442
bool NoDangerousAmbig(WERD_CHOICE *BestChoice, DANGERR *fixpt, bool fix_replaceable, MATRIX *ratings)
Definition: stopper.cpp:142
const UnicharAmbigsVector & replace_ambigs() const
Definition: ambigs.h:153
#define ASSERT_HOST(x)
Definition: errcode.h:84