tesseract  4.0.0-1-g2a2b
reject.h File Reference

Go to the source code of this file.

Functions

void reject_blanks (WERD_RES *word)
 
void reject_poor_matches (WERD_RES *word)
 
float compute_reject_threshold (WERD_CHOICE *word)
 
bool word_contains_non_1_digit (const char *word, const char *word_lengths)
 
void dont_allow_1Il (WERD_RES *word)
 
void flip_hyphens (WERD_RES *word)
 
void flip_0O (WERD_RES *word)
 
bool non_0_digit (const char *str, int length)
 

Function Documentation

◆ compute_reject_threshold()

float compute_reject_threshold ( WERD_CHOICE * word)

Definition at line 233 of file reject.cpp.

// Computes a certainty threshold for `word`. The certainty of every position
// in the word is copied into a vector and sorted; the threshold is then placed
// halfway across the widest gap between two adjacent sorted values.
// If the word has fewer than 3 positions, or no gap is greater than zero,
// the result is ratings[0] - 1 (the first sorted certainty minus one).
// NOTE(review): ratings[0] is read unconditionally, also when word->length()
// is 0 — confirm that callers never pass an empty word.
233  {
234  float threshold; // rejection threshold
235  float bestgap = 0.0f; // biggest gap
236  float gapstart; // bottom of gap
237 
238  int blob_count = word->length();
239  GenericVector<float> ratings;
240  ratings.resize_no_init(blob_count);
241  for (int i = 0; i < blob_count; ++i) {
242  ratings[i] = word->certainty(i);
243  }
244  ratings.sort(); // presumably ascending — TODO confirm against GenericVector::sort
245  gapstart = ratings[0] - 1; // all reject if none better
246  if (blob_count >= 3) {
     // The gap search only runs for words with at least 3 positions.
247  for (int index = 0; index < blob_count - 1; index++) {
248  if (ratings[index + 1] - ratings[index] > bestgap) {
249  bestgap = ratings[index + 1] - ratings[index];
250  // find biggest
251  gapstart = ratings[index];
252  }
253  }
254  }
     // Midpoint of the chosen gap; with bestgap == 0 this is just gapstart.
255  threshold = gapstart + bestgap / 2;
256 
257  return threshold;
258 }
void resize_no_init(int size)
Definition: genericvector.h:65
float certainty() const
Definition: ratngs.h:330
int length() const
Definition: ratngs.h:303

◆ dont_allow_1Il()

void dont_allow_1Il ( WERD_RES * word)

◆ flip_0O()

void flip_0O ( WERD_RES * word)

◆ flip_hyphens()

void flip_hyphens ( WERD_RES * word)

◆ non_0_digit()

bool non_0_digit ( const char *  str,
int  length 
)

◆ reject_blanks()

void reject_blanks ( WERD_RES * word)

Definition at line 185 of file reject.cpp.

// Walks word->best_choice's unichar string one entry at a time and calls
// setrej_tess_failure() on the reject-map entry of every position whose
// string starts with a space character.
185  {
186  int16_t i; // index into unichar_lengths() and reject_map
187  int16_t offset; // position in unichar_string() where entry i starts
188 
     // i and offset advance together: offset grows by the length recorded for
     // entry i, so it always points at the first char of the current entry.
     // The walk stops at the string's terminating '\0'.
189  for (i = 0, offset = 0; word->best_choice->unichar_string()[offset] != '\0';
190  offset += word->best_choice->unichar_lengths()[i], i += 1) {
191  if (word->best_choice->unichar_string()[offset] == ' ')
192  //rej unrecognised blobs
193  word->reject_map[i].setrej_tess_failure ();
194  }
195 }
REJMAP reject_map
Definition: pageres.h:287
const STRING & unichar_lengths() const
Definition: ratngs.h:548
const STRING & unichar_string() const
Definition: ratngs.h:541
WERD_CHOICE * best_choice
Definition: pageres.h:235

◆ reject_poor_matches()

void reject_poor_matches ( WERD_RES * word)

Definition at line 214 of file reject.cpp.

// Flags positions of word->best_choice in word->reject_map:
//  - positions whose unichar id is UNICHAR_SPACE get setrej_tess_failure();
//  - any other position whose certainty is below the threshold returned by
//    compute_reject_threshold(word->best_choice) gets setrej_poor_match().
214  {
     // The threshold is computed once for the whole word, before any flag is set.
215  float threshold = compute_reject_threshold(word->best_choice);
216  for (int i = 0; i < word->best_choice->length(); ++i) {
217  if (word->best_choice->unichar_id(i) == UNICHAR_SPACE)
218  word->reject_map[i].setrej_tess_failure();
     // Strict '<': a certainty equal to the threshold is not flagged.
219  else if (word->best_choice->certainty(i) < threshold)
220  word->reject_map[i].setrej_poor_match();
221  }
222 }
float compute_reject_threshold(WERD_CHOICE *word)
Definition: reject.cpp:233
REJMAP reject_map
Definition: pageres.h:287
float certainty() const
Definition: ratngs.h:330
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:315
int length() const
Definition: ratngs.h:303
WERD_CHOICE * best_choice
Definition: pageres.h:235

◆ word_contains_non_1_digit()

bool word_contains_non_1_digit ( const char *  word,
const char *  word_lengths 
)