tesseract  5.0.0-alpha-619-ge9db
reject.h File Reference

Go to the source code of this file.

Functions

void reject_blanks (WERD_RES *word)
 
void reject_poor_matches (WERD_RES *word)
 
float compute_reject_threshold (WERD_CHOICE *word)
 
bool word_contains_non_1_digit (const char *word, const char *word_lengths)
 
void dont_allow_1Il (WERD_RES *word)
 
void flip_hyphens (WERD_RES *word)
 
void flip_0O (WERD_RES *word)
 
bool non_0_digit (const char *str, int length)
 

Function Documentation

◆ compute_reject_threshold()

float compute_reject_threshold ( WERD_CHOICE * word)

Definition at line 225 of file reject.cpp.

229  {
230  float threshold; // rejection threshold
231  float bestgap = 0.0f; // biggest gap
232  float gapstart; // bottom of gap
233 
234  int blob_count = word->length();
235  GenericVector<float> ratings;
236  ratings.resize_no_init(blob_count);
237  for (int i = 0; i < blob_count; ++i) {
238  ratings[i] = word->certainty(i);
239  }
240  ratings.sort();
241  gapstart = ratings[0] - 1; // all reject if none better
242  if (blob_count >= 3) {
243  for (int index = 0; index < blob_count - 1; index++) {
244  if (ratings[index + 1] - ratings[index] > bestgap) {
245  bestgap = ratings[index + 1] - ratings[index];
246  // find biggest
247  gapstart = ratings[index];
248  }
249  }
250  }

◆ dont_allow_1Il()

void dont_allow_1Il ( WERD_RES * word)

◆ flip_0O()

void flip_0O ( WERD_RES * word)

◆ flip_hyphens()

void flip_hyphens ( WERD_RES * word)

◆ non_0_digit()

bool non_0_digit ( const char *  str,
int  length 
)

◆ reject_blanks()

void reject_blanks ( WERD_RES * word)

Definition at line 178 of file reject.cpp.

181  {
182  int16_t i;
183  int16_t offset;
184 
185  for (i = 0, offset = 0; word->best_choice->unichar_string()[offset] != '\0';
186  offset += word->best_choice->unichar_lengths()[i], i += 1) {
187  if (word->best_choice->unichar_string()[offset] == ' ')
188  //rej unrecognised blobs

◆ reject_poor_matches()

void reject_poor_matches ( WERD_RES * word)

Definition at line 207 of file reject.cpp.

210  {
211  float threshold = compute_reject_threshold(word->best_choice);
212  for (int i = 0; i < word->best_choice->length(); ++i) {
213  if (word->best_choice->unichar_id(i) == UNICHAR_SPACE)
214  word->reject_map[i].setrej_tess_failure();
215  else if (word->best_choice->certainty(i) < threshold)

◆ word_contains_non_1_digit()

bool word_contains_non_1_digit ( const char *  word,
const char *  word_lengths 
)
WERD_CHOICE::unichar_string
const STRING & unichar_string() const
Definition: ratngs.h:529
WERD_CHOICE::unichar_id
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:303
WERD_CHOICE::certainty
float certainty() const
Definition: ratngs.h:318
WERD_RES::best_choice
WERD_CHOICE * best_choice
Definition: pageres.h:235
compute_reject_threshold
float compute_reject_threshold(WERD_CHOICE *word)
Definition: reject.cpp:225
UNICHAR_SPACE
Definition: unicharset.h:34
GenericVector::resize_no_init
void resize_no_init(int size)
Definition: genericvector.h:65
WERD_RES::reject_map
REJMAP reject_map
Definition: pageres.h:288
GenericVector< float >
WERD_CHOICE::length
int length() const
Definition: ratngs.h:291
GenericVector::sort
void sort()
Definition: genericvector.h:1102
WERD_CHOICE::unichar_lengths
const STRING & unichar_lengths() const
Definition: ratngs.h:536