22 #include "config_auto.h" 25 #ifdef DISABLED_LEGACY_ENGINE 71 if (word->
done && (pass == 1) && (!word_from_dict || word_is_ambig) &&
76 if (word->
done && ((!word_from_dict &&
146 for (i = 0, offset = 0;
164 tprintf(
"BAD tessedit_reject_mode\n");
174 tprintf(
"Certainty: %f Rating: %f\n",
235 float bestgap = 0.0f;
238 int blob_count = word->
length();
241 for (
int i = 0; i < blob_count; ++i) {
245 gapstart = ratings[0] - 1;
246 if (blob_count >= 3) {
247 for (
int index = 0; index < blob_count - 1; index++) {
248 if (ratings[index + 1] - ratings[index] > bestgap) {
249 bestgap = ratings[index + 1] - ratings[index];
251 gapstart = ratings[index];
255 threshold = gapstart + bestgap / 2;
278 for (
int blobindex = 0; blobindex < blobcount; blobindex++) {
284 word->
reject_map[blobindex].setrej_edge_char();
301 int16_t first_alphanum_index_;
302 int16_t first_alphanum_offset_;
305 bool non_conflict_set_char;
306 bool conflict =
false;
315 word_len = strlen(lengths);
328 for (i = 0, offset = 0, non_conflict_set_char =
false;
329 (i < word_len) && !non_conflict_set_char; offset += lengths[i++])
330 non_conflict_set_char =
334 if (!non_conflict_set_char) {
352 dict_word_ok = (dict_word_type > 0) &&
357 (dict_perm_type && dict_word_ok)) {
360 if (lengths[first_alphanum_index_] == 1 &&
361 word[first_alphanum_offset_] ==
'I') {
367 setrej_1Il_conflict();
376 if (lengths[first_alphanum_index_] == 1 &&
377 word[first_alphanum_offset_] ==
'l') {
383 setrej_1Il_conflict();
407 if (lengths[first_alphanum_index_] == 1 &&
408 word[first_alphanum_offset_] ==
'l') {
415 else if (lengths[first_alphanum_index_] == 1 &&
416 word[first_alphanum_offset_] ==
'I') {
435 for (i = 0, offset = 0; word[offset] !=
'\0';
437 if ((!allow_1s || (word[offset] !=
'1')) &&
440 word_res->
reject_map[i].setrej_1Il_conflict ();
457 setrej_1Il_conflict ();
475 const char *word_lengths) {
479 for (i = 0, offset = 0; word[offset] !=
'\0'; offset += word_lengths[i++]) {
488 const char *word_lengths) {
492 for (i = 0, offset = 0; word[offset] !=
'\0'; offset += word_lengths[i++]) {
501 const char *word_lengths) {
506 for (i = 0, offset = 0; word[offset] !=
'\0'; offset += word_lengths[i++]) {
515 const char* word_lengths) {
519 for (i = 0, offset = 0; word[offset] !=
'\0'; offset += word_lengths[i++]) {
521 (word_lengths[i] != 1 || word[offset] !=
'1'))
537 bool accepted_1Il =
false;
539 for (i = 0, offset = 0; i < word_len;
554 for (i = 0, offset = 0; i < word_len;
588 int16_t char_quality;
589 int16_t accepted_char_quality;
606 (char_quality == accepted_char_quality))
624 int prev_right = -9999;
634 for (i = 0; i < best_choice->
length() && i < num_blobs; ++i) {
637 if (i + 1 == num_blobs)
643 (out_box.
left() > prev_right) && (out_box.
right() < next_left)) {
644 aspect_ratio = out_box.
width() / (float) out_box.
height();
652 word_res->
reject_map[i].setrej_hyphen_accept();
659 else if (best_choice->
unichar_id(i) == unichar_dash) {
662 word_res->
reject_map[i].setrej_hyphen_accept();
671 prev_right = out_box.
right();
687 for (i = 0; i < best_choice->
length() && i < num_blobs; ++i) {
699 if (unichar_0 == INVALID_UNICHAR_ID ||
701 unichar_O == INVALID_UNICHAR_ID ||
705 for (i = 1; i < best_choice->
length(); ++i) {
706 if (best_choice->
unichar_id(i) == unichar_0 ||
709 if ((i+1) < best_choice->
length() &&
716 (i+1) < best_choice->
length() &&
719 (i+2) < best_choice->
length() &&
728 (((i+1) < best_choice->
length() &&
732 (i == best_choice->
length() - 1))) {
737 (i+1) < best_choice->
length() &&
743 (i+2) < best_choice->
length() &&
755 (i+2) < best_choice->
length() &&
765 (i+1) < best_choice->
length() &&
775 if (best_choice->
unichar_id(i-2) == unichar_O) {
778 while (i < best_choice->length() &&
791 return ch_set.
get_isupper(unichar_id) && !ch_set.
eq(unichar_id,
"O");
795 return ch_set.
get_isdigit(unichar_id) && !ch_set.
eq(unichar_id,
"0");
799 #endif // def DISABLED_LEGACY_ENGINE void set_unichar_id(UNICHAR_ID unichar_id, int index)
int16_t alpha_count(const char *word, const char *word_lengths)
int16_t first_alphanum_index(const char *word, const char *word_lengths)
void resize_no_init(int size)
void dont_allow_1Il(WERD_RES *word)
int dict_word(const WERD_CHOICE &word)
float compute_reject_threshold(WERD_CHOICE *word)
bool rej_1Il_use_dict_word
bool tessedit_rejection_debug
bool eq(UNICHAR_ID unichar_id, const char *const unichar_repr) const
const char * string() const
TBOX bounding_box() const
bool word_contains_non_1_digit(const char *word, const char *word_lengths)
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
const int kBlnBaselineOffset
bool get_isalpha(UNICHAR_ID unichar_id) const
bool repeated_nonalphanum_wd(WERD_RES *word, ROW *row)
bool non_0_digit(const UNICHARSET &ch_set, UNICHAR_ID unichar_id)
const TBOX & BlobBox(int index) const
bool dangerous_ambig_found() const
int16_t first_alphanum_offset(const char *word, const char *word_lengths)
double tessedit_lower_flip_hyphen
int tessedit_image_border
const STRING & unichar_lengths() const
bool rej_alphas_in_number_perm
bool non_O_upper(const UNICHARSET &ch_set, UNICHAR_ID unichar_id)
bool get_isdigit(UNICHAR_ID unichar_id) const
CLISTIZEH(STRING) CLISTIZE(STRING) namespace tesseract
int16_t safe_dict_word(const WERD_RES *werd_res)
bool rej_1Il_trust_permuter_type
void make_reject_map(WERD_RES *word, ROW *row, int16_t pass)
void rej_word_bad_permuter()
char * ok_repeated_ch_non_alphanum_wds
void set_done(WERD_RES *word, int16_t pass)
bool get_enabled(UNICHAR_ID unichar_id) const
void reject_blanks(WERD_RES *word)
UNICHAR_ID unichar_id(int index) const
double tessedit_upper_flip_hyphen
DLLSYM void tprintf(const char *format,...)
TBOX bounding_box() const
ACCEPTABLE_WERD_TYPE acceptable_word_string(const UNICHARSET &char_set, const char *s, const char *lengths)
void flip_0O(WERD_RES *word)
bool contains_unichar_id(UNICHAR_ID unichar_id) const
void reject_mostly_rejects(WERD_RES *word)
GenericVector< TBLOB * > blobs
void rej_word_small_xht()
tesseract::Tesseract * tesseract
void reject_poor_matches(WERD_RES *word)
bool contains(const char c) const
void reject_edge_blobs(WERD_RES *word)
const UNICHARSET * uch_set
const STRING & unichar_string() const
char * conflict_set_I_l_1
bool check_debug_pt(WERD_RES *word, int location)
bool rej_use_tess_accepted
void rej_word_not_tess_accepted()
bool get_isupper(UNICHAR_ID unichar_id) const
CLISTIZE(BLOCK_RES) ELISTIZE(ROW_RES) ELISTIZE(WERD_RES) static const double kStopperAmbiguityThresholdGain
bool one_ell_conflict(WERD_RES *word_res, bool update_map)
void word_char_quality(WERD_RES *word, ROW *row, int16_t *match_count, int16_t *accepted_match_count)
void rej_word_contains_blanks()
double rej_whole_of_mostly_reject_word_fract
void flip_hyphens(WERD_RES *word)
int16_t count_alphanums(const WERD_CHOICE &word)
WERD_CHOICE * best_choice
tesseract::BoxWord * box_word
void rej_word_mostly_rej()
void reject_I_1_L(WERD_RES *word)
void initialise(int16_t length)