tesseract  5.0.0-alpha-619-ge9db
adaptions.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: adaptions.cpp (Formerly adaptions.c)
3  * Description: Functions used to adapt to blobs already confidently
4  * identified
5  * Author: Chris Newton
6  *
7  * (C) Copyright 1992, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #include <cctype>
21 #include <cstring>
22 #include "tessvars.h"
23 #include "reject.h"
24 #include "control.h"
25 #include "stopper.h"
26 #include "tesseractclass.h"
27 
28 // Include automatically generated configuration file if running autoconf.
29 #ifdef HAVE_CONFIG_H
30 #include "config_auto.h"
31 #endif
32 
33 namespace tesseract {
34 bool Tesseract::word_adaptable( //should we adapt?
35  WERD_RES* word,
36  uint16_t mode) {
38  tprintf("Running word_adaptable() for %s rating %.4f certainty %.4f\n",
39  word->best_choice->unichar_string().c_str(),
40  word->best_choice->rating(), word->best_choice->certainty());
41  }
42 
43  bool status = false;
44  BITS16 flags(mode);
45 
46  enum MODES
47  {
48  ADAPTABLE_WERD,
49  ACCEPTABLE_WERD,
50  CHECK_DAWGS,
51  CHECK_SPACES,
52  CHECK_ONE_ELL_CONFLICT,
53  CHECK_AMBIG_WERD
54  };
55 
56  /*
57  0: NO adaption
58  */
59  if (mode == 0) {
60  if (tessedit_adaption_debug) tprintf("adaption disabled\n");
61  return false;
62  }
63 
64  if (flags.bit (ADAPTABLE_WERD)) {
65  status |= word->tess_would_adapt; // result of Classify::AdaptableWord()
66  if (tessedit_adaption_debug && !status) {
67  tprintf("tess_would_adapt bit is false\n");
68  }
69  }
70 
71  if (flags.bit (ACCEPTABLE_WERD)) {
72  status |= word->tess_accepted;
73  if (tessedit_adaption_debug && !status) {
74  tprintf("tess_accepted bit is false\n");
75  }
76  }
77 
78  if (!status) { // If not set then
79  return false; // ignore other checks
80  }
81 
82  if (flags.bit (CHECK_DAWGS) &&
83  (word->best_choice->permuter () != SYSTEM_DAWG_PERM) &&
84  (word->best_choice->permuter () != FREQ_DAWG_PERM) &&
85  (word->best_choice->permuter () != USER_DAWG_PERM) &&
86  (word->best_choice->permuter () != NUMBER_PERM)) {
87  if (tessedit_adaption_debug) tprintf("word not in dawgs\n");
88  return false;
89  }
90 
91  if (flags.bit (CHECK_ONE_ELL_CONFLICT) && one_ell_conflict (word, false)) {
92  if (tessedit_adaption_debug) tprintf("word has ell conflict\n");
93  return false;
94  }
95 
96  if (flags.bit (CHECK_SPACES) &&
97  (strchr(word->best_choice->unichar_string().c_str(), ' ') != nullptr)) {
98  if (tessedit_adaption_debug) tprintf("word contains spaces\n");
99  return false;
100  }
101 
102  if (flags.bit (CHECK_AMBIG_WERD) &&
104  if (tessedit_adaption_debug) tprintf("word is ambiguous\n");
105  return false;
106  }
107 
109  tprintf("returning status %d\n", status);
110  }
111  return status;
112 }
113 
114 } // namespace tesseract
WERD_CHOICE::unichar_string
const STRING & unichar_string() const
Definition: ratngs.h:529
tessvars.h
WERD_CHOICE::dangerous_ambig_found
bool dangerous_ambig_found() const
Definition: ratngs.h:351
tesseractclass.h
SYSTEM_DAWG_PERM
Definition: ratngs.h:239
control.h
WERD_CHOICE::certainty
float certainty() const
Definition: ratngs.h:318
BITS16
Definition: bits16.h:24
WERD_CHOICE::permuter
uint8_t permuter() const
Definition: ratngs.h:334
WERD_RES
Definition: pageres.h:160
stopper.h
tesseract::Tesseract::tessedit_adaption_debug
bool tessedit_adaption_debug
Definition: tesseractclass.h:821
WERD_RES::best_choice
WERD_CHOICE * best_choice
Definition: pageres.h:235
STRING::c_str
const char * c_str() const
Definition: strngs.cpp:192
tesseract::Tesseract::word_adaptable
bool word_adaptable(WERD_RES *word, uint16_t mode)
Definition: adaptions.cpp:50
WERD_RES::tess_accepted
bool tess_accepted
Definition: pageres.h:297
tesseract
Definition: baseapi.h:65
WERD_RES::tess_would_adapt
bool tess_would_adapt
Definition: pageres.h:298
reject.h
tesseract::Tesseract::one_ell_conflict
bool one_ell_conflict(WERD_RES *word_res, bool update_map)
Definition: reject.cpp:291
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
WERD_CHOICE::rating
float rating() const
Definition: ratngs.h:315
FREQ_DAWG_PERM
Definition: ratngs.h:242
NUMBER_PERM
Definition: ratngs.h:237
USER_DAWG_PERM
Definition: ratngs.h:241