tesseract  4.0.0-1-g2a2b
adaptions.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: adaptions.cpp (Formerly adaptions.c)
3  * Description: Functions used to adapt to blobs already confidently
4  * identified
5  * Author: Chris Newton
6  * Created: Thu Oct 7 10:17:28 BST 1993
7  *
8  * (C) Copyright 1992, Hewlett-Packard Ltd.
9  ** Licensed under the Apache License, Version 2.0 (the "License");
10  ** you may not use this file except in compliance with the License.
11  ** You may obtain a copy of the License at
12  ** http://www.apache.org/licenses/LICENSE-2.0
13  ** Unless required by applicable law or agreed to in writing, software
14  ** distributed under the License is distributed on an "AS IS" BASIS,
15  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  ** See the License for the specific language governing permissions and
17  ** limitations under the License.
18  *
19  **********************************************************************/
20 
21 #include <cctype>
22 #include <cstring>
23 #include "tessvars.h"
24 #include "reject.h"
25 #include "control.h"
26 #include "stopper.h"
27 #include "tesseractclass.h"
28 
29 // Include automatically generated configuration file if running autoconf.
30 #ifdef HAVE_CONFIG_H
31 #include "config_auto.h"
32 #endif
33 
34 namespace tesseract {
35 bool Tesseract::word_adaptable( //should we adapt?
36  WERD_RES* word,
37  uint16_t mode) {
39  tprintf("Running word_adaptable() for %s rating %.4f certainty %.4f\n",
41  word->best_choice->rating(), word->best_choice->certainty());
42  }
43 
44  BOOL8 status = FALSE;
45  BITS16 flags(mode);
46 
47  enum MODES
48  {
49  ADAPTABLE_WERD,
50  ACCEPTABLE_WERD,
51  CHECK_DAWGS,
52  CHECK_SPACES,
53  CHECK_ONE_ELL_CONFLICT,
54  CHECK_AMBIG_WERD
55  };
56 
57  /*
58  0: NO adaption
59  */
60  if (mode == 0) {
61  if (tessedit_adaption_debug) tprintf("adaption disabled\n");
62  return false;
63  }
64 
65  if (flags.bit (ADAPTABLE_WERD)) {
66  status |= word->tess_would_adapt; // result of Classify::AdaptableWord()
67  if (tessedit_adaption_debug && !status) {
68  tprintf("tess_would_adapt bit is false\n");
69  }
70  }
71 
72  if (flags.bit (ACCEPTABLE_WERD)) {
73  status |= word->tess_accepted;
74  if (tessedit_adaption_debug && !status) {
75  tprintf("tess_accepted bit is false\n");
76  }
77  }
78 
79  if (!status) { // If not set then
80  return false; // ignore other checks
81  }
82 
83  if (flags.bit (CHECK_DAWGS) &&
84  (word->best_choice->permuter () != SYSTEM_DAWG_PERM) &&
85  (word->best_choice->permuter () != FREQ_DAWG_PERM) &&
86  (word->best_choice->permuter () != USER_DAWG_PERM) &&
87  (word->best_choice->permuter () != NUMBER_PERM)) {
88  if (tessedit_adaption_debug) tprintf("word not in dawgs\n");
89  return false;
90  }
91 
92  if (flags.bit (CHECK_ONE_ELL_CONFLICT) && one_ell_conflict (word, false)) {
93  if (tessedit_adaption_debug) tprintf("word has ell conflict\n");
94  return false;
95  }
96 
97  if (flags.bit (CHECK_SPACES) &&
98  (strchr(word->best_choice->unichar_string().string(), ' ') != nullptr)) {
99  if (tessedit_adaption_debug) tprintf("word contains spaces\n");
100  return false;
101  }
102 
103  if (flags.bit (CHECK_AMBIG_WERD) &&
105  if (tessedit_adaption_debug) tprintf("word is ambiguous\n");
106  return false;
107  }
108 
110  tprintf("returning status %d\n", status);
111  }
112  return status;
113 }
114 
115 } // namespace tesseract
const char * string() const
Definition: strngs.cpp:196
uint8_t permuter() const
Definition: ratngs.h:346
float rating() const
Definition: ratngs.h:327
float certainty() const
Definition: ratngs.h:330
bool bit(uint8_t bit_num) const
Definition: bits16.h:57
bool dangerous_ambig_found() const
Definition: ratngs.h:363
bool tess_would_adapt
Definition: pageres.h:297
bool word_adaptable(WERD_RES *word, uint16_t mode)
Definition: adaptions.cpp:35
#define FALSE
Definition: capi.h:52
bool tess_accepted
Definition: pageres.h:296
unsigned char BOOL8
Definition: host.h:34
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
const STRING & unichar_string() const
Definition: ratngs.h:541
Definition: bits16.h:25
bool one_ell_conflict(WERD_RES *word_res, bool update_map)
Definition: reject.cpp:297
WERD_CHOICE * best_choice
Definition: pageres.h:235