tesseract  5.0.0-alpha-619-ge9db
rejctmap.h
Go to the documentation of this file.
1 /**********************************************************************
2  * File: rejctmap.h (Formerly rejmap.h)
3  * Description: REJ and REJMAP class functions.
4  * Author: Phil Cheatle
5  * Created: Thu Jun 9 13:46:38 BST 1994
6  *
7  * (C) Copyright 1994, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18 
19 This module may look unnecessarily verbose, but here's the philosophy...
20 
21 ALL processing of the reject map is done in this module. There are lots of
22 separate calls to set reject/accept flags. These have DELIBERATELY been kept
23 distinct so that this module can decide what to do.
24 
25 Basically, there is a flag for each sort of rejection or acceptance. This
26 provides a history of what has happened to EACH character.
27 
28 Determining whether a character is CURRENTLY rejected depends on implicit
29 understanding of the SEQUENCE of possible calls. The flags are defined and
30 grouped in the REJ_FLAGS enum. These groupings are used in determining a
31 character's CURRENT rejection status. Basically, a character is ACCEPTED if
32 
33  none of the permanent rej flags are set
34  AND ( the character has never been rejected
35  OR an accept flag is set which is LATER than the latest reject flag )
36 
37 IT IS FUNDAMENTAL THAT ANYONE HACKING THIS CODE UNDERSTANDS THE SIGNIFICANCE
38 OF THIS IMPLIED TEMPORAL ORDERING OF THE FLAGS!!!!
39 **********************************************************************/
40 
41 #ifndef REJCTMAP_H
42 #define REJCTMAP_H
43 
44 #include <memory>
45 #include "bits16.h"
46 #include "errcode.h"
47 #include "params.h"
48 
/**
 * One flag per kind of rejection or acceptance that can be recorded for a
 * character.
 *
 * The enumerators are grouped by processing stage, and the header comment of
 * this file states that the implied temporal ordering of these groups is
 * fundamental to deciding whether a character is CURRENTLY rejected. Do not
 * reorder or insert enumerators without reading that note.
 *
 * REJ::set_flag() / REJ::flag() store value N as bit N of flags1 when N < 16
 * and as bit (N - 16) of flags2 otherwise, so the numeric values matter.
 */
enum REJ_FLAGS {
  /* Reject modes which are NEVER overridden */
  R_TESS_FAILURE,       // PERM Tess didn't classify
  R_SMALL_XHT,          // PERM Xht too small
  R_EDGE_CHAR,          // PERM Too close to edge of image
  R_1IL_CONFLICT,       // PERM 1Il confusion
  R_POSTNN_1IL,         // PERM 1Il unrejected by NN
  R_REJ_CBLOB,          // PERM Odd blob
  R_MM_REJECT,          // PERM Matrix match rejection (m's)
  // NOTE(review): listed in the "never overridden" group but tagged TEMP -
  // confirm against the definition of REJ::perm_rejected().
  R_BAD_REPETITION,     // TEMP Repeated char which doesn't match trend

  /* Initial reject modes (pre NN_ACCEPT) */
  R_POOR_MATCH,         // TEMP Ray's original heuristic (Not used)
  R_NOT_TESS_ACCEPTED,  // TEMP Tess didn't accept WERD
  R_CONTAINS_BLANKS,    // TEMP Tess failed on other chs in WERD
  R_BAD_PERMUTER,       // POTENTIAL Bad permuter for WERD

  /* Reject modes generated after NN_ACCEPT but before MM_ACCEPT */
  R_HYPHEN,             // TEMP Post NN dodgy hyphen or full stop
  R_DUBIOUS,            // TEMP Post NN dodgy chars
  R_NO_ALPHANUMS,       // TEMP No alphanumerics in word after NN
  R_MOSTLY_REJ,         // TEMP Most of word rejected so rej the rest
  R_XHT_FIXUP,          // TEMP Xht tests unsure

  /* Reject modes generated after MM_ACCEPT but before QUALITY_ACCEPT */
  R_BAD_QUALITY,        // TEMP Quality metrics bad for WERD

  /* Reject modes generated after QUALITY_ACCEPT but before MINIMAL_REJ_ACCEPT */
  R_DOC_REJ,            // TEMP Document rejection
  R_BLOCK_REJ,          // TEMP Block rejection
  R_ROW_REJ,            // TEMP Row rejection
  R_UNLV_REJ,           // TEMP ~ turned to - or ^ turned to space

  /* Accept modes which occur between the above rejection groups */
  R_NN_ACCEPT,          // NN acceptance
  R_HYPHEN_ACCEPT,      // Hyphen acceptance
  R_MM_ACCEPT,          // Matrix match acceptance
  R_QUALITY_ACCEPT,     // Accept word in good quality doc
  R_MINIMAL_REJ_ACCEPT  // Accept EVERYTHING except tess failures
};

// REJ keeps its flags in two BITS16 members (assumed to hold 16 bits each, as
// the type name indicates), so only flag values 0..31 can be represented.
static_assert(R_MINIMAL_REJ_ACCEPT < 32,
              "REJ_FLAGS must fit in the two 16-bit flag words of REJ");
89 
/* REJECT MAP VALUES */

// One-character codes for the rejection state of a character, as returned by
// REJ::display_char():
//   MAP_REJECT_PERM      - perm_rejected() is true
//   MAP_REJECT_POTENTIAL - otherwise, accept_if_good_quality() is true
//   MAP_REJECT_TEMP      - otherwise, rejected() is true
//   MAP_ACCEPT           - none of the above
#define MAP_ACCEPT '1'
#define MAP_REJECT_PERM '0'
#define MAP_REJECT_TEMP '2'
#define MAP_REJECT_POTENTIAL '3'
96 
97 class REJ
98 {
99  BITS16 flags1;
100  BITS16 flags2;
101 
102  void set_flag(REJ_FLAGS rej_flag) {
103  if (rej_flag < 16)
104  flags1.turn_on_bit (rej_flag);
105  else
106  flags2.turn_on_bit (rej_flag - 16);
107  }
108 
109  bool rej_before_nn_accept();
110  bool rej_between_nn_and_mm();
111  bool rej_between_mm_and_quality_accept();
112  bool rej_between_quality_and_minimal_rej_accept();
113  bool rej_before_mm_accept();
114  bool rej_before_quality_accept();
115 
116  public:
117  REJ() = default;
118 
119  REJ( //classwise copy
120  const REJ &source) {
121  flags1 = source.flags1;
122  flags2 = source.flags2;
123  }
124 
125  REJ & operator= ( //assign REJ
126  const REJ & source) { //from this
127  flags1 = source.flags1;
128  flags2 = source.flags2;
129  return *this;
130  }
131 
132  bool flag(REJ_FLAGS rej_flag) {
133  if (rej_flag < 16)
134  return flags1.bit (rej_flag);
135  else
136  return flags2.bit (rej_flag - 16);
137  }
138 
139  char display_char() {
140  if (perm_rejected ())
141  return MAP_REJECT_PERM;
142  else if (accept_if_good_quality ())
143  return MAP_REJECT_POTENTIAL;
144  else if (rejected ())
145  return MAP_REJECT_TEMP;
146  else
147  return MAP_ACCEPT;
148  }
149 
150  bool perm_rejected(); //Is char perm reject?
151 
152  bool rejected(); //Is char rejected?
153 
154  bool accepted() { //Is char accepted?
155  return !rejected ();
156  }
157 
158  //potential rej?
159  bool accept_if_good_quality();
160 
161  bool recoverable() {
162  return (rejected () && !perm_rejected ());
163  }
164 
165  void setrej_tess_failure(); //Tess generated blank
166  void setrej_small_xht(); //Small xht char/wd
167  void setrej_edge_char(); //Close to image edge
168  void setrej_1Il_conflict(); //Initial reject map
169  void setrej_postNN_1Il(); //1Il after NN
170  void setrej_rej_cblob(); //Insert duff blob
171  void setrej_mm_reject(); //Matrix matcher
172  //Odd repeated char
173  void setrej_bad_repetition();
174  void setrej_poor_match(); //Failed Rays heuristic
175  //TEMP reject_word
177  //TEMP reject_word
178  void setrej_contains_blanks();
179  void setrej_bad_permuter(); //POTENTIAL reject_word
180  void setrej_hyphen(); //PostNN dubious hyph or .
181  void setrej_dubious(); //PostNN dubious limit
182  void setrej_no_alphanums(); //TEMP reject_word
183  void setrej_mostly_rej(); //TEMP reject_word
184  void setrej_xht_fixup(); //xht fixup
185  void setrej_bad_quality(); //TEMP reject_word
186  void setrej_doc_rej(); //TEMP reject_word
187  void setrej_block_rej(); //TEMP reject_word
188  void setrej_row_rej(); //TEMP reject_word
189  void setrej_unlv_rej(); //TEMP reject_word
190  void setrej_nn_accept(); //NN Flipped a char
191  void setrej_hyphen_accept(); //Good aspect ratio
192  void setrej_mm_accept(); //Matrix matcher
193  //Quality flip a char
194  void setrej_quality_accept();
195  //Accept all except blank
197 
198  void full_print(FILE *fp);
199 };
200 
201 class REJMAP
202 {
203  std::unique_ptr<REJ[]> ptr; // ptr to the chars
204  int16_t len; //Number of chars
205 
206  public:
207  REJMAP() : len(0) {}
208 
209  REJMAP(const REJMAP &rejmap) { *this = rejmap; }
210 
211  REJMAP &operator=(const REJMAP &source);
212 
213  // Sets up the ptr array to length, whatever it was before.
214  void initialise(int16_t length);
215 
216  REJ &operator[]( // access function
217  int16_t index) const // map index
218  {
219  ASSERT_HOST(index < len);
220  return ptr[index]; // no bounds checks
221  }
222 
223  int32_t length() const { //map length
224  return len;
225  }
226 
227  int16_t accept_count(); //How many accepted?
228 
229  int16_t reject_count() { //How many rejects?
230  return len - accept_count ();
231  }
232 
233  void remove_pos( //Cut out an element
234  int16_t pos); //element to remove
235 
236  void print(FILE *fp);
237 
238  void full_print(FILE *fp);
239 
240  bool recoverable_rejects(); //Any non perm rejs?
241 
243  //Any potential rejs?
244 
245  void rej_word_small_xht(); //Reject whole word
246  //Reject whole word
247  void rej_word_tess_failure();
249  //Reject whole word
250  //Reject whole word
252  //Reject whole word
253  void rej_word_bad_permuter();
254  void rej_word_xht_fixup(); //Reject whole word
255  //Reject whole word
256  void rej_word_no_alphanums();
257  void rej_word_mostly_rej(); //Reject whole word
258  void rej_word_bad_quality(); //Reject whole word
259  void rej_word_doc_rej(); //Reject whole word
260  void rej_word_block_rej(); //Reject whole word
261  void rej_word_row_rej(); //Reject whole word
262 };
263 #endif
REJMAP::full_print
void full_print(FILE *fp)
Definition: rejctmap.cpp:332
REJMAP::rej_word_row_rej
void rej_word_row_rej()
Definition: rejctmap.cpp:441
REJMAP::recoverable_rejects
bool recoverable_rejects()
Definition: rejctmap.cpp:290
R_TESS_FAILURE
Definition: rejctmap.h:87
R_BAD_PERMUTER
Definition: rejctmap.h:100
BITS16::turn_on_bit
void turn_on_bit(uint8_t bit_num)
Definition: bits16.h:46
REJMAP::rej_word_block_rej
void rej_word_block_rej()
Definition: rejctmap.cpp:432
REJ::setrej_hyphen_accept
void setrej_hyphen_accept()
Definition: rejctmap.cpp:205
REJ::setrej_small_xht
void setrej_small_xht()
Definition: rejctmap.cpp:98
BITS16::bit
bool bit(uint8_t bit_num) const
Definition: bits16.h:65
REJMAP::accept_count
int16_t accept_count()
Definition: rejctmap.cpp:278
REJMAP::rej_word_xht_fixup
void rej_word_xht_fixup()
Definition: rejctmap.cpp:387
REJ::setrej_quality_accept
void setrej_quality_accept()
Definition: rejctmap.cpp:220
REJMAP::initialise
void initialise(int16_t length)
Definition: rejctmap.cpp:272
ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:87
params.h
REJ::setrej_row_rej
void setrej_row_rej()
Definition: rejctmap.cpp:195
REJ::setrej_mm_accept
void setrej_mm_accept()
Definition: rejctmap.cpp:215
R_BAD_QUALITY
Definition: rejctmap.h:110
R_EDGE_CHAR
Definition: rejctmap.h:89
BITS16
Definition: bits16.h:24
REJ::setrej_block_rej
void setrej_block_rej()
Definition: rejctmap.cpp:190
REJ::setrej_hyphen
void setrej_hyphen()
Definition: rejctmap.cpp:155
R_POOR_MATCH
Definition: rejctmap.h:97
REJ::recoverable
bool recoverable()
Definition: rejctmap.h:160
REJ::display_char
char display_char()
Definition: rejctmap.h:138
R_SMALL_XHT
Definition: rejctmap.h:88
R_CONTAINS_BLANKS
Definition: rejctmap.h:99
REJ::setrej_nn_accept
void setrej_nn_accept()
Definition: rejctmap.cpp:210
R_DUBIOUS
Definition: rejctmap.h:104
REJMAP::REJMAP
REJMAP()
Definition: rejctmap.h:206
REJMAP::operator[]
REJ & operator[](int16_t index) const
Definition: rejctmap.h:215
R_MOSTLY_REJ
Definition: rejctmap.h:106
R_QUALITY_ACCEPT
Definition: rejctmap.h:122
REJ::setrej_poor_match
void setrej_poor_match()
Definition: rejctmap.cpp:133
REJ::setrej_xht_fixup
void setrej_xht_fixup()
Definition: rejctmap.cpp:175
REJMAP::remove_pos
void remove_pos(int16_t pos)
Definition: rejctmap.cpp:308
REJMAP::rej_word_bad_quality
void rej_word_bad_quality()
Definition: rejctmap.cpp:414
R_HYPHEN
Definition: rejctmap.h:103
REJ
Definition: rejctmap.h:96
R_DOC_REJ
Definition: rejctmap.h:113
REJMAP::length
int32_t length() const
Definition: rejctmap.h:222
REJMAP::rej_word_contains_blanks
void rej_word_contains_blanks()
Definition: rejctmap.cpp:369
REJ::setrej_minimal_rej_accept
void setrej_minimal_rej_accept()
Definition: rejctmap.cpp:225
R_UNLV_REJ
Definition: rejctmap.h:116
REJ::setrej_mostly_rej
void setrej_mostly_rej()
Definition: rejctmap.cpp:170
R_NN_ACCEPT
Definition: rejctmap.h:119
bits16.h
REJ::REJ
REJ()=default
MAP_REJECT_POTENTIAL
#define MAP_REJECT_POTENTIAL
Definition: rejctmap.h:94
REJ::perm_rejected
bool perm_rejected()
Definition: rejctmap.cpp:21
REJ::setrej_1Il_conflict
void setrej_1Il_conflict()
Definition: rejctmap.cpp:108
REJ::setrej_bad_repetition
void setrej_bad_repetition()
Definition: rejctmap.cpp:128
R_REJ_CBLOB
Definition: rejctmap.h:92
REJ::setrej_no_alphanums
void setrej_no_alphanums()
Definition: rejctmap.cpp:165
R_MINIMAL_REJ_ACCEPT
Definition: rejctmap.h:123
REJMAP::rej_word_not_tess_accepted
void rej_word_not_tess_accepted()
Definition: rejctmap.cpp:360
R_NOT_TESS_ACCEPTED
Definition: rejctmap.h:98
REJMAP::reject_count
int16_t reject_count()
Definition: rejctmap.h:228
REJMAP::rej_word_tess_failure
void rej_word_tess_failure()
Definition: rejctmap.cpp:351
R_BAD_REPETITION
Definition: rejctmap.h:94
R_1IL_CONFLICT
Definition: rejctmap.h:90
REJ::setrej_doc_rej
void setrej_doc_rej()
Definition: rejctmap.cpp:185
REJMAP::operator=
REJMAP & operator=(const REJMAP &source)
Definition: rejctmap.cpp:264
REJ_FLAGS
REJ_FLAGS
Definition: rejctmap.h:48
REJ::setrej_contains_blanks
void setrej_contains_blanks()
Definition: rejctmap.cpp:144
REJMAP::rej_word_no_alphanums
void rej_word_no_alphanums()
Definition: rejctmap.cpp:396
REJ::setrej_dubious
void setrej_dubious()
Definition: rejctmap.cpp:160
MAP_ACCEPT
#define MAP_ACCEPT
Definition: rejctmap.h:91
REJ::accepted
bool accepted()
Definition: rejctmap.h:153
R_HYPHEN_ACCEPT
Definition: rejctmap.h:120
REJ::setrej_rej_cblob
void setrej_rej_cblob()
Definition: rejctmap.cpp:118
REJMAP::rej_word_bad_permuter
void rej_word_bad_permuter()
Definition: rejctmap.cpp:378
REJ::setrej_bad_permuter
void setrej_bad_permuter()
Definition: rejctmap.cpp:150
REJMAP::rej_word_doc_rej
void rej_word_doc_rej()
Definition: rejctmap.cpp:423
REJMAP::rej_word_small_xht
void rej_word_small_xht()
Definition: rejctmap.cpp:342
REJ::setrej_edge_char
void setrej_edge_char()
Definition: rejctmap.cpp:103
R_XHT_FIXUP
Definition: rejctmap.h:107
MAP_REJECT_TEMP
#define MAP_REJECT_TEMP
Definition: rejctmap.h:93
R_ROW_REJ
Definition: rejctmap.h:115
REJ::setrej_unlv_rej
void setrej_unlv_rej()
Definition: rejctmap.cpp:200
REJ::rejected
bool rejected()
Definition: rejctmap.cpp:70
R_MM_ACCEPT
Definition: rejctmap.h:121
REJ::accept_if_good_quality
bool accept_if_good_quality()
Definition: rejctmap.cpp:80
R_MM_REJECT
Definition: rejctmap.h:93
errcode.h
REJMAP::quality_recoverable_rejects
bool quality_recoverable_rejects()
Definition: rejctmap.cpp:299
REJMAP
Definition: rejctmap.h:200
MAP_REJECT_PERM
#define MAP_REJECT_PERM
Definition: rejctmap.h:92
R_NO_ALPHANUMS
Definition: rejctmap.h:105
R_BLOCK_REJ
Definition: rejctmap.h:114
REJMAP::rej_word_mostly_rej
void rej_word_mostly_rej()
Definition: rejctmap.cpp:405
REJ::setrej_bad_quality
void setrej_bad_quality()
Definition: rejctmap.cpp:180
REJMAP::print
void print(FILE *fp)
Definition: rejctmap.cpp:320
R_POSTNN_1IL
Definition: rejctmap.h:91
REJ::setrej_tess_failure
void setrej_tess_failure()
Definition: rejctmap.cpp:93
REJ::flag
bool flag(REJ_FLAGS rej_flag)
Definition: rejctmap.h:131
REJ::setrej_not_tess_accepted
void setrej_not_tess_accepted()
Definition: rejctmap.cpp:138
REJ::setrej_postNN_1Il
void setrej_postNN_1Il()
Definition: rejctmap.cpp:113
REJ::operator=
REJ & operator=(const REJ &source)
Definition: rejctmap.h:124
REJ::setrej_mm_reject
void setrej_mm_reject()
Definition: rejctmap.cpp:123
REJ::full_print
void full_print(FILE *fp)
Definition: rejctmap.cpp:231