tesseract  5.0.0-alpha-619-ge9db
rejctmap.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: rejctmap.cpp (Formerly rejmap.c)
3  * Description: REJ and REJMAP class functions.
4  * Author: Phil Cheatle
5  *
6  * (C) Copyright 1994, Hewlett-Packard Ltd.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  *
17  **********************************************************************/
18 
19 #include "rejctmap.h"
20 #include "params.h"
21 
22 bool REJ::perm_rejected() { //Is char perm reject?
23  return (flag (R_TESS_FAILURE) ||
24  flag (R_SMALL_XHT) ||
25  flag (R_EDGE_CHAR) ||
26  flag (R_1IL_CONFLICT) ||
27  flag (R_POSTNN_1IL) ||
28  flag (R_REJ_CBLOB) ||
30 }
31 
32 
33 bool REJ::rej_before_nn_accept() {
34  return flag (R_POOR_MATCH) ||
37 }
38 
39 
40 bool REJ::rej_between_nn_and_mm() {
41  return flag (R_HYPHEN) ||
42  flag (R_DUBIOUS) ||
44 }
45 
46 
47 bool REJ::rej_between_mm_and_quality_accept() {
48  return flag (R_BAD_QUALITY);
49 }
50 
51 
52 bool REJ::rej_between_quality_and_minimal_rej_accept() {
53  return flag (R_DOC_REJ) ||
55 }
56 
57 
58 bool REJ::rej_before_mm_accept() {
59  return rej_between_nn_and_mm () ||
60  (rej_before_nn_accept () &&
62 }
63 
64 
65 bool REJ::rej_before_quality_accept() {
66  return rej_between_mm_and_quality_accept () ||
67  (!flag (R_MM_ACCEPT) && rej_before_mm_accept ());
68 }
69 
70 
71 bool REJ::rejected() { //Is char rejected?
73  return false;
74  else
75  return (perm_rejected () ||
76  rej_between_quality_and_minimal_rej_accept () ||
77  (!flag (R_QUALITY_ACCEPT) && rej_before_quality_accept ()));
78 }
79 
80 
81 bool REJ::accept_if_good_quality() { //potential rej?
82  return (rejected () &&
83  !perm_rejected () &&
84  flag (R_BAD_PERMUTER) &&
85  !flag (R_POOR_MATCH) &&
88  (!rej_between_nn_and_mm () &&
89  !rej_between_mm_and_quality_accept () &&
90  !rej_between_quality_and_minimal_rej_accept ()));
91 }
92 
93 
94 void REJ::setrej_tess_failure() { //Tess generated blank
95  set_flag(R_TESS_FAILURE);
96 }
97 
98 
99 void REJ::setrej_small_xht() { //Small xht char/wd
100  set_flag(R_SMALL_XHT);
101 }
102 
103 
104 void REJ::setrej_edge_char() { //Close to image edge
105  set_flag(R_EDGE_CHAR);
106 }
107 
108 
109 void REJ::setrej_1Il_conflict() { //Initial reject map
110  set_flag(R_1IL_CONFLICT);
111 }
112 
113 
114 void REJ::setrej_postNN_1Il() { //1Il after NN
115  set_flag(R_POSTNN_1IL);
116 }
117 
118 
119 void REJ::setrej_rej_cblob() { //Insert duff blob
120  set_flag(R_REJ_CBLOB);
121 }
122 
123 
124 void REJ::setrej_mm_reject() { //Matrix matcher
125  set_flag(R_MM_REJECT);
126 }
127 
128 
129 void REJ::setrej_bad_repetition() { //Odd repeated char
130  set_flag(R_BAD_REPETITION);
131 }
132 
133 
134 void REJ::setrej_poor_match() { //Failed Rays heuristic
135  set_flag(R_POOR_MATCH);
136 }
137 
138 
140  //TEMP reject_word
141  set_flag(R_NOT_TESS_ACCEPTED);
142 }
143 
144 
146  //TEMP reject_word
147  set_flag(R_CONTAINS_BLANKS);
148 }
149 
150 
151 void REJ::setrej_bad_permuter() { //POTENTIAL reject_word
152  set_flag(R_BAD_PERMUTER);
153 }
154 
155 
156 void REJ::setrej_hyphen() { //PostNN dubious hyphen or .
157  set_flag(R_HYPHEN);
158 }
159 
160 
161 void REJ::setrej_dubious() { //PostNN dubious limit
162  set_flag(R_DUBIOUS);
163 }
164 
165 
166 void REJ::setrej_no_alphanums() { //TEMP reject_word
167  set_flag(R_NO_ALPHANUMS);
168 }
169 
170 
171 void REJ::setrej_mostly_rej() { //TEMP reject_word
172  set_flag(R_MOSTLY_REJ);
173 }
174 
175 
176 void REJ::setrej_xht_fixup() { //xht fixup
177  set_flag(R_XHT_FIXUP);
178 }
179 
180 
181 void REJ::setrej_bad_quality() { //TEMP reject_word
182  set_flag(R_BAD_QUALITY);
183 }
184 
185 
186 void REJ::setrej_doc_rej() { //TEMP reject_word
187  set_flag(R_DOC_REJ);
188 }
189 
190 
191 void REJ::setrej_block_rej() { //TEMP reject_word
192  set_flag(R_BLOCK_REJ);
193 }
194 
195 
196 void REJ::setrej_row_rej() { //TEMP reject_word
197  set_flag(R_ROW_REJ);
198 }
199 
200 
201 void REJ::setrej_unlv_rej() { //TEMP reject_word
202  set_flag(R_UNLV_REJ);
203 }
204 
205 
206 void REJ::setrej_hyphen_accept() { //NN Flipped a char
207  set_flag(R_HYPHEN_ACCEPT);
208 }
209 
210 
211 void REJ::setrej_nn_accept() { //NN Flipped a char
212  set_flag(R_NN_ACCEPT);
213 }
214 
215 
216 void REJ::setrej_mm_accept() { //Matrix matcher
217  set_flag(R_MM_ACCEPT);
218 }
219 
220 
221 void REJ::setrej_quality_accept() { //Quality flip a char
222  set_flag(R_QUALITY_ACCEPT);
223 }
224 
225 
227  //Accept all except blank
228  set_flag(R_MINIMAL_REJ_ACCEPT);
229 }
230 
231 
232 void REJ::full_print(FILE *fp) {
233  fprintf (fp, "R_TESS_FAILURE: %s\n", flag (R_TESS_FAILURE) ? "T" : "F");
234  fprintf (fp, "R_SMALL_XHT: %s\n", flag (R_SMALL_XHT) ? "T" : "F");
235  fprintf (fp, "R_EDGE_CHAR: %s\n", flag (R_EDGE_CHAR) ? "T" : "F");
236  fprintf (fp, "R_1IL_CONFLICT: %s\n", flag (R_1IL_CONFLICT) ? "T" : "F");
237  fprintf (fp, "R_POSTNN_1IL: %s\n", flag (R_POSTNN_1IL) ? "T" : "F");
238  fprintf (fp, "R_REJ_CBLOB: %s\n", flag (R_REJ_CBLOB) ? "T" : "F");
239  fprintf (fp, "R_MM_REJECT: %s\n", flag (R_MM_REJECT) ? "T" : "F");
240  fprintf (fp, "R_BAD_REPETITION: %s\n", flag (R_BAD_REPETITION) ? "T" : "F");
241  fprintf (fp, "R_POOR_MATCH: %s\n", flag (R_POOR_MATCH) ? "T" : "F");
242  fprintf (fp, "R_NOT_TESS_ACCEPTED: %s\n",
243  flag (R_NOT_TESS_ACCEPTED) ? "T" : "F");
244  fprintf (fp, "R_CONTAINS_BLANKS: %s\n",
245  flag (R_CONTAINS_BLANKS) ? "T" : "F");
246  fprintf (fp, "R_BAD_PERMUTER: %s\n", flag (R_BAD_PERMUTER) ? "T" : "F");
247  fprintf (fp, "R_HYPHEN: %s\n", flag (R_HYPHEN) ? "T" : "F");
248  fprintf (fp, "R_DUBIOUS: %s\n", flag (R_DUBIOUS) ? "T" : "F");
249  fprintf (fp, "R_NO_ALPHANUMS: %s\n", flag (R_NO_ALPHANUMS) ? "T" : "F");
250  fprintf (fp, "R_MOSTLY_REJ: %s\n", flag (R_MOSTLY_REJ) ? "T" : "F");
251  fprintf (fp, "R_XHT_FIXUP: %s\n", flag (R_XHT_FIXUP) ? "T" : "F");
252  fprintf (fp, "R_BAD_QUALITY: %s\n", flag (R_BAD_QUALITY) ? "T" : "F");
253  fprintf (fp, "R_DOC_REJ: %s\n", flag (R_DOC_REJ) ? "T" : "F");
254  fprintf (fp, "R_BLOCK_REJ: %s\n", flag (R_BLOCK_REJ) ? "T" : "F");
255  fprintf (fp, "R_ROW_REJ: %s\n", flag (R_ROW_REJ) ? "T" : "F");
256  fprintf (fp, "R_UNLV_REJ: %s\n", flag (R_UNLV_REJ) ? "T" : "F");
257  fprintf (fp, "R_HYPHEN_ACCEPT: %s\n", flag (R_HYPHEN_ACCEPT) ? "T" : "F");
258  fprintf (fp, "R_NN_ACCEPT: %s\n", flag (R_NN_ACCEPT) ? "T" : "F");
259  fprintf (fp, "R_MM_ACCEPT: %s\n", flag (R_MM_ACCEPT) ? "T" : "F");
260  fprintf (fp, "R_QUALITY_ACCEPT: %s\n", flag (R_QUALITY_ACCEPT) ? "T" : "F");
261  fprintf (fp, "R_MINIMAL_REJ_ACCEPT: %s\n",
262  flag (R_MINIMAL_REJ_ACCEPT) ? "T" : "F");
263 }
264 
265 REJMAP &REJMAP::operator=(const REJMAP &source) {
266  initialise(source.len);
267  for (int i = 0; i < len; i++) {
268  ptr[i] = source.ptr[i];
269  }
270  return *this;
271 }
272 
273 void REJMAP::initialise(int16_t length) {
274  ptr.reset(new REJ[length]);
275  len = length;
276 }
277 
278 
279 int16_t REJMAP::accept_count() { //How many accepted?
280  int i;
281  int16_t count = 0;
282 
283  for (i = 0; i < len; i++) {
284  if (ptr[i].accepted ())
285  count++;
286  }
287  return count;
288 }
289 
290 
291 bool REJMAP::recoverable_rejects() { //Any non perm rejs?
292  for (int i = 0; i < len; i++) {
293  if (ptr[i].recoverable ())
294  return true;
295  }
296  return false;
297 }
298 
299 
300 bool REJMAP::quality_recoverable_rejects() { //Any potential rejs?
301  for (int i = 0; i < len; i++) {
302  if (ptr[i].accept_if_good_quality ())
303  return true;
304  }
305  return false;
306 }
307 
308 
309 void REJMAP::remove_pos( //Cut out an element
310  int16_t pos //element to remove
311  ) {
312  ASSERT_HOST (pos >= 0);
313  ASSERT_HOST (pos < len);
314  ASSERT_HOST (len > 0);
315 
316  len--;
317  for (; pos < len; pos++) ptr[pos] = ptr[pos + 1];
318 }
319 
320 
321 void REJMAP::print(FILE *fp) {
322  int i;
323  char buff[512];
324 
325  for (i = 0; i < len; i++) {
326  buff[i] = ptr[i].display_char ();
327  }
328  buff[i] = '\0';
329  fprintf (fp, "\"%s\"", buff);
330 }
331 
332 
333 void REJMAP::full_print(FILE *fp) {
334  int i;
335 
336  for (i = 0; i < len; i++) {
337  ptr[i].full_print (fp);
338  fprintf (fp, "\n");
339  }
340 }
341 
342 
343 void REJMAP::rej_word_small_xht() { //Reject whole word
344  int i;
345 
346  for (i = 0; i < len; i++) {
347  ptr[i].setrej_small_xht ();
348  }
349 }
350 
351 
352 void REJMAP::rej_word_tess_failure() { //Reject whole word
353  int i;
354 
355  for (i = 0; i < len; i++) {
356  ptr[i].setrej_tess_failure ();
357  }
358 }
359 
360 
361 void REJMAP::rej_word_not_tess_accepted() { //Reject whole word
362  int i;
363 
364  for (i = 0; i < len; i++) {
365  if (ptr[i].accepted()) ptr[i].setrej_not_tess_accepted();
366  }
367 }
368 
369 
370 void REJMAP::rej_word_contains_blanks() { //Reject whole word
371  int i;
372 
373  for (i = 0; i < len; i++) {
374  if (ptr[i].accepted()) ptr[i].setrej_contains_blanks();
375  }
376 }
377 
378 
379 void REJMAP::rej_word_bad_permuter() { //Reject whole word
380  int i;
381 
382  for (i = 0; i < len; i++) {
383  if (ptr[i].accepted()) ptr[i].setrej_bad_permuter ();
384  }
385 }
386 
387 
388 void REJMAP::rej_word_xht_fixup() { //Reject whole word
389  int i;
390 
391  for (i = 0; i < len; i++) {
392  if (ptr[i].accepted()) ptr[i].setrej_xht_fixup();
393  }
394 }
395 
396 
397 void REJMAP::rej_word_no_alphanums() { //Reject whole word
398  int i;
399 
400  for (i = 0; i < len; i++) {
401  if (ptr[i].accepted()) ptr[i].setrej_no_alphanums();
402  }
403 }
404 
405 
406 void REJMAP::rej_word_mostly_rej() { //Reject whole word
407  int i;
408 
409  for (i = 0; i < len; i++) {
410  if (ptr[i].accepted()) ptr[i].setrej_mostly_rej();
411  }
412 }
413 
414 
415 void REJMAP::rej_word_bad_quality() { //Reject whole word
416  int i;
417 
418  for (i = 0; i < len; i++) {
419  if (ptr[i].accepted()) ptr[i].setrej_bad_quality();
420  }
421 }
422 
423 
424 void REJMAP::rej_word_doc_rej() { //Reject whole word
425  int i;
426 
427  for (i = 0; i < len; i++) {
428  if (ptr[i].accepted()) ptr[i].setrej_doc_rej();
429  }
430 }
431 
432 
433 void REJMAP::rej_word_block_rej() { //Reject whole word
434  int i;
435 
436  for (i = 0; i < len; i++) {
437  if (ptr[i].accepted()) ptr[i].setrej_block_rej();
438  }
439 }
440 
441 
442 void REJMAP::rej_word_row_rej() { //Reject whole word
443  int i;
444 
445  for (i = 0; i < len; i++) {
446  if (ptr[i].accepted()) ptr[i].setrej_row_rej();
447  }
448 }
REJMAP::full_print
void full_print(FILE *fp)
Definition: rejctmap.cpp:332
REJMAP::rej_word_row_rej
void rej_word_row_rej()
Definition: rejctmap.cpp:441
REJMAP::recoverable_rejects
bool recoverable_rejects()
Definition: rejctmap.cpp:290
R_TESS_FAILURE
Definition: rejctmap.h:87
R_BAD_PERMUTER
Definition: rejctmap.h:100
REJMAP::rej_word_block_rej
void rej_word_block_rej()
Definition: rejctmap.cpp:432
REJ::setrej_hyphen_accept
void setrej_hyphen_accept()
Definition: rejctmap.cpp:205
REJ::setrej_small_xht
void setrej_small_xht()
Definition: rejctmap.cpp:98
REJMAP::accept_count
int16_t accept_count()
Definition: rejctmap.cpp:278
REJMAP::rej_word_xht_fixup
void rej_word_xht_fixup()
Definition: rejctmap.cpp:387
REJ::setrej_quality_accept
void setrej_quality_accept()
Definition: rejctmap.cpp:220
REJMAP::initialise
void initialise(int16_t length)
Definition: rejctmap.cpp:272
rejctmap.h
ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:87
params.h
REJ::setrej_row_rej
void setrej_row_rej()
Definition: rejctmap.cpp:195
REJ::setrej_mm_accept
void setrej_mm_accept()
Definition: rejctmap.cpp:215
R_BAD_QUALITY
Definition: rejctmap.h:110
R_EDGE_CHAR
Definition: rejctmap.h:89
REJ::setrej_block_rej
void setrej_block_rej()
Definition: rejctmap.cpp:190
REJ::setrej_hyphen
void setrej_hyphen()
Definition: rejctmap.cpp:155
R_POOR_MATCH
Definition: rejctmap.h:97
R_SMALL_XHT
Definition: rejctmap.h:88
R_CONTAINS_BLANKS
Definition: rejctmap.h:99
REJ::setrej_nn_accept
void setrej_nn_accept()
Definition: rejctmap.cpp:210
R_DUBIOUS
Definition: rejctmap.h:104
R_MOSTLY_REJ
Definition: rejctmap.h:106
R_QUALITY_ACCEPT
Definition: rejctmap.h:122
REJ::setrej_poor_match
void setrej_poor_match()
Definition: rejctmap.cpp:133
REJ::setrej_xht_fixup
void setrej_xht_fixup()
Definition: rejctmap.cpp:175
REJMAP::remove_pos
void remove_pos(int16_t pos)
Definition: rejctmap.cpp:308
REJMAP::rej_word_bad_quality
void rej_word_bad_quality()
Definition: rejctmap.cpp:414
R_HYPHEN
Definition: rejctmap.h:103
REJ
Definition: rejctmap.h:96
R_DOC_REJ
Definition: rejctmap.h:113
REJMAP::length
int32_t length() const
Definition: rejctmap.h:222
REJMAP::rej_word_contains_blanks
void rej_word_contains_blanks()
Definition: rejctmap.cpp:369
REJ::setrej_minimal_rej_accept
void setrej_minimal_rej_accept()
Definition: rejctmap.cpp:225
R_UNLV_REJ
Definition: rejctmap.h:116
REJ::setrej_mostly_rej
void setrej_mostly_rej()
Definition: rejctmap.cpp:170
R_NN_ACCEPT
Definition: rejctmap.h:119
REJ::perm_rejected
bool perm_rejected()
Definition: rejctmap.cpp:21
REJ::setrej_1Il_conflict
void setrej_1Il_conflict()
Definition: rejctmap.cpp:108
REJ::setrej_bad_repetition
void setrej_bad_repetition()
Definition: rejctmap.cpp:128
R_REJ_CBLOB
Definition: rejctmap.h:92
REJ::setrej_no_alphanums
void setrej_no_alphanums()
Definition: rejctmap.cpp:165
R_MINIMAL_REJ_ACCEPT
Definition: rejctmap.h:123
REJMAP::rej_word_not_tess_accepted
void rej_word_not_tess_accepted()
Definition: rejctmap.cpp:360
R_NOT_TESS_ACCEPTED
Definition: rejctmap.h:98
REJMAP::rej_word_tess_failure
void rej_word_tess_failure()
Definition: rejctmap.cpp:351
R_BAD_REPETITION
Definition: rejctmap.h:94
R_1IL_CONFLICT
Definition: rejctmap.h:90
REJ::setrej_doc_rej
void setrej_doc_rej()
Definition: rejctmap.cpp:185
REJMAP::operator=
REJMAP & operator=(const REJMAP &source)
Definition: rejctmap.cpp:264
REJ::setrej_contains_blanks
void setrej_contains_blanks()
Definition: rejctmap.cpp:144
REJMAP::rej_word_no_alphanums
void rej_word_no_alphanums()
Definition: rejctmap.cpp:396
REJ::setrej_dubious
void setrej_dubious()
Definition: rejctmap.cpp:160
R_HYPHEN_ACCEPT
Definition: rejctmap.h:120
REJ::setrej_rej_cblob
void setrej_rej_cblob()
Definition: rejctmap.cpp:118
REJMAP::rej_word_bad_permuter
void rej_word_bad_permuter()
Definition: rejctmap.cpp:378
REJ::setrej_bad_permuter
void setrej_bad_permuter()
Definition: rejctmap.cpp:150
REJMAP::rej_word_doc_rej
void rej_word_doc_rej()
Definition: rejctmap.cpp:423
REJMAP::rej_word_small_xht
void rej_word_small_xht()
Definition: rejctmap.cpp:342
count
int count(LIST var_list)
Definition: oldlist.cpp:79
REJ::setrej_edge_char
void setrej_edge_char()
Definition: rejctmap.cpp:103
R_XHT_FIXUP
Definition: rejctmap.h:107
R_ROW_REJ
Definition: rejctmap.h:115
REJ::setrej_unlv_rej
void setrej_unlv_rej()
Definition: rejctmap.cpp:200
REJ::rejected
bool rejected()
Definition: rejctmap.cpp:70
R_MM_ACCEPT
Definition: rejctmap.h:121
REJ::accept_if_good_quality
bool accept_if_good_quality()
Definition: rejctmap.cpp:80
R_MM_REJECT
Definition: rejctmap.h:93
REJMAP::quality_recoverable_rejects
bool quality_recoverable_rejects()
Definition: rejctmap.cpp:299
REJMAP
Definition: rejctmap.h:200
R_NO_ALPHANUMS
Definition: rejctmap.h:105
R_BLOCK_REJ
Definition: rejctmap.h:114
REJMAP::rej_word_mostly_rej
void rej_word_mostly_rej()
Definition: rejctmap.cpp:405
REJ::setrej_bad_quality
void setrej_bad_quality()
Definition: rejctmap.cpp:180
REJMAP::print
void print(FILE *fp)
Definition: rejctmap.cpp:320
R_POSTNN_1IL
Definition: rejctmap.h:91
REJ::setrej_tess_failure
void setrej_tess_failure()
Definition: rejctmap.cpp:93
REJ::flag
bool flag(REJ_FLAGS rej_flag)
Definition: rejctmap.h:131
REJ::setrej_not_tess_accepted
void setrej_not_tess_accepted()
Definition: rejctmap.cpp:138
REJ::setrej_postNN_1Il
void setrej_postNN_1Il()
Definition: rejctmap.cpp:113
REJ::setrej_mm_reject
void setrej_mm_reject()
Definition: rejctmap.cpp:123
REJ::full_print
void full_print(FILE *fp)
Definition: rejctmap.cpp:231