tesseract  4.0.0-1-g2a2b
lm_consistency.h
Go to the documentation of this file.
1 // File: lm_consistency.h
3 // Description: Struct for recording consistency of the paths representing
4 // OCR hypotheses.
5 // Author: Rika Antonova
6 // Created: Mon Jun 20 11:26:43 PST 2012
7 //
8 // (C) Copyright 2012, Google Inc.
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 // http://www.apache.org/licenses/LICENSE-2.0
13 // Unless required by applicable law or agreed to in writing, software
14 // distributed under the License is distributed on an "AS IS" BASIS,
15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 // See the License for the specific language governing permissions and
17 // limitations under the License.
18 //
20 
21 #ifndef TESSERACT_WORDREC_LM_CONSISTENCY_H_
22 #define TESSERACT_WORDREC_LM_CONSISTENCY_H_
23 
24 #include <cstdint> // for INT16_MAX
25 #include "dawg.h" // for EDGE_REF, NO_EDGE
26 #include "dict.h" // for XH_GOOD, XH_INCONSISTENT, XHeightConsistencyEnum
27 
28 class BLOB_CHOICE;
29 
30 namespace tesseract {
31 
32 static const char * const XHeightConsistencyEnumName[] = {
33  "XH_GOOD",
34  "XH_SUBNORMAL",
35  "XH_INCONSISTENT",
36 };
37 
38 // Struct for keeping track of the consistency of the path.
41 
42  // How much do characters have to be shifted away from normal parameters
43  // before we say they're not normal?
44  static const int kShiftThresh = 1;
45 
46  // How much shifting from subscript to superscript and back
47  // before we declare shenanigans?
48  static const int kMaxEntropy = 1;
49 
50  // Script positions - order important for entropy calculation.
51  static const int kSUB = 0, kNORM = 1, kSUP = 2;
52  static const int kNumPos = 3;
53 
54  explicit LMConsistencyInfo(const LMConsistencyInfo* parent_info) {
55  if (parent_info == nullptr) {
56  // Initialize from scratch.
57  num_alphas = 0;
58  num_digits = 0;
59  num_punc = 0;
60  num_other = 0;
61  chartype = CT_NONE;
62  punc_ref = NO_EDGE;
63  invalid_punc = false;
65  num_lower = 0;
66  script_id = 0;
67  inconsistent_script = false;
69  inconsistent_font = false;
70  // Initialize XHeight stats.
71  for (int i = 0; i < kNumPos; i++) {
72  xht_count[i] = 0;
73  xht_count_punc[i] = 0;
74  xht_lo[i] = 0;
75  xht_hi[i] = 256; // kBlnCellHeight
76  }
77  xht_sp = -1; // This invalid value indicates that there was no parent.
78  xpos_entropy = 0;
80  } else {
81  // Copy parent info
82  *this = *parent_info;
83  }
84  }
85  inline int NumInconsistentPunc() const {
86  return invalid_punc ? num_punc : 0;
87  }
88  inline int NumInconsistentCase() const {
90  }
91  inline int NumInconsistentChartype() const {
92  return (NumInconsistentPunc() + num_other +
94  }
95  inline bool Consistent() const {
96  return (NumInconsistentPunc() == 0 && NumInconsistentCase() == 0 &&
99  }
100  inline int NumInconsistentSpaces() const {
102  }
103  inline int InconsistentXHeight() const {
104  return xht_decision == XH_INCONSISTENT;
105  }
106  void ComputeXheightConsistency(const BLOB_CHOICE *b, bool is_punc);
107  float BodyMinXHeight() const {
108  if (InconsistentXHeight())
109  return 0.0f;
110  return xht_lo[kNORM];
111  }
112  float BodyMaxXHeight() const {
113  if (InconsistentXHeight())
114  return static_cast<float>(INT16_MAX);
115  return xht_hi[kNORM];
116  }
117 
120  int num_punc;
131  // Metrics clumped by position.
132  float xht_lo[kNumPos];
133  float xht_hi[kNumPos];
134  int16_t xht_count[kNumPos];
136  int16_t xht_sp;
137  int16_t xpos_entropy;
139 };
140 
141 } // namespace tesseract
142 
143 #endif // TESSERACT_WORDREC_LM_CONSISTENCY_H_
static const int kMaxEntropy
int16_t xht_count_punc[kNumPos]
LMConsistencyInfo(const LMConsistencyInfo *parent_info)
int64_t EDGE_REF
Definition: dawg.h:55
static const int kShiftThresh
XHeightConsistencyEnum
Definition: dict.h:75
XHeightConsistencyEnum xht_decision
void ComputeXheightConsistency(const BLOB_CHOICE *b, bool is_punc)