tesseract  5.0.0-alpha-619-ge9db
lm_consistency.h
Go to the documentation of this file.
1 // File: lm_consistency.h
3 // Description: Struct for recording consistency of the paths representing
4 // OCR hypotheses.
5 // Author: Rika Antonova
6 //
7 // (C) Copyright 2012, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #ifndef TESSERACT_WORDREC_LM_CONSISTENCY_H_
21 #define TESSERACT_WORDREC_LM_CONSISTENCY_H_
22 
23 #include <cstdint> // for INT16_MAX
24 #include "dawg.h" // for EDGE_REF, NO_EDGE
25 #include "dict.h" // for XH_GOOD, XH_INCONSISTENT, XHeightConsi...
26 
27 class BLOB_CHOICE;
28 
29 namespace tesseract {
30 
// Printable names for the x-height consistency states, stored as string
// literals in a fixed three-entry table.
// NOTE(review): presumably indexed by XHeightConsistencyEnum from dict.h;
// the enum itself is not visible in this listing, so confirm that the entry
// order here matches the enumerator order there.
31 static const char * const XHeightConsistencyEnumName[] = {
32  "XH_GOOD",
33  "XH_SUBNORMAL",
34  "XH_INCONSISTENT",
35 };
36 
37 // Struct for keeping track of the consistency of the path.
40 
41  // How much do characters have to be shifted away from normal parameters
42  // before we say they're not normal?
43  static const int kShiftThresh = 1;
44 
45  // How much shifting from subscript to superscript and back
46  // before we declare shenanigans?
47  static const int kMaxEntropy = 1;
48 
49  // Script positions - order important for entropy calculation.
50  static const int kSUB = 0, kNORM = 1, kSUP = 2;
51  static const int kNumPos = 3;
52 
// Constructs a consistency record either from scratch or from a parent.
//   parent_info: record to copy from, or nullptr to start with a fresh
//                record (counters zeroed, flags cleared).
// NOTE(review): listing lines 63, 67 and 78 are absent from this view, so
// the initialization of any members assigned on those lines cannot be
// confirmed from here.
53  explicit LMConsistencyInfo(const LMConsistencyInfo* parent_info) {
54  if (parent_info == nullptr) {
55  // Initialize from scratch.
56  num_alphas = 0;
57  num_digits = 0;
58  num_punc = 0;
59  num_other = 0;
60  chartype = CT_NONE;
61  punc_ref = NO_EDGE;
62  invalid_punc = false;
64  num_lower = 0;
65  script_id = 0;
66  inconsistent_script = false;
68  inconsistent_font = false;
69  // Initialize XHeight stats.
// Every script position (kNumPos of them) starts with zero counts and
// the x-height range [0, 256].
70  for (int i = 0; i < kNumPos; i++) {
71  xht_count[i] = 0;
72  xht_count_punc[i] = 0;
73  xht_lo[i] = 0;
74  xht_hi[i] = 256; // kBlnCellHeight
75  }
76  xht_sp = -1; // This invalid value indicates that there was no parent.
77  xpos_entropy = 0;
79  } else {
80  // Copy parent info
// Copy-assigns the whole parent record into this object.
81  *this = *parent_info;
82  }
83  }
// Returns num_punc when invalid_punc is set, otherwise 0.
84  inline int NumInconsistentPunc() const {
85  return invalid_punc ? num_punc : 0;
86  }
87  inline int NumInconsistentCase() const {
89  }
90  inline int NumInconsistentChartype() const {
91  return (NumInconsistentPunc() + num_other +
93  }
94  inline bool Consistent() const {
95  return (NumInconsistentPunc() == 0 && NumInconsistentCase() == 0 &&
98  }
99  inline int NumInconsistentSpaces() const {
101  }
// Returns 1 when xht_decision equals XH_INCONSISTENT and 0 otherwise; the
// comparison result is converted to the declared int return type.
102  inline int InconsistentXHeight() const {
103  return xht_decision == XH_INCONSISTENT;
104  }
// Declaration only; the body is not part of this header listing.
//   b:       blob choice to examine (read-only pointer).
//   is_punc: NOTE(review): presumably flags b as punctuation - confirm
//            against the definition.
105  void ComputeXheightConsistency(const BLOB_CHOICE *b, bool is_punc);
// Returns the lower x-height bound recorded for the kNORM position,
// xht_lo[kNORM], or 0.0f when InconsistentXHeight() reports inconsistency.
106  float BodyMinXHeight() const {
107  if (InconsistentXHeight())
108  return 0.0f;
109  return xht_lo[kNORM];
110  }
// Returns the upper x-height bound recorded for the kNORM position,
// xht_hi[kNORM], or INT16_MAX converted to float when InconsistentXHeight()
// reports inconsistency.
111  float BodyMaxXHeight() const {
112  if (InconsistentXHeight())
113  return static_cast<float>(INT16_MAX);
114  return xht_hi[kNORM];
115  }
116 
120  int num_punc;
128  // Metrics clumped by position.
129  float xht_lo[kNumPos];
130  float xht_hi[kNumPos];
131  int16_t xht_count[kNumPos];
133  int16_t xht_sp;
134  int16_t xpos_entropy;
138 };
139 
140 } // namespace tesseract
141 
142 #endif // TESSERACT_WORDREC_LM_CONSISTENCY_H_
tesseract::LMConsistencyInfo::kNORM
static const int kNORM
Definition: lm_consistency.h:50
tesseract::LMConsistencyInfo::chartype
ChartypeEnum chartype
Definition: lm_consistency.h:122
dict.h
tesseract::LMConsistencyInfo::kShiftThresh
static const int kShiftThresh
Definition: lm_consistency.h:43
tesseract::LMConsistencyInfo::NumInconsistentSpaces
int NumInconsistentSpaces() const
Definition: lm_consistency.h:99
tesseract::LMConsistencyInfo::xht_count_punc
int16_t xht_count_punc[kNumPos]
Definition: lm_consistency.h:132
tesseract::LMConsistencyInfo::inconsistent_script
bool inconsistent_script
Definition: lm_consistency.h:136
tesseract::XH_GOOD
Definition: dict.h:78
tesseract::LMConsistencyInfo::xht_sp
int16_t xht_sp
Definition: lm_consistency.h:133
tesseract::LMConsistencyInfo::Consistent
bool Consistent() const
Definition: lm_consistency.h:94
tesseract::LMConsistencyInfo::LMConsistencyInfo
LMConsistencyInfo(const LMConsistencyInfo *parent_info)
Definition: lm_consistency.h:53
tesseract::LMConsistencyInfo::CT_ALPHA
Definition: lm_consistency.h:39
tesseract::XHeightConsistencyEnum
XHeightConsistencyEnum
Definition: dict.h:78
tesseract::LMConsistencyInfo::num_punc
int num_punc
Definition: lm_consistency.h:120
tesseract::LMConsistencyInfo::BodyMinXHeight
float BodyMinXHeight() const
Definition: lm_consistency.h:106
tesseract::LMConsistencyInfo::xht_decision
XHeightConsistencyEnum xht_decision
Definition: lm_consistency.h:123
tesseract::LMConsistencyInfo::kNumPos
static const int kNumPos
Definition: lm_consistency.h:51
tesseract::LMConsistencyInfo::kMaxEntropy
static const int kMaxEntropy
Definition: lm_consistency.h:47
tesseract::LMConsistencyInfo::xht_hi
float xht_hi[kNumPos]
Definition: lm_consistency.h:130
dawg.h
tesseract::LMConsistencyInfo::NumInconsistentCase
int NumInconsistentCase() const
Definition: lm_consistency.h:87
tesseract::LMConsistencyInfo::num_lower
int num_lower
Definition: lm_consistency.h:125
tesseract::LMConsistencyInfo::xht_count
int16_t xht_count[kNumPos]
Definition: lm_consistency.h:131
tesseract::LMConsistencyInfo::kSUB
static const int kSUB
Definition: lm_consistency.h:50
tesseract::LMConsistencyInfo::punc_ref
EDGE_REF punc_ref
Definition: lm_consistency.h:117
tesseract::XH_INCONSISTENT
Definition: dict.h:78
tesseract::LMConsistencyInfo::xpos_entropy
int16_t xpos_entropy
Definition: lm_consistency.h:134
tesseract::LMConsistencyInfo::num_inconsistent_spaces
int num_inconsistent_spaces
Definition: lm_consistency.h:127
tesseract::LMConsistencyInfo::xht_lo
float xht_lo[kNumPos]
Definition: lm_consistency.h:129
tesseract
Definition: baseapi.h:65
tesseract::LMConsistencyInfo::num_digits
int num_digits
Definition: lm_consistency.h:119
tesseract::LMConsistencyInfo::script_id
int script_id
Definition: lm_consistency.h:126
tesseract::LMConsistencyInfo
Definition: lm_consistency.h:38
BLOB_CHOICE
Definition: ratngs.h:49
EDGE_REF
int64_t EDGE_REF
Definition: dawg.h:49
tesseract::LMConsistencyInfo::CT_DIGIT
Definition: lm_consistency.h:39
tesseract::LMConsistencyInfo::num_other
int num_other
Definition: lm_consistency.h:121
tesseract::LMConsistencyInfo::num_non_first_upper
int num_non_first_upper
Definition: lm_consistency.h:124
tesseract::LMConsistencyInfo::invalid_punc
bool invalid_punc
Definition: lm_consistency.h:135
tesseract::LMConsistencyInfo::BodyMaxXHeight
float BodyMaxXHeight() const
Definition: lm_consistency.h:111
tesseract::LMConsistencyInfo::CT_OTHER
Definition: lm_consistency.h:39
tesseract::LMConsistencyInfo::kSUP
static const int kSUP
Definition: lm_consistency.h:50
tesseract::LMConsistencyInfo::ComputeXheightConsistency
void ComputeXheightConsistency(const BLOB_CHOICE *b, bool is_punc)
Definition: lm_consistency.cpp:29
tesseract::LMConsistencyInfo::CT_NONE
Definition: lm_consistency.h:39
tesseract::LMConsistencyInfo::InconsistentXHeight
int InconsistentXHeight() const
Definition: lm_consistency.h:102
tesseract::LMConsistencyInfo::ChartypeEnum
ChartypeEnum
Definition: lm_consistency.h:39
tesseract::LMConsistencyInfo::num_alphas
int num_alphas
Definition: lm_consistency.h:118
tesseract::LMConsistencyInfo::NumInconsistentChartype
int NumInconsistentChartype() const
Definition: lm_consistency.h:90
tesseract::LMConsistencyInfo::inconsistent_font
bool inconsistent_font
Definition: lm_consistency.h:137
tesseract::LMConsistencyInfo::NumInconsistentPunc
int NumInconsistentPunc() const
Definition: lm_consistency.h:84