tesseract  4.0.0-1-g2a2b
intmatcher.h
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: intmatcher.h
3  ** Purpose: Interface to high level generic classifier routines.
4  ** Author: Robert Moss
5  ** History: Wed Feb 13 15:24:15 MST 1991, RWM, Created.
6  **
7  ** (c) Copyright Hewlett-Packard Company, 1988.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  ******************************************************************************/
18 #ifndef INTMATCHER_H
19 #define INTMATCHER_H
20 
21 #include "params.h"
22 
23 // Character fragments could be present in the trained templates
24 // but turned on/off on a language-by-language basis or depending
25 // on particular properties of the corpus (e.g. when we expect the
26 // images to have low exposure).
28  "Do not include character fragments in the"
29  " results of the classifier");
30 
32  "Integer Matcher Multiplier 0-255: ");
33 
34 
38 #include "intproto.h"
39 #include "cutoffs.h"
40 
41 namespace tesseract {
42 struct UnicharRating;
43 }
44 
46  CP_RESULT_STRUCT() : Rating(0.0f), Class(0) {}
47 
48  float Rating;
50 };
51 
52 /*----------------------------------------------------------------------------
53  Variables
54 -----------------------------------------------------------------------------*/
55 
57  "Threshold for good protos during adaptive 0-255: ");
58 
60  "Threshold for good features during adaptive 0-255: ");
61 
// Dimensions of the similarity-to-evidence lookup table.
// SE_TABLE_SIZE is 2^SE_TABLE_BITS (1 << 9 == 512); the two values must be
// kept in sync by hand. SE_TABLE_SIZE sizes
// IntegerMatcher::similarity_evidence_table_.
66 #define SE_TABLE_BITS 9
67 #define SE_TABLE_SIZE 512
68 
73 
74  void Clear(const INT_CLASS class_template);
75  void ClearFeatureEvidence(const INT_CLASS class_template);
76  void NormalizeSums(INT_CLASS ClassTemplate, int16_t NumFeatures,
77  int32_t used_features);
79  INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, int16_t NumFeatures);
80 };
81 
82 
84  public:
85  // Integer Matcher Theta Fudge (0-255).
86  static const int kIntThetaFudge = 128;
87  // Bits in Similarity to Evidence Lookup (8-9).
88  static const int kEvidenceTableBits = 9;
89  // Integer Evidence Truncation Bits (8-14).
90  static const int kIntEvidenceTruncBits = 14;
91  // Similarity to Evidence Table Exponential Multiplier.
92  static const float kSEExponentialMultiplier;
93  // Center of Similarity Curve.
94  static const float kSimilarityCenter;
95 
96  IntegerMatcher(tesseract::IntParam *classify_debug_level);
97 
98  void Match(INT_CLASS ClassTemplate,
99  BIT_VECTOR ProtoMask,
100  BIT_VECTOR ConfigMask,
101  int16_t NumFeatures,
102  const INT_FEATURE_STRUCT* Features,
103  tesseract::UnicharRating* Result,
104  int AdaptFeatureThreshold,
105  int Debug,
106  bool SeparateDebugWindows);
107 
108  // Applies the CN normalization factor to the given rating and returns
109  // the modified rating.
110  float ApplyCNCorrection(float rating, int blob_length,
111  int normalization_factor, int matcher_multiplier);
112 
113  int FindGoodProtos(INT_CLASS ClassTemplate,
114  BIT_VECTOR ProtoMask,
115  BIT_VECTOR ConfigMask,
116  uint16_t BlobLength,
117  int16_t NumFeatures,
118  INT_FEATURE_ARRAY Features,
119  PROTO_ID *ProtoArray,
120  int AdaptProtoThreshold,
121  int Debug);
122 
123  int FindBadFeatures(INT_CLASS ClassTemplate,
124  BIT_VECTOR ProtoMask,
125  BIT_VECTOR ConfigMask,
126  uint16_t BlobLength,
127  int16_t NumFeatures,
128  INT_FEATURE_ARRAY Features,
129  FEATURE_ID *FeatureArray,
130  int AdaptFeatureThreshold,
131  int Debug);
132 
133  private:
134  int UpdateTablesForFeature(
135  INT_CLASS ClassTemplate,
136  BIT_VECTOR ProtoMask,
137  BIT_VECTOR ConfigMask,
138  int FeatureNum,
139  const INT_FEATURE_STRUCT* Feature,
140  ScratchEvidence *evidence,
141  int Debug);
142 
143  int FindBestMatch(INT_CLASS ClassTemplate,
144  const ScratchEvidence &tables,
145  tesseract::UnicharRating* Result);
146 
147 #ifndef GRAPHICS_DISABLED
148  void DebugFeatureProtoError(
149  INT_CLASS ClassTemplate,
150  BIT_VECTOR ProtoMask,
151  BIT_VECTOR ConfigMask,
152  const ScratchEvidence &tables,
153  int16_t NumFeatures,
154  int Debug);
155 
156  void DisplayProtoDebugInfo(
157  INT_CLASS ClassTemplate,
158  BIT_VECTOR ProtoMask,
159  BIT_VECTOR ConfigMask,
160  const ScratchEvidence &tables,
161  bool SeparateDebugWindows);
162 
163  void DisplayFeatureDebugInfo(
164  INT_CLASS ClassTemplate,
165  BIT_VECTOR ProtoMask,
166  BIT_VECTOR ConfigMask,
167  int16_t NumFeatures,
168  const INT_FEATURE_STRUCT* Features,
169  int AdaptFeatureThreshold,
170  int Debug,
171  bool SeparateDebugWindows);
172 #endif
173 
174  private:
175  tesseract::IntParam *classify_debug_level_;
176  uint8_t similarity_evidence_table_[SE_TABLE_SIZE];
177  uint32_t evidence_table_mask_;
178  uint32_t mult_trunc_shift_bits_;
179  uint32_t table_trunc_shift_bits_;
180  uint32_t evidence_mult_mask_;
181 };
182 
// Integer-matcher debug helper; declaration only, the definition is not
// visible in this header.
// NOTE(review): presumably reports the evidence that one proto
// (ActualProtoNum) contributes for the configs selected by ConfigMask /
// ConfigWord -- confirm against the implementation.
// NOTE(review): FeatureNum is typed INT_FEATURE here, while
// IntegerMatcher::UpdateTablesForFeature declares its FeatureNum as int --
// confirm which type is intended.
186 void IMDebugConfiguration(INT_FEATURE FeatureNum,
187  uint16_t ActualProtoNum,
188  uint8_t Evidence,
189  BIT_VECTOR ConfigMask,
190  uint32_t ConfigWord);
191 
// Integer-matcher debug helper; declaration only, the definition is not
// visible in this header.
// NOTE(review): presumably reports the per-config evidence values held in
// FeatureEvidence, with ConfigCount giving the number of entries -- confirm
// against the implementation.
// NOTE(review): FeatureNum is typed INT_FEATURE here, while
// IntegerMatcher::UpdateTablesForFeature declares its FeatureNum as int --
// confirm which type is intended.
192 void IMDebugConfigurationSum(INT_FEATURE FeatureNum,
193  uint8_t *FeatureEvidence,
194  int32_t ConfigCount);
195 
// Declaration only; the definition is not visible in this header.
// NOTE(review): the name suggests a heap sort over n entries keyed on ra[]
// with rb[] reordered alongside it. Ordering direction and index base
// cannot be determined from here -- confirm against the implementation.
196 void HeapSort (int n, int ra[], int rb[]);
197 
201 #endif
void NormalizeSums(INT_CLASS ClassTemplate, int16_t NumFeatures, int32_t used_features)
#define INT_VAR_H(name, val, comment)
Definition: params.h:264
static const float kSEExponentialMultiplier
Definition: intmatcher.h:92
CLASS_ID Class
Definition: intmatcher.h:49
static const int kIntEvidenceTruncBits
Definition: intmatcher.h:90
void IMDebugConfigurationSum(INT_FEATURE FeatureNum, uint8_t *FeatureEvidence, int32_t ConfigCount)
uint8_t FEATURE_ID
Definition: matchdefs.h:48
#define BOOL_VAR_H(name, val, comment)
Definition: params.h:267
#define SE_TABLE_SIZE
Definition: intmatcher.h:67
int FindGoodProtos(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, uint16_t BlobLength, int16_t NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray, int AdaptProtoThreshold, int Debug)
Definition: intmatcher.cpp:549
static const float kSimilarityCenter
Definition: intmatcher.h:94
static const int kEvidenceTableBits
Definition: intmatcher.h:88
void UpdateSumOfProtoEvidences(INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, int16_t NumFeatures)
uint32_t * BIT_VECTOR
Definition: bitvec.h:28
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:36
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:470
void HeapSort(int n, int ra[], int rb[])
void Clear(const INT_CLASS class_template)
Definition: intmatcher.cpp:700
#define MAX_NUM_CONFIGS
Definition: intproto.h:47
#define MAX_NUM_PROTOS
Definition: intproto.h:48
void ClearFeatureEvidence(const INT_CLASS class_template)
Definition: intmatcher.cpp:707
float ApplyCNCorrection(float rating, int blob_length, int normalization_factor, int matcher_multiplier)
#define FALSE
Definition: capi.h:52
void IMDebugConfiguration(INT_FEATURE FeatureNum, uint16_t ActualProtoNum, uint8_t Evidence, BIT_VECTOR ConfigMask, uint32_t ConfigWord)
#define MAX_PROTO_INDEX
Definition: intproto.h:44
IntegerMatcher(tesseract::IntParam *classify_debug_level)
Definition: intmatcher.cpp:670
int classify_adapt_proto_thresh
uint8_t feature_evidence_[MAX_NUM_CONFIGS]
Definition: intmatcher.h:70
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
Definition: intproto.h:150
int16_t PROTO_ID
Definition: matchdefs.h:42
static const int kIntThetaFudge
Definition: intmatcher.h:86
int FindBadFeatures(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, uint16_t BlobLength, int16_t NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray, int AdaptFeatureThreshold, int Debug)
Definition: intmatcher.cpp:618
bool disable_character_fragments
int classify_adapt_feature_thresh
int sum_feature_evidence_[MAX_NUM_CONFIGS]
Definition: intmatcher.h:71
int classify_integer_matcher_multiplier
uint8_t proto_evidence_[MAX_NUM_PROTOS][MAX_PROTO_INDEX]
Definition: intmatcher.h:72