tesseract  5.0.0-alpha-619-ge9db
intmatcher.h
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: intmatcher.h
3  ** Purpose: Interface to high level generic classifier routines.
4  ** Author: Robert Moss
5  **
6  ** (c) Copyright Hewlett-Packard Company, 1988.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  ******************************************************************************/
17 #ifndef INTMATCHER_H
18 #define INTMATCHER_H
19 
20 #include "params.h"
21 
22 // Character fragments could be present in the trained templates
23 // but turned on/off on a language-by-language basis or depending
24 // on particular properties of the corpus (e.g. when we expect the
25 // images to have low exposure).
27  "Do not include character fragments in the"
28  " results of the classifier");
29 
31  "Integer Matcher Multiplier 0-255: ");
32 
33 
37 #include "intproto.h"
38 
// Forward declaration only: within this header tesseract::UnicharRating is
// used solely through a pointer (IntegerMatcher::FindBestMatch's Result
// parameter), so the full type definition is not required here.
39 namespace tesseract {
40 struct UnicharRating;
41 }
42 
44  CP_RESULT_STRUCT() : Rating(0.0f), Class(0) {}
45 
46  float Rating;
48 };
49 
50 
// Dimensions of the similarity-to-evidence lookup table.
// SE_TABLE_SIZE equals 1 << SE_TABLE_BITS (2^9 == 512); keep the two values
// in sync. SE_TABLE_SIZE is the array length of
// IntegerMatcher::similarity_evidence_table_.
55 #define SE_TABLE_BITS 9
56 #define SE_TABLE_SIZE 512
57 
62 
63  void Clear(const INT_CLASS class_template);
64  void ClearFeatureEvidence(const INT_CLASS class_template);
65  void NormalizeSums(INT_CLASS ClassTemplate, int16_t NumFeatures);
67  INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask);
68 };
69 
70 
// Declares the integer matcher interface: matching a feature set against an
// integer class template (Match), applying the CN normalization correction
// to a rating (ApplyCNCorrection), and the FindGoodProtos / FindBadFeatures
// queries that take an adaption threshold. Only declarations live here; the
// member functions are defined out of line (intmatcher.cpp).
71 class IntegerMatcher {
72  public:
73  // Integer Matcher Theta Fudge (0-255).
74  static const int kIntThetaFudge = 128;
75  // Bits in Similarity to Evidence Lookup (8-9).
// Same value as the SE_TABLE_BITS macro (9).
76  static const int kEvidenceTableBits = 9;
77  // Integer Evidence Truncation Bits (8-14).
78  static const int kIntEvidenceTruncBits = 14;
79  // Similarity to Evidence Table Exponential Multiplier.
// Declared without an initializer; the value comes from an out-of-class
// definition that is not part of this header.
80  static const float kSEExponentialMultiplier;
81  // Center of Similarity Curve.
// Declared without an initializer; the value comes from an out-of-class
// definition that is not part of this header.
82  static const float kSimilarityCenter;
83 
// Constructs a matcher from a pointer to the classify_debug_level parameter.
// NOTE(review): presumably the pointer is stored in classify_debug_level_ and
// must outlive the matcher - confirm against the constructor definition.
84  IntegerMatcher(tesseract::IntParam *classify_debug_level);
85 
// Matches NumFeatures features against ClassTemplate, restricted by the
// given proto and config bit masks.
// NOTE(review): listing line 91 is absent from this rendering. The generated
// signature index for IntegerMatcher::Match shows an additional parameter
// `tesseract::UnicharRating *Result` between Features and
// AdaptFeatureThreshold; treat the declaration below as missing that line.
86  void Match(INT_CLASS ClassTemplate,
87  BIT_VECTOR ProtoMask,
88  BIT_VECTOR ConfigMask,
89  int16_t NumFeatures,
90  const INT_FEATURE_STRUCT* Features,
92  int AdaptFeatureThreshold,
93  int Debug,
94  bool SeparateDebugWindows);
95 
96  // Applies the CN normalization factor to the given rating and returns
97  // the modified rating.
98  float ApplyCNCorrection(float rating, int blob_length,
99  int normalization_factor, int matcher_multiplier);
100 
// Takes the same template/mask/feature inputs as Match plus a caller-supplied
// ProtoArray and an AdaptProtoThreshold.
// NOTE(review): presumably fills ProtoArray with the ids of protos meeting
// the threshold and returns how many were written - confirm in the
// definition.
101  int FindGoodProtos(INT_CLASS ClassTemplate,
102  BIT_VECTOR ProtoMask,
103  BIT_VECTOR ConfigMask,
104  int16_t NumFeatures,
105  INT_FEATURE_ARRAY Features,
106  PROTO_ID *ProtoArray,
107  int AdaptProtoThreshold,
108  int Debug);
109 
// Feature-side counterpart of FindGoodProtos: same inputs, but with a
// caller-supplied FeatureArray and an AdaptFeatureThreshold.
// NOTE(review): presumably fills FeatureArray with the ids of poorly matching
// features and returns how many were written - confirm in the definition.
110  int FindBadFeatures(INT_CLASS ClassTemplate,
111  BIT_VECTOR ProtoMask,
112  BIT_VECTOR ConfigMask,
113  int16_t NumFeatures,
114  INT_FEATURE_ARRAY Features,
115  FEATURE_ID *FeatureArray,
116  int AdaptFeatureThreshold,
117  int Debug);
118 
119  private:
// Per-feature helper: receives one feature (Feature, with index FeatureNum)
// and a mutable ScratchEvidence to update.
// NOTE(review): meaning of the int return value is not visible here.
120  int UpdateTablesForFeature(
121  INT_CLASS ClassTemplate,
122  BIT_VECTOR ProtoMask,
123  BIT_VECTOR ConfigMask,
124  int FeatureNum,
125  const INT_FEATURE_STRUCT* Feature,
126  ScratchEvidence *evidence,
127  int Debug);
128 
// Reads the (const) evidence tables and writes its outcome through Result.
// NOTE(review): meaning of the int return value is not visible here.
129  int FindBestMatch(INT_CLASS ClassTemplate,
130  const ScratchEvidence &tables,
131  tesseract::UnicharRating* Result);
132 
// The debug/display helpers below are declared only when GRAPHICS_DISABLED is
// not defined.
133 #ifndef GRAPHICS_DISABLED
134  void DebugFeatureProtoError(
135  INT_CLASS ClassTemplate,
136  BIT_VECTOR ProtoMask,
137  BIT_VECTOR ConfigMask,
138  const ScratchEvidence &tables,
139  int16_t NumFeatures,
140  int Debug);
141 
142  void DisplayProtoDebugInfo(
143  INT_CLASS ClassTemplate,
144  BIT_VECTOR ConfigMask,
145  const ScratchEvidence &tables,
146  bool SeparateDebugWindows);
147 
148  void DisplayFeatureDebugInfo(
149  INT_CLASS ClassTemplate,
150  BIT_VECTOR ProtoMask,
151  BIT_VECTOR ConfigMask,
152  int16_t NumFeatures,
153  const INT_FEATURE_STRUCT* Features,
154  int AdaptFeatureThreshold,
155  int Debug,
156  bool SeparateDebugWindows);
157 #endif
158 
// Data members (second private section, separate from the private helpers).
159  private:
// Non-owning pointer type; holds the debug-level parameter handed to the
// constructor (NOTE(review): assumed from the matching names - confirm).
160  tesseract::IntParam *classify_debug_level_;
// Lookup table with SE_TABLE_SIZE (512) one-byte entries.
161  uint8_t similarity_evidence_table_[SE_TABLE_SIZE];
// Masks and shift counts; NOTE(review): presumably derived from the
// k*Bits constants in the constructor - confirm in the definition.
162  uint32_t evidence_table_mask_;
163  uint32_t mult_trunc_shift_bits_;
164  uint32_t table_trunc_shift_bits_;
165  uint32_t evidence_mult_mask_;
166 };
167 
168 #endif
IntegerMatcher::Match
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:510
CLASS_ID
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:33
ScratchEvidence::ClearFeatureEvidence
void ClearFeatureEvidence(const INT_CLASS class_template)
Definition: intmatcher.cpp:745
tesseract::IntParam
Definition: params.h:152
tesseract::UnicharRating
Definition: shapetable.h:40
ScratchEvidence
Definition: intmatcher.h:57
INT_CLASS_STRUCT
Definition: intproto.h:104
IntegerMatcher::ApplyCNCorrection
float ApplyCNCorrection(float rating, int blob_length, int normalization_factor, int matcher_multiplier)
Definition: intmatcher.cpp:1223
params.h
IntegerMatcher::kEvidenceTableBits
static const int kEvidenceTableBits
Definition: intmatcher.h:75
PROTO_ID
int16_t PROTO_ID
Definition: matchdefs.h:39
CP_RESULT_STRUCT::Class
CLASS_ID Class
Definition: intmatcher.h:46
ScratchEvidence::feature_evidence_
uint8_t feature_evidence_[MAX_NUM_CONFIGS]
Definition: intmatcher.h:58
IntegerMatcher::kIntEvidenceTruncBits
static const int kIntEvidenceTruncBits
Definition: intmatcher.h:77
IntegerMatcher::FindGoodProtos
int FindGoodProtos(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray, int AdaptProtoThreshold, int Debug)
Definition: intmatcher.cpp:588
classify_integer_matcher_multiplier
int classify_integer_matcher_multiplier
ScratchEvidence::NormalizeSums
void NormalizeSums(INT_CLASS ClassTemplate, int16_t NumFeatures)
Definition: intmatcher.cpp:1176
IntegerMatcher::IntegerMatcher
IntegerMatcher(tesseract::IntParam *classify_debug_level)
Definition: intmatcher.cpp:708
CP_RESULT_STRUCT
Definition: intmatcher.h:42
IntegerMatcher::kSimilarityCenter
static const float kSimilarityCenter
Definition: intmatcher.h:81
CP_RESULT_STRUCT::Rating
float Rating
Definition: intmatcher.h:45
MAX_NUM_CONFIGS
#define MAX_NUM_CONFIGS
Definition: intproto.h:46
MAX_NUM_PROTOS
#define MAX_NUM_PROTOS
Definition: intproto.h:47
ScratchEvidence::UpdateSumOfProtoEvidences
void UpdateSumOfProtoEvidences(INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask)
Definition: intmatcher.cpp:1133
MAX_PROTO_INDEX
#define MAX_PROTO_INDEX
Definition: intproto.h:43
IntegerMatcher
Definition: intmatcher.h:70
BIT_VECTOR
uint32_t * BIT_VECTOR
Definition: bitvec.h:27
tesseract
Definition: baseapi.h:65
SE_TABLE_SIZE
#define SE_TABLE_SIZE
Definition: intmatcher.h:55
ScratchEvidence::Clear
void Clear(const INT_CLASS class_template)
Definition: intmatcher.cpp:738
INT_VAR_H
#define INT_VAR_H(name, val, comment)
Definition: params.h:292
ScratchEvidence::sum_feature_evidence_
int sum_feature_evidence_[MAX_NUM_CONFIGS]
Definition: intmatcher.h:59
INT_FEATURE_ARRAY
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
Definition: intproto.h:151
CP_RESULT_STRUCT::CP_RESULT_STRUCT
CP_RESULT_STRUCT()
Definition: intmatcher.h:43
INT_FEATURE_STRUCT
Definition: intproto.h:131
BOOL_VAR_H
#define BOOL_VAR_H(name, val, comment)
Definition: params.h:294
IntegerMatcher::FindBadFeatures
int FindBadFeatures(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray, int AdaptFeatureThreshold, int Debug)
Definition: intmatcher.cpp:656
IntegerMatcher::kIntThetaFudge
static const int kIntThetaFudge
Definition: intmatcher.h:73
IntegerMatcher::kSEExponentialMultiplier
static const float kSEExponentialMultiplier
Definition: intmatcher.h:79
intproto.h
disable_character_fragments
bool disable_character_fragments
FEATURE_ID
uint8_t FEATURE_ID
Definition: matchdefs.h:45
ScratchEvidence::proto_evidence_
uint8_t proto_evidence_[MAX_NUM_PROTOS][MAX_PROTO_INDEX]
Definition: intmatcher.h:60