tesseract  5.0.0-alpha-619-ge9db
intmatcher.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: intmatcher.cpp
3  ** Purpose: Generic high level classification routines.
4  ** Author: Robert Moss
5  ** (c) Copyright Hewlett-Packard Company, 1988.
6  ** Licensed under the Apache License, Version 2.0 (the "License");
7  ** you may not use this file except in compliance with the License.
8  ** You may obtain a copy of the License at
9  ** http://www.apache.org/licenses/LICENSE-2.0
10  ** Unless required by applicable law or agreed to in writing, software
11  ** distributed under the License is distributed on an "AS IS" BASIS,
12  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  ** See the License for the specific language governing permissions and
14  ** limitations under the License.
15  ******************************************************************************/
16 
17 // Include automatically generated configuration file if running autoconf.
18 #ifdef HAVE_CONFIG_H
19 #include "config_auto.h"
20 #endif
21 
22 /*----------------------------------------------------------------------------
23  Include Files and Type Defines
24 ----------------------------------------------------------------------------*/
25 #include "intmatcher.h"
26 
27 #include <cassert>
28 #include <cmath>
29 #include "fontinfo.h"
30 #include "intproto.h"
31 #include "callcpp.h"
32 #include "scrollview.h"
33 #include "float2int.h"
34 #include <tesseract/helpers.h>
35 #include "classify.h"
36 #include "shapetable.h"
37 
40 
41 /*----------------------------------------------------------------------------
42  Global Data Definitions and Declarations
43 ----------------------------------------------------------------------------*/
44 // Parameters of the sigmoid used to convert similarity to evidence in the
45 // similarity_evidence_table_ that is used to convert distance metric to an
46 // 8 bit evidence value in the secondary matcher. (See the IntegerMatcher
// constructor body later in this file, which fills the table.)
// kSimilarityCenter is the similarity at which the un-scaled evidence
// 255 / ((similarity / kSimilarityCenter)^2 + 1) falls to half of its
// maximum value.
48 const float IntegerMatcher::kSimilarityCenter = 0.0075f;
49 
// offset_table[b] is the bit position (0..7) of the least significant set
// bit of the byte value b. Entry 0 holds 255 as a sentinel because a zero
// byte has no set bit. UpdateTablesForFeature uses it together with
// next_table to visit the set bits of a 32-bit word one byte at a time.
50 static const uint8_t offset_table[] = {
51  255, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3,
52  0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,
53  0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3,
54  0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5,
55  0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3,
56  0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,
57  0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3,
58  0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6,
59  0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3,
60  0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,
61  0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
62 };
63 
// next_table[b] is the byte value b with its least significant set bit
// cleared (i.e. b & (b - 1)). After offset_table has reported the position
// of the lowest set bit, indexing next_table with the same byte yields the
// bits that still remain to be visited.
64 static const uint8_t next_table[] = {
65  0, 0, 0, 0x2, 0, 0x4, 0x4, 0x6, 0, 0x8, 0x8, 0x0a, 0x08, 0x0c, 0x0c, 0x0e,
66  0, 0x10, 0x10, 0x12, 0x10, 0x14, 0x14, 0x16, 0x10, 0x18, 0x18, 0x1a,
67  0x18, 0x1c, 0x1c, 0x1e, 0, 0x20, 0x20, 0x22, 0x20, 0x24, 0x24, 0x26,
68  0x20, 0x28, 0x28, 0x2a, 0x28, 0x2c, 0x2c, 0x2e, 0x20, 0x30, 0x30, 0x32,
69  0x30, 0x34, 0x34, 0x36, 0x30, 0x38, 0x38, 0x3a, 0x38, 0x3c, 0x3c, 0x3e,
70  0, 0x40, 0x40, 0x42, 0x40, 0x44, 0x44, 0x46, 0x40, 0x48, 0x48, 0x4a,
71  0x48, 0x4c, 0x4c, 0x4e, 0x40, 0x50, 0x50, 0x52, 0x50, 0x54, 0x54, 0x56,
72  0x50, 0x58, 0x58, 0x5a, 0x58, 0x5c, 0x5c, 0x5e, 0x40, 0x60, 0x60, 0x62,
73  0x60, 0x64, 0x64, 0x66, 0x60, 0x68, 0x68, 0x6a, 0x68, 0x6c, 0x6c, 0x6e,
74  0x60, 0x70, 0x70, 0x72, 0x70, 0x74, 0x74, 0x76, 0x70, 0x78, 0x78, 0x7a,
75  0x78, 0x7c, 0x7c, 0x7e, 0, 0x80, 0x80, 0x82, 0x80, 0x84, 0x84, 0x86,
76  0x80, 0x88, 0x88, 0x8a, 0x88, 0x8c, 0x8c, 0x8e, 0x80, 0x90, 0x90, 0x92,
77  0x90, 0x94, 0x94, 0x96, 0x90, 0x98, 0x98, 0x9a, 0x98, 0x9c, 0x9c, 0x9e,
78  0x80, 0xa0, 0xa0, 0xa2, 0xa0, 0xa4, 0xa4, 0xa6, 0xa0, 0xa8, 0xa8, 0xaa,
79  0xa8, 0xac, 0xac, 0xae, 0xa0, 0xb0, 0xb0, 0xb2, 0xb0, 0xb4, 0xb4, 0xb6,
80  0xb0, 0xb8, 0xb8, 0xba, 0xb8, 0xbc, 0xbc, 0xbe, 0x80, 0xc0, 0xc0, 0xc2,
81  0xc0, 0xc4, 0xc4, 0xc6, 0xc0, 0xc8, 0xc8, 0xca, 0xc8, 0xcc, 0xcc, 0xce,
82  0xc0, 0xd0, 0xd0, 0xd2, 0xd0, 0xd4, 0xd4, 0xd6, 0xd0, 0xd8, 0xd8, 0xda,
83  0xd8, 0xdc, 0xdc, 0xde, 0xc0, 0xe0, 0xe0, 0xe2, 0xe0, 0xe4, 0xe4, 0xe6,
84  0xe0, 0xe8, 0xe8, 0xea, 0xe8, 0xec, 0xec, 0xee, 0xe0, 0xf0, 0xf0, 0xf2,
85  0xf0, 0xf4, 0xf4, 0xf6, 0xf0, 0xf8, 0xf8, 0xfa, 0xf8, 0xfc, 0xfc, 0xfe
86 };
87 
88 // See http://b/19318793 (#6) for a complete discussion.
89 
90 namespace tesseract {
91 
// Sorts the keys ra[1..n] into ascending order using heapsort and applies
// the same permutation to rb[1..n], so that every key keeps its companion
// value. The sort is not stable.
//
// Both arrays are 1-based: index 0 is never read or written, so each array
// must provide at least n + 1 elements.
//
//   n   number of elements to sort.
//   ra  sort keys, stored in ra[1..n].
//   rb  values carried along with the keys, stored in rb[1..n].
//
// Ranges with fewer than two elements are already sorted and are left
// untouched. Without this guard the retirement step below would step `ir`
// past 1 and start reading in front of the arrays.
static void
HeapSort (int n, int ra[], int rb[]) {
  if (n < 2)
    return;

  int l = (n >> 1) + 1;  // One past the next node to sift during heap build.
  int ir = n;            // Last index of the still-unsorted heap region.
  for (;;) {
    int rra, rrb;
    if (l > 1) {
      // Heap-building phase: take the next internal node to sift down.
      --l;
      rra = ra[l];
      rrb = rb[l];
    }
    else {
      // Retirement phase: move the current maximum (the root) to the end of
      // the heap region and re-insert the displaced element from the root.
      rra = ra[ir];
      rrb = rb[ir];
      ra[ir] = ra[1];
      rb[ir] = rb[1];
      if (--ir == 1) {
        // Only one element is left: it is the smallest.
        ra[1] = rra;
        rb[1] = rrb;
        return;
      }
    }
    // Sift (rra, rrb) down from position l to its proper level.
    int i = l;
    int j = l << 1;
    while (j <= ir) {
      // Pick the larger of the two children.
      if (j < ir && ra[j] < ra[j + 1])
        ++j;
      if (rra < ra[j]) {
        // Promote the child and continue one level further down.
        ra[i] = ra[j];
        rb[i] = rb[j];
        i = j;
        j += j;
      }
      else {
        j = ir + 1;  // Proper level found: terminate the sift-down.
      }
    }
    ra[i] = rra;
    rb[i] = rrb;
  }
}
140 
141 // Encapsulation of the intermediate data and computations made by the class
142 // pruner. The class pruner implements a simple linear classifier on binary
143 // features by heavily quantizing the feature space, and applying
144 // NUM_BITS_PER_CLASS (2)-bit weights to the features. Lack of resolution in
145 // weights is compensated by a non-constant bias that is dependent on the
146 // number of features present.
147 class ClassPruner {
148  public:
// Creates a pruner for a template set holding max_classes classes.
// Allocates the four scratch arrays, zeroes class_count_ and resets the
// threshold and counters. norm_count_, sort_key_ and sort_index_ are left
// uninitialized here; they are written by the later pipeline steps.
149  ClassPruner(int max_classes) {
150  // The unrolled loop in ComputeScores means that the array sizes need to
151  // be rounded up so that the array is big enough to accommodate the extra
152  // entries accessed by the unrolling. Each pruner word is of size
153  // BITS_PER_WERD and each entry is NUM_BITS_PER_CLASS, so there are
154  // BITS_PER_WERD / NUM_BITS_PER_CLASS entries.
155  // See ComputeScores.
156  max_classes_ = max_classes;
// NOTE(review): the line carrying the arguments of this RoundUp call is not
// visible in this listing - restore it from the real source file.
157  rounded_classes_ = RoundUp(
159  class_count_ = new int[rounded_classes_];
160  norm_count_ = new int[rounded_classes_];
// The sort arrays get one extra slot because PruneAndSort fills them from
// index 1 (HeapSort works on 1-based arrays).
161  sort_key_ = new int[rounded_classes_ + 1];
162  sort_index_ = new int[rounded_classes_ + 1];
163  for (int i = 0; i < rounded_classes_; i++) {
164  class_count_[i] = 0;
165  }
166  pruning_threshold_ = 0;
167  num_features_ = 0;
168  num_classes_ = 0;
169  }
170 
171  ~ClassPruner() {
172  delete []class_count_;
173  delete []norm_count_;
174  delete []sort_key_;
175  delete []sort_index_;
176  }
177 
180  void ComputeScores(const INT_TEMPLATES_STRUCT* int_templates,
181  int num_features, const INT_FEATURE_STRUCT* features) {
182  num_features_ = num_features;
183  int num_pruners = int_templates->NumClassPruners;
184  for (int f = 0; f < num_features; ++f) {
185  const INT_FEATURE_STRUCT* feature = &features[f];
186  // Quantize the feature to NUM_CP_BUCKETS*NUM_CP_BUCKETS*NUM_CP_BUCKETS.
187  int x = feature->X * NUM_CP_BUCKETS >> 8;
188  int y = feature->Y * NUM_CP_BUCKETS >> 8;
189  int theta = feature->Theta * NUM_CP_BUCKETS >> 8;
190  int class_id = 0;
191  // Each CLASS_PRUNER_STRUCT only covers CLASSES_PER_CP(32) classes, so
192  // we need a collection of them, indexed by pruner_set.
193  for (int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
194  // Look up quantized feature in a 3-D array, an array of weights for
195  // each class.
196  const uint32_t* pruner_word_ptr =
197  int_templates->ClassPruners[pruner_set]->p[x][y][theta];
198  for (int word = 0; word < WERDS_PER_CP_VECTOR; ++word) {
199  uint32_t pruner_word = *pruner_word_ptr++;
200  // This inner loop is unrolled to speed up the ClassPruner.
201  // Currently gcc would not unroll it unless it is set to O3
202  // level of optimization or -funroll-loops is specified.
203  /*
204  uint32_t class_mask = (1 << NUM_BITS_PER_CLASS) - 1;
205  for (int bit = 0; bit < BITS_PER_WERD/NUM_BITS_PER_CLASS; bit++) {
206  class_count_[class_id++] += pruner_word & class_mask;
207  pruner_word >>= NUM_BITS_PER_CLASS;
208  }
209  */
210  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
211  pruner_word >>= NUM_BITS_PER_CLASS;
212  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
213  pruner_word >>= NUM_BITS_PER_CLASS;
214  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
215  pruner_word >>= NUM_BITS_PER_CLASS;
216  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
217  pruner_word >>= NUM_BITS_PER_CLASS;
218  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
219  pruner_word >>= NUM_BITS_PER_CLASS;
220  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
221  pruner_word >>= NUM_BITS_PER_CLASS;
222  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
223  pruner_word >>= NUM_BITS_PER_CLASS;
224  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
225  pruner_word >>= NUM_BITS_PER_CLASS;
226  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
227  pruner_word >>= NUM_BITS_PER_CLASS;
228  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
229  pruner_word >>= NUM_BITS_PER_CLASS;
230  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
231  pruner_word >>= NUM_BITS_PER_CLASS;
232  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
233  pruner_word >>= NUM_BITS_PER_CLASS;
234  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
235  pruner_word >>= NUM_BITS_PER_CLASS;
236  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
237  pruner_word >>= NUM_BITS_PER_CLASS;
238  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
239  pruner_word >>= NUM_BITS_PER_CLASS;
240  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
241  }
242  }
243  }
244  }
245 
251  void AdjustForExpectedNumFeatures(const uint16_t* expected_num_features,
252  int cutoff_strength) {
253  for (int class_id = 0; class_id < max_classes_; ++class_id) {
254  if (num_features_ < expected_num_features[class_id]) {
255  int deficit = expected_num_features[class_id] - num_features_;
256  class_count_[class_id] -= class_count_[class_id] * deficit /
257  (num_features_ * cutoff_strength + deficit);
258  }
259  }
260  }
261 
264  void DisableDisabledClasses(const UNICHARSET& unicharset) {
265  for (int class_id = 0; class_id < max_classes_; ++class_id) {
266  if (!unicharset.get_enabled(class_id))
267  class_count_[class_id] = 0; // This char is disabled!
268  }
269  }
270 
272  void DisableFragments(const UNICHARSET& unicharset) {
273  for (int class_id = 0; class_id < max_classes_; ++class_id) {
274  // Do not include character fragments in the class pruner
275  // results if disable_character_fragments is true.
276  if (unicharset.get_fragment(class_id)) {
277  class_count_[class_id] = 0;
278  }
279  }
280  }
281 
286  void NormalizeForXheight(int norm_multiplier,
287  const uint8_t* normalization_factors) {
288  for (int class_id = 0; class_id < max_classes_; class_id++) {
289  norm_count_[class_id] = class_count_[class_id] -
290  ((norm_multiplier * normalization_factors[class_id]) >> 8);
291  }
292  }
293 
295  void NoNormalization() {
296  for (int class_id = 0; class_id < max_classes_; class_id++) {
297  norm_count_[class_id] = class_count_[class_id];
298  }
299  }
300 
304  void PruneAndSort(int pruning_factor, int keep_this,
305  bool max_of_non_fragments, const UNICHARSET& unicharset) {
306  int max_count = 0;
307  for (int c = 0; c < max_classes_; ++c) {
308  if (norm_count_[c] > max_count &&
309  // This additional check is added in order to ensure that
310  // the classifier will return at least one non-fragmented
311  // character match.
312  // TODO(daria): verify that this helps accuracy and does not
313  // hurt performance.
314  (!max_of_non_fragments || !unicharset.get_fragment(c))) {
315  max_count = norm_count_[c];
316  }
317  }
318  // Prune Classes.
319  pruning_threshold_ = (max_count * pruning_factor) >> 8;
320  // Select Classes.
321  if (pruning_threshold_ < 1)
322  pruning_threshold_ = 1;
323  num_classes_ = 0;
324  for (int class_id = 0; class_id < max_classes_; class_id++) {
325  if (norm_count_[class_id] >= pruning_threshold_ ||
326  class_id == keep_this) {
327  ++num_classes_;
328  sort_index_[num_classes_] = class_id;
329  sort_key_[num_classes_] = norm_count_[class_id];
330  }
331  }
332 
333  // Sort Classes using Heapsort Algorithm.
334  if (num_classes_ > 1)
335  HeapSort(num_classes_, sort_key_, sort_index_);
336  }
337 
340  void DebugMatch(const Classify& classify,
341  const INT_TEMPLATES_STRUCT* int_templates,
342  const INT_FEATURE_STRUCT* features) const {
343  int num_pruners = int_templates->NumClassPruners;
344  int max_num_classes = int_templates->NumClasses;
345  for (int f = 0; f < num_features_; ++f) {
346  const INT_FEATURE_STRUCT* feature = &features[f];
347  tprintf("F=%3d(%d,%d,%d),", f, feature->X, feature->Y, feature->Theta);
348  // Quantize the feature to NUM_CP_BUCKETS*NUM_CP_BUCKETS*NUM_CP_BUCKETS.
349  int x = feature->X * NUM_CP_BUCKETS >> 8;
350  int y = feature->Y * NUM_CP_BUCKETS >> 8;
351  int theta = feature->Theta * NUM_CP_BUCKETS >> 8;
352  int class_id = 0;
353  for (int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
354  // Look up quantized feature in a 3-D array, an array of weights for
355  // each class.
356  const uint32_t* pruner_word_ptr =
357  int_templates->ClassPruners[pruner_set]->p[x][y][theta];
358  for (int word = 0; word < WERDS_PER_CP_VECTOR; ++word) {
359  uint32_t pruner_word = *pruner_word_ptr++;
360  for (int word_class = 0; word_class < 16 &&
361  class_id < max_num_classes; ++word_class, ++class_id) {
362  if (norm_count_[class_id] >= pruning_threshold_) {
363  tprintf(" %s=%d,",
364  classify.ClassIDToDebugStr(int_templates,
365  class_id, 0).c_str(),
366  pruner_word & CLASS_PRUNER_CLASS_MASK);
367  }
368  pruner_word >>= NUM_BITS_PER_CLASS;
369  }
370  }
371  tprintf("\n");
372  }
373  }
374  }
375 
377  void SummarizeResult(const Classify& classify,
378  const INT_TEMPLATES_STRUCT* int_templates,
379  const uint16_t* expected_num_features,
380  int norm_multiplier,
381  const uint8_t* normalization_factors) const {
382  tprintf("CP:%d classes, %d features:\n", num_classes_, num_features_);
383  for (int i = 0; i < num_classes_; ++i) {
384  int class_id = sort_index_[num_classes_ - i];
385  STRING class_string = classify.ClassIDToDebugStr(int_templates,
386  class_id, 0);
387  tprintf("%s:Initial=%d, E=%d, Xht-adj=%d, N=%d, Rat=%.2f\n",
388  class_string.c_str(),
389  class_count_[class_id],
390  expected_num_features[class_id],
391  (norm_multiplier * normalization_factors[class_id]) >> 8,
392  sort_key_[num_classes_ - i],
393  100.0 - 100.0 * sort_key_[num_classes_ - i] /
394  (CLASS_PRUNER_CLASS_MASK * num_features_));
395  }
396  }
397 
400  int SetupResults(GenericVector<CP_RESULT_STRUCT>* results) const {
401  CP_RESULT_STRUCT empty;
402  results->init_to_size(num_classes_, empty);
403  for (int c = 0; c < num_classes_; ++c) {
404  (*results)[c].Class = sort_index_[num_classes_ - c];
405  (*results)[c].Rating = 1.0f - sort_key_[num_classes_ - c] /
406  (static_cast<float>(CLASS_PRUNER_CLASS_MASK) * num_features_);
407  }
408  return num_classes_;
409  }
410 
411  private:
// Accumulated pruner weight per class, indexed by class id. Holds
// rounded_classes_ entries and is zeroed by the constructor.
413  int *class_count_;
// Per-class score after NormalizeForXheight / NoNormalization. Holds
// rounded_classes_ entries, of which the first max_classes_ are written.
417  int *norm_count_;
// Scores of the classes kept by PruneAndSort, stored at indices
// 1..num_classes_ (HeapSort is 1-based), hence rounded_classes_ + 1 entries.
419  int *sort_key_;
// Class ids matching sort_key_, with the same 1-based layout.
421  int *sort_index_;
// Number of classes passed to the constructor.
423  int max_classes_;
// Allocated length of the per-class arrays (max_classes_ rounded up).
425  int rounded_classes_;
// Lowest norm_count_ value that survives PruneAndSort; never below 1 once
// PruneAndSort has run.
427  int pruning_threshold_;
// Feature count passed to the most recent ComputeScores call.
429  int num_features_;
// Number of classes kept by the most recent PruneAndSort call.
431  int num_classes_;
432 };
433 
434 /*----------------------------------------------------------------------------
435  Public Code
436 ----------------------------------------------------------------------------*/
// Runs the class pruner on the features of an unknown character and writes
// the resulting short-list of candidate classes to `results`.
//
//   int_templates          templates supplying the class pruner tables.
//   num_features           number of entries in features.
//   keep_this              class id that is always kept in the short-list.
//   features               features of the unknown character.
//   normalization_factors  per-class x-height normalization factors, or
//                          nullptr to skip normalization.
//   expected_num_features  expected feature count, indexed by class id.
//
// Returns the value of ClassPruner::SetupResults, i.e. the number of
// classes in the short-list.
//
// NOTE(review): three source lines of this function are not visible in this
// listing (the end of the parameter list, and one argument line in each of
// the AdjustForExpectedNumFeatures and SummarizeResult calls) - restore
// them from the real source file.
452 int Classify::PruneClasses(const INT_TEMPLATES_STRUCT* int_templates,
453  int num_features, int keep_this,
454  const INT_FEATURE_STRUCT* features,
455  const uint8_t* normalization_factors,
456  const uint16_t* expected_num_features,
458  ClassPruner pruner(int_templates->NumClasses);
459  // Compute initial match scores for all classes.
460  pruner.ComputeScores(int_templates, num_features, features);
461  // Adjust match scores for number of expected features.
462  pruner.AdjustForExpectedNumFeatures(expected_num_features,
464  // Apply disabled classes in unicharset - only works without a shape_table.
465  if (shape_table_ == nullptr)
466  pruner.DisableDisabledClasses(unicharset);
467  // If fragments are disabled, remove them, also only without a shape table.
468  if (disable_character_fragments && shape_table_ == nullptr)
469  pruner.DisableFragments(unicharset);
470 
471  // If we have good x-heights, apply the given normalization factors.
472  if (normalization_factors != nullptr) {
473  pruner.NormalizeForXheight(classify_class_pruner_multiplier,
474  normalization_factors);
475  } else {
476  pruner.NoNormalization();
477  }
478  // Do the actual pruning and sort the short-list.
// Fragments are excluded from the best-score search only when there is no
// shape table.
479  pruner.PruneAndSort(classify_class_pruner_threshold, keep_this,
480  shape_table_ == nullptr, unicharset);
481 
482  if (classify_debug_level > 2) {
483  pruner.DebugMatch(*this, int_templates, features);
484  }
485  if (classify_debug_level > 1) {
// NOTE(review): normalization_factors may be nullptr here (see the check
// above) - confirm that SummarizeResult tolerates it.
486  pruner.SummarizeResult(*this, int_templates, expected_num_features,
488  normalization_factors);
489  }
490  // Convert to the expected output format.
491  return pruner.SetupResults(results);
492 }
493 
494 } // namespace tesseract
495 
511 void IntegerMatcher::Match(INT_CLASS ClassTemplate,
512  BIT_VECTOR ProtoMask,
513  BIT_VECTOR ConfigMask,
514  int16_t NumFeatures,
515  const INT_FEATURE_STRUCT* Features,
516  UnicharRating* Result,
517  int AdaptFeatureThreshold,
518  int Debug,
519  bool SeparateDebugWindows) {
520  auto *tables = new ScratchEvidence();
521  int Feature;
522 
523  if (MatchDebuggingOn (Debug))
524  cprintf ("Integer Matcher -------------------------------------------\n");
525 
526  tables->Clear(ClassTemplate);
527  Result->feature_misses = 0;
528 
529  for (Feature = 0; Feature < NumFeatures; Feature++) {
530  int csum = UpdateTablesForFeature(ClassTemplate, ProtoMask, ConfigMask,
531  Feature, &Features[Feature],
532  tables, Debug);
533  // Count features that were missed over all configs.
534  if (csum == 0)
535  ++Result->feature_misses;
536  }
537 
538 #ifndef GRAPHICS_DISABLED
539  if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug)) {
540  DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
541  NumFeatures, Debug);
542  }
543 
544  if (DisplayProtoMatchesOn(Debug)) {
545  DisplayProtoDebugInfo(ClassTemplate, ConfigMask,
546  *tables, SeparateDebugWindows);
547  }
548 
549  if (DisplayFeatureMatchesOn(Debug)) {
550  DisplayFeatureDebugInfo(ClassTemplate, ProtoMask, ConfigMask, NumFeatures,
551  Features, AdaptFeatureThreshold, Debug,
552  SeparateDebugWindows);
553  }
554 #endif
555 
556  tables->UpdateSumOfProtoEvidences(ClassTemplate, ConfigMask);
557  tables->NormalizeSums(ClassTemplate, NumFeatures);
558 
559  FindBestMatch(ClassTemplate, *tables, Result);
560 
561 #ifndef GRAPHICS_DISABLED
562  if (PrintMatchSummaryOn(Debug))
563  Result->Print();
564 
565  if (MatchDebuggingOn(Debug))
566  cprintf("Match Complete --------------------------------------------\n");
567 #endif
568 
569  delete tables;
570 }
571 
// NOTE(review): the line that opens this definition (return type and
// function name) is not visible in this listing; what follows is its
// parameter list and body.
//
// Finds the protos of ClassTemplate that are well matched by the given
// features: the evidence tables are built from all features, the evidence of
// each proto is averaged, and the index of every proto whose average is at
// least AdaptProtoThreshold is written to ProtoArray.
//
//   ClassTemplate        prototypes and configs of the class.
//   ProtoMask            bit mask of the protos that may be used.
//   ConfigMask           bit mask of the configs that may be used.
//   NumFeatures          number of entries in Features.
//   Features             features of the unknown character.
//   ProtoArray           output: receives the good proto indices in
//                        increasing order; the caller must provide room for
//                        up to ClassTemplate->NumProtos entries.
//   AdaptProtoThreshold  minimum average evidence of a good proto.
//   Debug                debug flags tested through the *On() macros.
//
// Returns the number of indices written to ProtoArray.
590  INT_CLASS ClassTemplate,
591  BIT_VECTOR ProtoMask,
592  BIT_VECTOR ConfigMask,
593  int16_t NumFeatures,
594  INT_FEATURE_ARRAY Features,
595  PROTO_ID *ProtoArray,
596  int AdaptProtoThreshold,
597  int Debug) {
598  auto *tables = new ScratchEvidence();
599  int NumGoodProtos = 0;
600 
601  /* DEBUG opening heading */
602  if (MatchDebuggingOn (Debug))
603  cprintf
604  ("Find Good Protos -------------------------------------------\n");
605 
606  tables->Clear(ClassTemplate);
607 
608  for (int Feature = 0; Feature < NumFeatures; Feature++)
609  UpdateTablesForFeature(
610  ClassTemplate, ProtoMask, ConfigMask, Feature, &(Features[Feature]),
611  tables, Debug);
612 
613 #ifndef GRAPHICS_DISABLED
614  if (PrintProtoMatchesOn (Debug) || PrintMatchSummaryOn (Debug))
615  DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
616  NumFeatures, Debug);
617 #endif
618 
619  /* Average Proto Evidences & Find Good Protos */
620  for (int proto = 0; proto < ClassTemplate->NumProtos; proto++) {
621  /* Compute Average for Actual Proto */
622  int Temp = 0;
// Only the first MAX_PROTO_INDEX evidence slots are summed, but the sum is
// divided by the full proto length below.
623  for (uint8_t i = 0;
624  i < MAX_PROTO_INDEX && i < ClassTemplate->ProtoLengths[proto]; i++)
625  Temp += tables->proto_evidence_[proto][i];
626 
// NOTE(review): there is no check for a zero ProtoLengths[proto] before this
// division - confirm that proto lengths are always non-zero.
627  Temp /= ClassTemplate->ProtoLengths[proto];
628 
629  /* Find Good Protos */
630  if (Temp >= AdaptProtoThreshold) {
631  *ProtoArray = proto;
632  ProtoArray++;
633  NumGoodProtos++;
634  }
635  }
636 
637  if (MatchDebuggingOn (Debug))
638  cprintf ("Match Complete --------------------------------------------\n");
639  delete tables;
640 
641  return NumGoodProtos;
642 }
643 
// NOTE(review): the line that opens this definition (return type and
// function name) is not visible in this listing; what follows is its
// parameter list and body.
//
// Finds the features that are poorly matched by ClassTemplate: for each
// feature the best evidence over all configs is computed, and the index of
// every feature whose best evidence is below AdaptFeatureThreshold is
// written to FeatureArray.
//
//   ClassTemplate          prototypes and configs of the class.
//   ProtoMask              bit mask of the protos that may be used.
//   ConfigMask             bit mask of the configs that may be used.
//   NumFeatures            number of entries in Features.
//   Features               features of the unknown character.
//   FeatureArray           output: receives the bad feature indices in
//                          increasing order; the caller must provide room
//                          for up to NumFeatures entries.
//   AdaptFeatureThreshold  evidence below which a feature counts as bad.
//   Debug                  debug flags tested through the *On() macros.
//
// Returns the number of indices written to FeatureArray.
658  INT_CLASS ClassTemplate,
659  BIT_VECTOR ProtoMask,
660  BIT_VECTOR ConfigMask,
661  int16_t NumFeatures,
662  INT_FEATURE_ARRAY Features,
663  FEATURE_ID *FeatureArray,
664  int AdaptFeatureThreshold,
665  int Debug) {
666  auto *tables = new ScratchEvidence();
667  int NumBadFeatures = 0;
668 
669  /* DEBUG opening heading */
670  if (MatchDebuggingOn(Debug))
671  cprintf("Find Bad Features -------------------------------------------\n");
672 
673  tables->Clear(ClassTemplate);
674 
675  for (int Feature = 0; Feature < NumFeatures; Feature++) {
// UpdateTablesForFeature resets feature_evidence_ and then fills it for this
// feature only, so the scan below sees just the current feature's evidence.
676  UpdateTablesForFeature(
677  ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature],
678  tables, Debug);
679 
680  /* Find Best Evidence for Current Feature */
681  int best = 0;
// The assert is strict: a template with exactly MAX_NUM_CONFIGS configs is
// rejected in builds where assert is active.
682  assert(ClassTemplate->NumConfigs < MAX_NUM_CONFIGS);
683  for (int i = 0; i < MAX_NUM_CONFIGS && i < ClassTemplate->NumConfigs; i++)
684  if (tables->feature_evidence_[i] > best)
685  best = tables->feature_evidence_[i];
686 
687  /* Find Bad Features */
688  if (best < AdaptFeatureThreshold) {
689  *FeatureArray = Feature;
690  FeatureArray++;
691  NumBadFeatures++;
692  }
693  }
694 
695 #ifndef GRAPHICS_DISABLED
696  if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug))
697  DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
698  NumFeatures, Debug);
699 #endif
700 
701  if (MatchDebuggingOn(Debug))
702  cprintf("Match Complete --------------------------------------------\n");
703 
704  delete tables;
705  return NumBadFeatures;
706 }
707 
708 
// NOTE(review): the line that opens this constructor (name and parameter
// list) is not visible in this listing; what follows is its member
// initializer list and body.
//
// Stores the debug-level parameter, precomputes similarity_evidence_table_
// and derives the shift and mask constants that UpdateTablesForFeature uses
// to turn a proto/feature distance into an index of that table.
710  : classify_debug_level_(classify_debug_level)
711 {
712  /* Initialize table for evidence to similarity lookup */
713  for (int i = 0; i < SE_TABLE_SIZE; i++) {
// Map the table index to a fixed-point similarity and scale it by 2^-32.
714  uint32_t IntSimilarity = i << (27 - SE_TABLE_BITS);
715  double Similarity = (static_cast<double>(IntSimilarity)) / 65536.0 / 65536.0;
// evidence = 255 / ((Similarity / kSimilarityCenter)^2 + 1)
716  double evidence = Similarity / kSimilarityCenter;
717  evidence = 255.0 / (evidence * evidence + 1.0);
718 
// Optional extra attenuation: scale = 1 - exp(k * (i / SE_TABLE_SIZE - 1))
// with k = kSEExponentialMultiplier, clipped to [0, 1].
719  if (kSEExponentialMultiplier > 0.0) {
720  double scale = 1.0 - exp(-kSEExponentialMultiplier) *
721  exp(kSEExponentialMultiplier * (static_cast<double>(i) / SE_TABLE_SIZE));
722  evidence *= ClipToRange(scale, 0.0, 1.0);
723  }
724 
// Round to the nearest integer when storing as 8 bits.
725  similarity_evidence_table_[i] = static_cast<uint8_t>(evidence + 0.5);
726  }
727 
728  /* Initialize evidence computation variables */
729  evidence_table_mask_ =
730  ((1 << kEvidenceTableBits) - 1) << (9 - kEvidenceTableBits);
731  mult_trunc_shift_bits_ = (14 - kIntEvidenceTruncBits);
732  table_trunc_shift_bits_ = (27 - SE_TABLE_BITS - (mult_trunc_shift_bits_ << 1));
733  evidence_mult_mask_ = ((1 << kIntEvidenceTruncBits) - 1);
734 }
735 
736 /*----------------------------------------------------------------------------
737  Private Code
738 ----------------------------------------------------------------------------*/
739 void ScratchEvidence::Clear(const INT_CLASS class_template) {
740  memset(sum_feature_evidence_, 0,
741  class_template->NumConfigs * sizeof(sum_feature_evidence_[0]));
742  memset(proto_evidence_, 0,
743  class_template->NumProtos * sizeof(proto_evidence_[0]));
744 }
745 
746 void ScratchEvidence::ClearFeatureEvidence(const INT_CLASS class_template) {
747  memset(feature_evidence_, 0,
748  class_template->NumConfigs * sizeof(feature_evidence_[0]));
749 }
750 
754 static void IMDebugConfiguration(int FeatureNum, uint16_t ActualProtoNum,
755  uint8_t Evidence, uint32_t ConfigWord) {
756  cprintf ("F = %3d, P = %3d, E = %3d, Configs = ",
757  FeatureNum, static_cast<int>(ActualProtoNum), static_cast<int>(Evidence));
758  while (ConfigWord) {
759  if (ConfigWord & 1)
760  cprintf ("1");
761  else
762  cprintf ("0");
763  ConfigWord >>= 1;
764  }
765  cprintf ("\n");
766 }
767 
771 static void IMDebugConfigurationSum(int FeatureNum, uint8_t *FeatureEvidence,
772  int32_t ConfigCount) {
773  cprintf("F=%3d, C=", FeatureNum);
774  for (int ConfigNum = 0; ConfigNum < ConfigCount; ConfigNum++) {
775  cprintf("%4d", FeatureEvidence[ConfigNum]);
776  }
777  cprintf("\n");
778 }
779 
// Updates the scratch evidence tables for a single feature of the unknown.
// Every proto that passes both the proto pruner and ProtoMask is scored
// against the feature, and the resulting evidence is merged into
//   - tables->feature_evidence_: best evidence per config for this feature,
//   - tables->proto_evidence_: the best evidences seen so far per proto,
//   - tables->sum_feature_evidence_: running per-config sum over features.
//
//   ClassTemplate  prototypes and configs of the class.
//   ProtoMask      bit mask of usable protos, one 32-bit word per half
//                  proto set.
//   ConfigMask     bit mask of usable configs (only its first word is read).
//   FeatureNum     index of the feature, used for debug output only.
//   Feature        the feature to score.
//   tables         scratch tables to update.
//   Debug          debug flags tested through the *On() macros.
//
// Returns the sum of this feature's evidence over all configs; 0 means the
// feature produced no evidence for any config.
791 int IntegerMatcher::UpdateTablesForFeature(
792  INT_CLASS ClassTemplate,
793  BIT_VECTOR ProtoMask,
794  BIT_VECTOR ConfigMask,
795  int FeatureNum,
796  const INT_FEATURE_STRUCT* Feature,
797  ScratchEvidence *tables,
798  int Debug) {
799  uint32_t ConfigWord;
800  uint32_t ProtoWord;
801  uint32_t ProtoNum;
802  uint32_t ActualProtoNum;
803  uint8_t proto_byte;
804  int32_t proto_word_offset;
805  int32_t proto_offset;
806  PROTO_SET ProtoSet;
807  uint32_t *ProtoPrunerPtr;
808  INT_PROTO Proto;
809  int ProtoSetIndex;
810  uint8_t Evidence;
811  uint32_t XFeatureAddress;
812  uint32_t YFeatureAddress;
813  uint32_t ThetaFeatureAddress;
814 
815  tables->ClearFeatureEvidence(ClassTemplate);
816 
817  /* Precompute Feature Address offset for Proto Pruning */
// Each coordinate is reduced to a bucket (value >> 2) and the buckets are
// two words apart (<< 1). The Y and Theta regions start NUM_PP_BUCKETS * 2
// and NUM_PP_BUCKETS * 4 words into the pruner.
// NOTE(review): this assumes the ProtoPruner layout declared with the
// PROTO_SET type - confirm against its declaration.
818  XFeatureAddress = ((Feature->X >> 2) << 1);
819  YFeatureAddress = (NUM_PP_BUCKETS << 1) + ((Feature->Y >> 2) << 1);
820  ThetaFeatureAddress = (NUM_PP_BUCKETS << 2) + ((Feature->Theta >> 2) << 1);
821 
822  for (ProtoSetIndex = 0, ActualProtoNum = 0;
823  ProtoSetIndex < ClassTemplate->NumProtoSets; ProtoSetIndex++) {
824  ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
825  ProtoPrunerPtr = reinterpret_cast<uint32_t *>((*ProtoSet).ProtoPruner);
// Each proto set is handled as two halves of PROTOS_PER_PROTO_SET / 2
// protos; ProtoMask and ProtoPrunerPtr advance by one word per half.
826  for (ProtoNum = 0; ProtoNum < PROTOS_PER_PROTO_SET;
827  ProtoNum += (PROTOS_PER_PROTO_SET >> 1), ActualProtoNum +=
828  (PROTOS_PER_PROTO_SET >> 1), ProtoMask++, ProtoPrunerPtr++) {
829  /* Prune Protos of current Proto Set */
// A proto survives only if its bit is set in the X, Y and Theta pruner words
// and in the caller's mask.
830  ProtoWord = *(ProtoPrunerPtr + XFeatureAddress);
831  ProtoWord &= *(ProtoPrunerPtr + YFeatureAddress);
832  ProtoWord &= *(ProtoPrunerPtr + ThetaFeatureAddress);
833  ProtoWord &= *ProtoMask;
834 
835  if (ProtoWord != 0) {
// Visit the set bits of ProtoWord from the lowest upwards, one byte at a
// time: offset_table gives the position of the lowest set bit of the current
// byte and next_table removes that bit.
836  proto_byte = ProtoWord & 0xff;
837  ProtoWord >>= 8;
838  proto_word_offset = 0;
839  while (ProtoWord != 0 || proto_byte != 0) {
840  while (proto_byte == 0) {
841  proto_byte = ProtoWord & 0xff;
842  ProtoWord >>= 8;
843  proto_word_offset += 8;
844  }
845  proto_offset = offset_table[proto_byte] + proto_word_offset;
846  proto_byte = next_table[proto_byte];
847  Proto = &(ProtoSet->Protos[ProtoNum + proto_offset]);
// Only the first config word of the proto is consulted.
848  ConfigWord = Proto->Configs[0];
// A3: position term built from the proto coefficients A, B, C and the
// feature position relative to 128; M3: angle difference (wrapped to a
// signed 8-bit value) scaled by kIntThetaFudge.
849  int32_t A3 = (((Proto->A * (Feature->X - 128)) * 2)
850  - (Proto->B * (Feature->Y - 128)) + (Proto->C * 512));
851  int32_t M3 = ((static_cast<int8_t>(Feature->Theta - Proto->Angle)) *
852  kIntThetaFudge) * 2;
853 
// Negative values are folded with one's complement (-x - 1), then both terms
// are truncated and clamped to evidence_mult_mask_.
854  if (A3 < 0)
855  A3 = ~A3;
856  if (M3 < 0)
857  M3 = ~M3;
858  A3 >>= mult_trunc_shift_bits_;
859  M3 >>= mult_trunc_shift_bits_;
860  if (static_cast<uint32_t>(A3) > evidence_mult_mask_)
861  A3 = evidence_mult_mask_;
862  if (static_cast<uint32_t>(M3) > evidence_mult_mask_)
863  M3 = evidence_mult_mask_;
864 
// The sum of squares, truncated, indexes the similarity-to-evidence table;
// indices beyond evidence_table_mask_ yield no evidence.
865  uint32_t A4 = (A3 * A3) + (M3 * M3);
866  A4 >>= table_trunc_shift_bits_;
867  if (A4 > evidence_table_mask_)
868  Evidence = 0;
869  else
870  Evidence = similarity_evidence_table_[A4];
871 
872  if (PrintFeatureMatchesOn (Debug))
873  IMDebugConfiguration (FeatureNum,
874  ActualProtoNum + proto_offset,
875  Evidence, ConfigWord);
876 
877  ConfigWord &= *ConfigMask;
878 
// For every config that uses this proto and is enabled in ConfigMask, keep
// the largest evidence seen for the current feature. The set bits are walked
// with the same byte-wise technique as above.
879  uint8_t feature_evidence_index = 0;
880  uint8_t config_byte = 0;
881  while (ConfigWord != 0 || config_byte != 0) {
882  while (config_byte == 0) {
883  config_byte = ConfigWord & 0xff;
884  ConfigWord >>= 8;
885  feature_evidence_index += 8;
886  }
887  const uint8_t config_offset =
888  offset_table[config_byte] + feature_evidence_index - 8;
889  config_byte = next_table[config_byte];
890  if (Evidence > tables->feature_evidence_[config_offset])
891  tables->feature_evidence_[config_offset] = Evidence;
892  }
893 
// Insert Evidence into this proto's evidence row, which is kept in
// descending order and holds at most ProtoLengths[...] entries: a larger
// value displaces the stored one, which is then carried further down.
894  uint8_t* UINT8Pointer =
895  &(tables->proto_evidence_[ActualProtoNum + proto_offset][0]);
896  for (uint8_t ProtoIndex =
897  ClassTemplate->ProtoLengths[ActualProtoNum + proto_offset];
898  ProtoIndex > 0; ProtoIndex--, UINT8Pointer++) {
899  if (Evidence > *UINT8Pointer) {
900  uint8_t Temp = *UINT8Pointer;
901  *UINT8Pointer = Evidence;
902  Evidence = Temp;
903  }
904  else if (Evidence == 0)
905  break;
906  }
907  }
908  }
909  }
910  }
911 
912  if (PrintFeatureMatchesOn(Debug)) {
913  IMDebugConfigurationSum(FeatureNum, tables->feature_evidence_,
914  ClassTemplate->NumConfigs);
915  }
916 
// Add this feature's per-config evidence to the running sums and total it
// for the return value.
917  int* IntPointer = tables->sum_feature_evidence_;
918  uint8_t* UINT8Pointer = tables->feature_evidence_;
919  int SumOverConfigs = 0;
920  for (int ConfigNum = ClassTemplate->NumConfigs; ConfigNum > 0; ConfigNum--) {
921  int evidence = *UINT8Pointer++;
922  SumOverConfigs += evidence;
923  *IntPointer++ += evidence;
924  }
925  return SumOverConfigs;
926 }
927 
931 #ifndef GRAPHICS_DISABLED
932 void IntegerMatcher::DebugFeatureProtoError(
933  INT_CLASS ClassTemplate,
934  BIT_VECTOR ProtoMask,
935  BIT_VECTOR ConfigMask,
936  const ScratchEvidence& tables,
937  int16_t NumFeatures,
938  int Debug) {
939  float ProtoConfigs[MAX_NUM_CONFIGS];
940  int ConfigNum;
941  uint32_t ConfigWord;
942  int ProtoSetIndex;
943  uint16_t ProtoNum;
944  uint8_t ProtoWordNum;
945  PROTO_SET ProtoSet;
946  uint16_t ActualProtoNum;
947 
948  if (PrintMatchSummaryOn(Debug)) {
949  cprintf("Configuration Mask:\n");
950  for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
951  cprintf("%1d", (((*ConfigMask) >> ConfigNum) & 1));
952  cprintf("\n");
953 
954  cprintf("Feature Error for Configurations:\n");
955  for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) {
956  cprintf(
957  " %5.1f",
958  100.0 * (1.0 - static_cast<float>(tables.sum_feature_evidence_[ConfigNum])
959  / NumFeatures / 256.0));
960  }
961  cprintf("\n\n\n");
962  }
963 
964  if (PrintMatchSummaryOn (Debug)) {
965  cprintf ("Proto Mask:\n");
966  for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
967  ProtoSetIndex++) {
968  ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
969  for (ProtoWordNum = 0; ProtoWordNum < 2;
970  ProtoWordNum++, ProtoMask++) {
971  ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
972  for (ProtoNum = 0;
973  ((ProtoNum < (PROTOS_PER_PROTO_SET >> 1))
974  && (ActualProtoNum < ClassTemplate->NumProtos));
975  ProtoNum++, ActualProtoNum++)
976  cprintf ("%1d", (((*ProtoMask) >> ProtoNum) & 1));
977  cprintf ("\n");
978  }
979  }
980  cprintf ("\n");
981  }
982 
983  for (int i = 0; i < ClassTemplate->NumConfigs; i++)
984  ProtoConfigs[i] = 0;
985 
986  if (PrintProtoMatchesOn (Debug)) {
987  cprintf ("Proto Evidence:\n");
988  for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
989  ProtoSetIndex++) {
990  ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
991  ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
992  for (ProtoNum = 0;
993  ((ProtoNum < PROTOS_PER_PROTO_SET) &&
994  (ActualProtoNum < ClassTemplate->NumProtos));
995  ProtoNum++, ActualProtoNum++) {
996  cprintf ("P %3d =", ActualProtoNum);
997  int temp = 0;
998  for (uint8_t j = 0; j < ClassTemplate->ProtoLengths[ActualProtoNum]; j++) {
999  uint8_t data = tables.proto_evidence_[ActualProtoNum][j];
1000  cprintf(" %d", data);
1001  temp += data;
1002  }
1003 
1004  cprintf(" = %6.4f%%\n",
1005  temp / 256.0 / ClassTemplate->ProtoLengths[ActualProtoNum]);
1006 
1007  ConfigWord = ProtoSet->Protos[ProtoNum].Configs[0];
1008  ConfigNum = 0;
1009  while (ConfigWord) {
1010  cprintf ("%5d", ConfigWord & 1 ? temp : 0);
1011  if (ConfigWord & 1)
1012  ProtoConfigs[ConfigNum] += temp;
1013  ConfigNum++;
1014  ConfigWord >>= 1;
1015  }
1016  cprintf("\n");
1017  }
1018  }
1019  }
1020 
1021  if (PrintMatchSummaryOn (Debug)) {
1022  cprintf ("Proto Error for Configurations:\n");
1023  for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
1024  cprintf (" %5.1f",
1025  100.0 * (1.0 -
1026  ProtoConfigs[ConfigNum] /
1027  ClassTemplate->ConfigLengths[ConfigNum] / 256.0));
1028  cprintf ("\n\n");
1029  }
1030 
1031  if (PrintProtoMatchesOn (Debug)) {
1032  cprintf ("Proto Sum for Configurations:\n");
1033  for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
1034  cprintf (" %4.1f", ProtoConfigs[ConfigNum] / 256.0);
1035  cprintf ("\n\n");
1036 
1037  cprintf ("Proto Length for Configurations:\n");
1038  for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
1039  cprintf (" %4.1f",
1040  static_cast<float>(ClassTemplate->ConfigLengths[ConfigNum]));
1041  cprintf ("\n\n");
1042  }
1043 
1044 }
1045 
1046 void IntegerMatcher::DisplayProtoDebugInfo(
1047  INT_CLASS ClassTemplate,
1048  BIT_VECTOR ConfigMask,
1049  const ScratchEvidence& tables,
1050  bool SeparateDebugWindows) {
1051  uint16_t ProtoNum;
1052  uint16_t ActualProtoNum;
1053  PROTO_SET ProtoSet;
1054  int ProtoSetIndex;
1055 
1057  if (SeparateDebugWindows) {
1060  }
1061 
1062  for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
1063  ProtoSetIndex++) {
1064  ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
1065  ActualProtoNum = ProtoSetIndex * PROTOS_PER_PROTO_SET;
1066  for (ProtoNum = 0;
1067  ((ProtoNum < PROTOS_PER_PROTO_SET) &&
1068  (ActualProtoNum < ClassTemplate->NumProtos));
1069  ProtoNum++, ActualProtoNum++) {
1070  /* Compute Average for Actual Proto */
1071  int temp = 0;
1072  for (uint8_t i = 0; i < ClassTemplate->ProtoLengths[ActualProtoNum]; i++)
1073  temp += tables.proto_evidence_[ActualProtoNum][i];
1074 
1075  temp /= ClassTemplate->ProtoLengths[ActualProtoNum];
1076 
1077  if ((ProtoSet->Protos[ProtoNum]).Configs[0] & (*ConfigMask)) {
1078  DisplayIntProto(ClassTemplate, ActualProtoNum, temp / 255.0);
1079  }
1080  }
1081  }
1082 }
1083 
1084 
1085 void IntegerMatcher::DisplayFeatureDebugInfo(
1086  INT_CLASS ClassTemplate,
1087  BIT_VECTOR ProtoMask,
1088  BIT_VECTOR ConfigMask,
1089  int16_t NumFeatures,
1090  const INT_FEATURE_STRUCT* Features,
1091  int AdaptFeatureThreshold,
1092  int Debug,
1093  bool SeparateDebugWindows) {
1094  auto *tables = new ScratchEvidence();
1095 
1096  tables->Clear(ClassTemplate);
1097 
1099  if (SeparateDebugWindows) {
1102  }
1103 
1104  for (int Feature = 0; Feature < NumFeatures; Feature++) {
1105  UpdateTablesForFeature(
1106  ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature],
1107  tables, 0);
1108 
1109  /* Find Best Evidence for Current Feature */
1110  int best = 0;
1111  assert(ClassTemplate->NumConfigs < MAX_NUM_CONFIGS);
1112  for (int i = 0; i < MAX_NUM_CONFIGS && i < ClassTemplate->NumConfigs; i++)
1113  if (tables->feature_evidence_[i] > best)
1114  best = tables->feature_evidence_[i];
1115 
1116  /* Update display for current feature */
1117  if (ClipMatchEvidenceOn(Debug)) {
1118  if (best < AdaptFeatureThreshold)
1119  DisplayIntFeature(&Features[Feature], 0.0);
1120  else
1121  DisplayIntFeature(&Features[Feature], 1.0);
1122  } else {
1123  DisplayIntFeature(&Features[Feature], best / 255.0);
1124  }
1125  }
1126 
1127  delete tables;
1128 }
1129 #endif
1130 
1135  INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask) {
1136 
1137  int *IntPointer;
1138  uint32_t ConfigWord;
1139  int ProtoSetIndex;
1140  uint16_t ProtoNum;
1141  PROTO_SET ProtoSet;
1142  int NumProtos;
1143  uint16_t ActualProtoNum;
1144 
1145  NumProtos = ClassTemplate->NumProtos;
1146 
1147  for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
1148  ProtoSetIndex++) {
1149  ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
1150  ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
1151  for (ProtoNum = 0;
1152  ((ProtoNum < PROTOS_PER_PROTO_SET) && (ActualProtoNum < NumProtos));
1153  ProtoNum++, ActualProtoNum++) {
1154  int temp = 0;
1155  assert(ClassTemplate->ProtoLengths[ActualProtoNum] < MAX_PROTO_INDEX);
1156  for (uint8_t i = 0; i < MAX_PROTO_INDEX &&
1157  i < ClassTemplate->ProtoLengths[ActualProtoNum]; i++)
1158  temp += proto_evidence_[ActualProtoNum] [i];
1159 
1160  ConfigWord = ProtoSet->Protos[ProtoNum].Configs[0];
1161  ConfigWord &= *ConfigMask;
1162  IntPointer = sum_feature_evidence_;
1163  while (ConfigWord) {
1164  if (ConfigWord & 1)
1165  *IntPointer += temp;
1166  IntPointer++;
1167  ConfigWord >>= 1;
1168  }
1169  }
1170  }
1171 }
1172 
1178  INT_CLASS ClassTemplate, int16_t NumFeatures) {
1179 
1180  assert(ClassTemplate->NumConfigs < MAX_NUM_CONFIGS);
1181  for (int i = 0; i < MAX_NUM_CONFIGS && i < ClassTemplate->NumConfigs; i++) {
1183  (NumFeatures + ClassTemplate->ConfigLengths[i]);
1184  }
1185 }
1186 
1192 int IntegerMatcher::FindBestMatch(
1193  INT_CLASS class_template,
1194  const ScratchEvidence &tables,
1195  UnicharRating* result) {
1196  int best_match = 0;
1197  result->config = 0;
1198  result->fonts.truncate(0);
1199  result->fonts.reserve(class_template->NumConfigs);
1200 
1201  /* Find best match */
1202  assert(class_template->NumConfigs < MAX_NUM_CONFIGS);
1203  for (int c = 0; c < MAX_NUM_CONFIGS && c < class_template->NumConfigs; ++c) {
1204  int rating = tables.sum_feature_evidence_[c];
1205  if (*classify_debug_level_ > 2)
1206  tprintf("Config %d, rating=%d\n", c, rating);
1207  if (rating > best_match) {
1208  result->config = c;
1209  best_match = rating;
1210  }
1211  result->fonts.push_back(ScoredFont(c, rating));
1212  }
1213 
1214  // Compute confidence on a Probability scale.
1215  result->rating = best_match / 65536.0f;
1216 
1217  return best_match;
1218 }
1219 
1224 float IntegerMatcher::ApplyCNCorrection(float rating, int blob_length,
1225  int normalization_factor,
1226  int matcher_multiplier) {
1227  int divisor = blob_length + matcher_multiplier;
1228  return divisor == 0 ? 1.0f : (rating * blob_length +
1229  matcher_multiplier * normalization_factor / 256.0f) / divisor;
1230 }
IntegerMatcher::Match
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:510
INT_TEMPLATES_STRUCT
Definition: intproto.h:117
ClipToRange
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:106
SE_TABLE_BITS
#define SE_TABLE_BITS
Definition: intmatcher.h:54
INT_CLASS_STRUCT::ConfigLengths
uint16_t ConfigLengths[MAX_NUM_CONFIGS]
Definition: intproto.h:110
PROTOS_PER_PROTO_SET
#define PROTOS_PER_PROTO_SET
Definition: intproto.h:48
tesseract::ClassPruner::AdjustForExpectedNumFeatures
void AdjustForExpectedNumFeatures(const uint16_t *expected_num_features, int cutoff_strength)
Definition: intmatcher.cpp:250
DisplayProtoMatchesOn
#define DisplayProtoMatchesOn(D)
Definition: intproto.h:197
ScratchEvidence::ClearFeatureEvidence
void ClearFeatureEvidence(const INT_CLASS class_template)
Definition: intmatcher.cpp:745
PROTO_SET_STRUCT::Protos
INT_PROTO_STRUCT Protos[PROTOS_PER_PROTO_SET]
Definition: intproto.h:96
tesseract::IntParam
Definition: params.h:152
tesseract::ClassPruner::DebugMatch
void DebugMatch(const Classify &classify, const INT_TEMPLATES_STRUCT *int_templates, const INT_FEATURE_STRUCT *features) const
Definition: intmatcher.cpp:339
PrintFeatureMatchesOn
#define PrintFeatureMatchesOn(D)
Definition: intproto.h:198
tesseract::Classify::classify_class_pruner_threshold
int classify_class_pruner_threshold
Definition: classify.h:499
tesseract::UnicharRating
Definition: shapetable.h:40
ScratchEvidence
Definition: intmatcher.h:57
INT_CLASS_STRUCT
Definition: intproto.h:104
IntegerMatcher::ApplyCNCorrection
float ApplyCNCorrection(float rating, int blob_length, int normalization_factor, int matcher_multiplier)
Definition: intmatcher.cpp:1223
tesseract::UnicharRating::fonts
GenericVector< ScoredFont > fonts
Definition: shapetable.h:87
IntegerMatcher::kEvidenceTableBits
static const int kEvidenceTableBits
Definition: intmatcher.h:75
INT_FEATURE_STRUCT::Theta
uint8_t Theta
Definition: intproto.h:141
PROTO_ID
int16_t PROTO_ID
Definition: matchdefs.h:39
INT_PROTO_STRUCT
Definition: intproto.h:80
STRING
Definition: strngs.h:45
INT_PROTO_STRUCT::Configs
uint32_t Configs[WERDS_PER_CONFIG_VEC]
Definition: intproto.h:85
RoundUp
int RoundUp(int n, int block_size)
Definition: helpers.h:100
tesseract::Classify::classify_cp_cutoff_strength
int classify_cp_cutoff_strength
Definition: classify.h:503
cprintf
void cprintf(const char *format,...)
Definition: callcpp.cpp:32
NUM_PP_BUCKETS
#define NUM_PP_BUCKETS
Definition: intproto.h:51
tesseract::Classify::classify_class_pruner_multiplier
int classify_class_pruner_multiplier
Definition: classify.h:501
INT_CLASS_STRUCT::NumProtos
uint16_t NumProtos
Definition: intproto.h:105
ScratchEvidence::feature_evidence_
uint8_t feature_evidence_[MAX_NUM_CONFIGS]
Definition: intmatcher.h:58
IntegerMatcher::kIntEvidenceTruncBits
static const int kIntEvidenceTruncBits
Definition: intmatcher.h:77
INT_TEMPLATES_STRUCT::NumClassPruners
int NumClassPruners
Definition: intproto.h:119
IntegerMatcher::FindGoodProtos
int FindGoodProtos(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray, int AdaptProtoThreshold, int Debug)
Definition: intmatcher.cpp:588
tesseract::CCUtil::unicharset
UNICHARSET unicharset
Definition: ccutil.h:57
tesseract::ClassPruner::DisableFragments
void DisableFragments(const UNICHARSET &unicharset)
Definition: intmatcher.cpp:271
ScratchEvidence::NormalizeSums
void NormalizeSums(INT_CLASS ClassTemplate, int16_t NumFeatures)
Definition: intmatcher.cpp:1176
tesseract::ClassPruner::~ClassPruner
~ClassPruner()
Definition: intmatcher.cpp:170
INT_CLASS_STRUCT::NumProtoSets
uint8_t NumProtoSets
Definition: intproto.h:106
INT_PROTO_STRUCT::B
uint8_t B
Definition: intproto.h:82
tesseract::UnicharRating::config
uint8_t config
Definition: shapetable.h:81
IntegerMatcher::IntegerMatcher
IntegerMatcher(tesseract::IntParam *classify_debug_level)
Definition: intmatcher.cpp:708
tesseract::ClassPruner::NormalizeForXheight
void NormalizeForXheight(int norm_multiplier, const uint8_t *normalization_factors)
Definition: intmatcher.cpp:285
tesseract::ClassPruner::SetupResults
int SetupResults(GenericVector< CP_RESULT_STRUCT > *results) const
Definition: intmatcher.cpp:399
INT_PROTO_STRUCT::Angle
uint8_t Angle
Definition: intproto.h:84
CP_RESULT_STRUCT
Definition: intmatcher.h:42
BITS_PER_WERD
#define BITS_PER_WERD
Definition: intproto.h:44
IntegerMatcher::kSimilarityCenter
static const float kSimilarityCenter
Definition: intmatcher.h:81
tesseract::UnicharRating::rating
float rating
Definition: shapetable.h:77
STRING::c_str
const char * c_str() const
Definition: strngs.cpp:192
UNICHARSET::get_enabled
bool get_enabled(UNICHAR_ID unichar_id) const
Definition: unicharset.h:868
DisplayFeatureMatchesOn
#define DisplayFeatureMatchesOn(D)
Definition: intproto.h:196
MAX_NUM_CONFIGS
#define MAX_NUM_CONFIGS
Definition: intproto.h:46
tesseract::Classify::shape_table_
ShapeTable * shape_table_
Definition: classify.h:546
tesseract::Classify::PruneClasses
int PruneClasses(const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uint8_t *normalization_factors, const uint16_t *expected_num_features, GenericVector< CP_RESULT_STRUCT > *results)
Definition: intmatcher.cpp:451
INT_CLASS_STRUCT::ProtoLengths
uint8_t * ProtoLengths
Definition: intproto.h:109
tesseract::Classify::disable_character_fragments
bool disable_character_fragments
Definition: classify.h:486
InitIntMatchWindowIfReqd
void InitIntMatchWindowIfReqd()
Definition: intproto.cpp:1723
DisplayIntFeature
void DisplayIntFeature(const INT_FEATURE_STRUCT *Feature, float Evidence)
Definition: intproto.cpp:589
shapetable.h
tesseract::ScoredFont
Definition: fontinfo.h:38
ScratchEvidence::UpdateSumOfProtoEvidences
void UpdateSumOfProtoEvidences(INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask)
Definition: intmatcher.cpp:1133
UNICHARSET
Definition: unicharset.h:145
MAX_PROTO_INDEX
#define MAX_PROTO_INDEX
Definition: intproto.h:43
INT_TEMPLATES_STRUCT::NumClasses
int NumClasses
Definition: intproto.h:118
tesseract::UnicharRating::Print
void Print() const
Definition: shapetable.h:48
tesseract::ClassPruner::PruneAndSort
void PruneAndSort(int pruning_factor, int keep_this, bool max_of_non_fragments, const UNICHARSET &unicharset)
Definition: intmatcher.cpp:303
tesseract::ClassPruner
Definition: intmatcher.cpp:146
INT_FEATURE_STRUCT::Y
uint8_t Y
Definition: intproto.h:140
BIT_VECTOR
uint32_t * BIT_VECTOR
Definition: bitvec.h:27
helpers.h
float2int.h
tesseract
Definition: baseapi.h:65
fontinfo.h
SE_TABLE_SIZE
#define SE_TABLE_SIZE
Definition: intmatcher.h:55
ScratchEvidence::Clear
void Clear(const INT_CLASS class_template)
Definition: intmatcher.cpp:738
PrintMatchSummaryOn
#define PrintMatchSummaryOn(D)
Definition: intproto.h:195
NUM_CP_BUCKETS
#define NUM_CP_BUCKETS
Definition: intproto.h:52
callcpp.h
ScratchEvidence::sum_feature_evidence_
int sum_feature_evidence_[MAX_NUM_CONFIGS]
Definition: intmatcher.h:59
tesseract::ClassPruner::ClassPruner
ClassPruner(int max_classes)
Definition: intmatcher.cpp:148
INT_CLASS_STRUCT::ProtoSets
PROTO_SET ProtoSets[MAX_NUM_PROTO_SETS]
Definition: intproto.h:108
INT_FEATURE_ARRAY
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
Definition: intproto.h:151
tesseract::ClassPruner::NoNormalization
void NoNormalization()
Definition: intmatcher.cpp:294
GenericVector< CP_RESULT_STRUCT >
InitProtoDisplayWindowIfReqd
void InitProtoDisplayWindowIfReqd()
Definition: intproto.cpp:1744
DisplayIntProto
void DisplayIntProto(INT_CLASS Class, PROTO_ID ProtoId, float Evidence)
Definition: intproto.cpp:607
INT_FEATURE_STRUCT
Definition: intproto.h:131
CLASS_PRUNER_CLASS_MASK
#define CLASS_PRUNER_CLASS_MASK
Definition: intproto.h:55
PROTO_SET_STRUCT
Definition: intproto.h:94
tesseract::ClassPruner::SummarizeResult
void SummarizeResult(const Classify &classify, const INT_TEMPLATES_STRUCT *int_templates, const uint16_t *expected_num_features, int norm_multiplier, const uint8_t *normalization_factors) const
Definition: intmatcher.cpp:376
GenericVector::init_to_size
void init_to_size(int size, const T &t)
Definition: genericvector.h:706
ClipMatchEvidenceOn
#define ClipMatchEvidenceOn(D)
Definition: intproto.h:200
INT_TEMPLATES_STRUCT::ClassPruners
CLASS_PRUNER_STRUCT * ClassPruners[MAX_NUM_CLASS_PRUNERS]
Definition: intproto.h:121
IntegerMatcher::FindBadFeatures
int FindBadFeatures(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray, int AdaptFeatureThreshold, int Debug)
Definition: intmatcher.cpp:656
IntegerMatcher::kIntThetaFudge
static const int kIntThetaFudge
Definition: intmatcher.h:73
WERDS_PER_CP_VECTOR
#define WERDS_PER_CP_VECTOR
Definition: intproto.h:61
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
PrintProtoMatchesOn
#define PrintProtoMatchesOn(D)
Definition: intproto.h:199
UNICHARSET::get_fragment
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:724
CLASS_PRUNER_STRUCT::p
uint32_t p[NUM_CP_BUCKETS][NUM_CP_BUCKETS][NUM_CP_BUCKETS][WERDS_PER_CP_VECTOR]
Definition: intproto.h:77
MatchDebuggingOn
#define MatchDebuggingOn(D)
Definition: intproto.h:194
NUM_BITS_PER_CLASS
#define NUM_BITS_PER_CLASS
Definition: intproto.h:54
tesseract::ClassPruner::ComputeScores
void ComputeScores(const INT_TEMPLATES_STRUCT *int_templates, int num_features, const INT_FEATURE_STRUCT *features)
Definition: intmatcher.cpp:179
tesseract::ClassPruner::DisableDisabledClasses
void DisableDisabledClasses(const UNICHARSET &unicharset)
Definition: intmatcher.cpp:263
IntegerMatcher::kSEExponentialMultiplier
static const float kSEExponentialMultiplier
Definition: intmatcher.h:79
intproto.h
FEATURE_ID
uint8_t FEATURE_ID
Definition: matchdefs.h:45
classify.h
INT_CLASS_STRUCT::NumConfigs
uint8_t NumConfigs
Definition: intproto.h:107
INT_PROTO_STRUCT::C
int8_t C
Definition: intproto.h:83
INT_FEATURE_STRUCT::X
uint8_t X
Definition: intproto.h:139
scrollview.h
INT_PROTO_STRUCT::A
int8_t A
Definition: intproto.h:81
ScratchEvidence::proto_evidence_
uint8_t proto_evidence_[MAX_NUM_PROTOS][MAX_PROTO_INDEX]
Definition: intmatcher.h:60
InitFeatureDisplayWindowIfReqd
void InitFeatureDisplayWindowIfReqd()
Definition: intproto.cpp:1755
tesseract::Classify::classify_debug_level
int classify_debug_level
Definition: classify.h:430
tesseract::UnicharRating::feature_misses
uint16_t feature_misses
Definition: shapetable.h:83
intmatcher.h