tesseract  5.0.0-alpha-619-ge9db
IntegerMatcher Class Reference

#include <intmatcher.h>

Public Member Functions

 IntegerMatcher (tesseract::IntParam *classify_debug_level)
 
void Match (INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
 
float ApplyCNCorrection (float rating, int blob_length, int normalization_factor, int matcher_multiplier)
 
int FindGoodProtos (INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray, int AdaptProtoThreshold, int Debug)
 
int FindBadFeatures (INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray, int AdaptFeatureThreshold, int Debug)
 

Static Public Attributes

static const int kIntThetaFudge = 128
 
static const int kEvidenceTableBits = 9
 
static const int kIntEvidenceTruncBits = 14
 
static const float kSEExponentialMultiplier = 0.0f
 
static const float kSimilarityCenter = 0.0075f
 

Detailed Description

Definition at line 70 of file intmatcher.h.

Constructor & Destructor Documentation

◆ IntegerMatcher()

IntegerMatcher::IntegerMatcher ( tesseract::IntParam * classify_debug_level)

Definition at line 708 of file intmatcher.cpp.

710  : classify_debug_level_(classify_debug_level)
711 {
712  /* Initialize table for evidence to similarity lookup */
713  for (int i = 0; i < SE_TABLE_SIZE; i++) {
714  uint32_t IntSimilarity = i << (27 - SE_TABLE_BITS);
715  double Similarity = (static_cast<double>(IntSimilarity)) / 65536.0 / 65536.0;
716  double evidence = Similarity / kSimilarityCenter;
717  evidence = 255.0 / (evidence * evidence + 1.0);
718 
719  if (kSEExponentialMultiplier > 0.0) {
720  double scale = 1.0 - exp(-kSEExponentialMultiplier) *
721  exp(kSEExponentialMultiplier * (static_cast<double>(i) / SE_TABLE_SIZE));
722  evidence *= ClipToRange(scale, 0.0, 1.0);
723  }
724 
725  similarity_evidence_table_[i] = static_cast<uint8_t>(evidence + 0.5);
726  }
727 
728  /* Initialize evidence computation variables */
729  evidence_table_mask_ =
730  ((1 << kEvidenceTableBits) - 1) << (9 - kEvidenceTableBits);
731  mult_trunc_shift_bits_ = (14 - kIntEvidenceTruncBits);
732  table_trunc_shift_bits_ = (27 - SE_TABLE_BITS - (mult_trunc_shift_bits_ << 1));
733  evidence_mult_mask_ = ((1 << kIntEvidenceTruncBits) - 1);

Member Function Documentation

◆ ApplyCNCorrection()

float IntegerMatcher::ApplyCNCorrection ( float  rating,
int  blob_length,
int  normalization_factor,
int  matcher_multiplier 
)

Applies the CN normalization factor to the given rating and returns the modified rating.

Definition at line 1223 of file intmatcher.cpp.

1226  {
1227  int divisor = blob_length + matcher_multiplier;
1228  return divisor == 0 ? 1.0f : (rating * blob_length +
1229  matcher_multiplier * normalization_factor / 256.0f) / divisor;

◆ FindBadFeatures()

int IntegerMatcher::FindBadFeatures ( INT_CLASS  ClassTemplate,
BIT_VECTOR  ProtoMask,
BIT_VECTOR  ConfigMask,
int16_t  NumFeatures,
INT_FEATURE_ARRAY  Features,
FEATURE_ID * FeatureArray,
int  AdaptFeatureThreshold,
int  Debug 
)

FindBadFeatures finds all features whose maximum feature-evidence is less than AdaptFeatureThreshold. The list is ordered by increasing feature number.

Parameters
ClassTemplate  Prototypes & tables for a class
ProtoMask  AND Mask for proto word
ConfigMask  AND Mask for config word
NumFeatures  Number of features in blob
Features  Array of features
FeatureArray  Array of bad features
AdaptFeatureThreshold  Threshold for bad features
Debug  Debugger flag: 1=debugger on
Returns
Number of bad features in FeatureArray.

Definition at line 656 of file intmatcher.cpp.

665  {
666  auto *tables = new ScratchEvidence();
667  int NumBadFeatures = 0;
668 
669  /* DEBUG opening heading */
670  if (MatchDebuggingOn(Debug))
671  cprintf("Find Bad Features -------------------------------------------\n");
672 
673  tables->Clear(ClassTemplate);
674 
675  for (int Feature = 0; Feature < NumFeatures; Feature++) {
676  UpdateTablesForFeature(
677  ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature],
678  tables, Debug);
679 
680  /* Find Best Evidence for Current Feature */
681  int best = 0;
682  assert(ClassTemplate->NumConfigs < MAX_NUM_CONFIGS);
683  for (int i = 0; i < MAX_NUM_CONFIGS && i < ClassTemplate->NumConfigs; i++)
684  if (tables->feature_evidence_[i] > best)
685  best = tables->feature_evidence_[i];
686 
687  /* Find Bad Features */
688  if (best < AdaptFeatureThreshold) {
689  *FeatureArray = Feature;
690  FeatureArray++;
691  NumBadFeatures++;
692  }
693  }
694 
695 #ifndef GRAPHICS_DISABLED
696  if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug))
697  DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
698  NumFeatures, Debug);
699 #endif
700 
701  if (MatchDebuggingOn(Debug))
702  cprintf("Match Complete --------------------------------------------\n");
703 
704  delete tables;
705  return NumBadFeatures;

◆ FindGoodProtos()

int IntegerMatcher::FindGoodProtos ( INT_CLASS  ClassTemplate,
BIT_VECTOR  ProtoMask,
BIT_VECTOR  ConfigMask,
int16_t  NumFeatures,
INT_FEATURE_ARRAY  Features,
PROTO_ID * ProtoArray,
int  AdaptProtoThreshold,
int  Debug 
)

FindGoodProtos finds all protos whose normalized proto-evidence exceeds AdaptProtoThreshold. The list is ordered by increasing proto id number.

Globals:

  • local_matcher_multiplier_ Normalization factor multiplier

Parameters
ClassTemplate  Prototypes & tables for a class
ProtoMask  AND Mask for proto word
ConfigMask  AND Mask for config word
NumFeatures  Number of features in blob
Features  Array of features
ProtoArray  Array of good protos
AdaptProtoThreshold  Threshold for good protos
Debug  Debugger flag: 1=debugger on
Returns
Number of good protos in ProtoArray.

Definition at line 588 of file intmatcher.cpp.

597  {
598  auto *tables = new ScratchEvidence();
599  int NumGoodProtos = 0;
600 
601  /* DEBUG opening heading */
602  if (MatchDebuggingOn (Debug))
603  cprintf
604  ("Find Good Protos -------------------------------------------\n");
605 
606  tables->Clear(ClassTemplate);
607 
608  for (int Feature = 0; Feature < NumFeatures; Feature++)
609  UpdateTablesForFeature(
610  ClassTemplate, ProtoMask, ConfigMask, Feature, &(Features[Feature]),
611  tables, Debug);
612 
613 #ifndef GRAPHICS_DISABLED
614  if (PrintProtoMatchesOn (Debug) || PrintMatchSummaryOn (Debug))
615  DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
616  NumFeatures, Debug);
617 #endif
618 
619  /* Average Proto Evidences & Find Good Protos */
620  for (int proto = 0; proto < ClassTemplate->NumProtos; proto++) {
621  /* Compute Average for Actual Proto */
622  int Temp = 0;
623  for (uint8_t i = 0;
624  i < MAX_PROTO_INDEX && i < ClassTemplate->ProtoLengths[proto]; i++)
625  Temp += tables->proto_evidence_[proto][i];
626 
627  Temp /= ClassTemplate->ProtoLengths[proto];
628 
629  /* Find Good Protos */
630  if (Temp >= AdaptProtoThreshold) {
631  *ProtoArray = proto;
632  ProtoArray++;
633  NumGoodProtos++;
634  }
635  }
636 
637  if (MatchDebuggingOn (Debug))
638  cprintf ("Match Complete --------------------------------------------\n");
639  delete tables;
640 
641  return NumGoodProtos;

◆ Match()

void IntegerMatcher::Match ( INT_CLASS  ClassTemplate,
BIT_VECTOR  ProtoMask,
BIT_VECTOR  ConfigMask,
int16_t  NumFeatures,
const INT_FEATURE_STRUCT * Features,
tesseract::UnicharRating * Result,
int  AdaptFeatureThreshold,
int  Debug,
bool  SeparateDebugWindows 
)

IntegerMatcher returns the best configuration and rating for a single class. The class matched against is determined by the uniqueness of the ClassTemplate parameter. The best rating and its associated configuration are returned.

Globals:

  • local_matcher_multiplier_ Normalization factor multiplier

Parameters
ClassTemplate  Prototypes & tables for a class
NumFeatures  Number of features in blob
Features  Array of features
NormalizationFactor  Fudge factor from blob normalization process
Result  Class rating & configuration: (0.0 -> 1.0), 0=bad, 1=good
Debug  Debugger flag: 1=debugger on

Definition at line 510 of file intmatcher.cpp.

519  {
520  auto *tables = new ScratchEvidence();
521  int Feature;
522 
523  if (MatchDebuggingOn (Debug))
524  cprintf ("Integer Matcher -------------------------------------------\n");
525 
526  tables->Clear(ClassTemplate);
527  Result->feature_misses = 0;
528 
529  for (Feature = 0; Feature < NumFeatures; Feature++) {
530  int csum = UpdateTablesForFeature(ClassTemplate, ProtoMask, ConfigMask,
531  Feature, &Features[Feature],
532  tables, Debug);
533  // Count features that were missed over all configs.
534  if (csum == 0)
535  ++Result->feature_misses;
536  }
537 
538 #ifndef GRAPHICS_DISABLED
539  if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug)) {
540  DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
541  NumFeatures, Debug);
542  }
543 
544  if (DisplayProtoMatchesOn(Debug)) {
545  DisplayProtoDebugInfo(ClassTemplate, ConfigMask,
546  *tables, SeparateDebugWindows);
547  }
548 
549  if (DisplayFeatureMatchesOn(Debug)) {
550  DisplayFeatureDebugInfo(ClassTemplate, ProtoMask, ConfigMask, NumFeatures,
551  Features, AdaptFeatureThreshold, Debug,
552  SeparateDebugWindows);
553  }
554 #endif
555 
556  tables->UpdateSumOfProtoEvidences(ClassTemplate, ConfigMask);
557  tables->NormalizeSums(ClassTemplate, NumFeatures);
558 
559  FindBestMatch(ClassTemplate, *tables, Result);
560 
561 #ifndef GRAPHICS_DISABLED
562  if (PrintMatchSummaryOn(Debug))
563  Result->Print();
564 
565  if (MatchDebuggingOn(Debug))
566  cprintf("Match Complete --------------------------------------------\n");
567 #endif
568 
569  delete tables;

Member Data Documentation

◆ kEvidenceTableBits

const int IntegerMatcher::kEvidenceTableBits = 9
static

Definition at line 75 of file intmatcher.h.

◆ kIntEvidenceTruncBits

const int IntegerMatcher::kIntEvidenceTruncBits = 14
static

Definition at line 77 of file intmatcher.h.

◆ kIntThetaFudge

const int IntegerMatcher::kIntThetaFudge = 128
static

Definition at line 73 of file intmatcher.h.

◆ kSEExponentialMultiplier

const float IntegerMatcher::kSEExponentialMultiplier = 0.0f
static

Definition at line 79 of file intmatcher.h.

◆ kSimilarityCenter

const float IntegerMatcher::kSimilarityCenter = 0.0075f
static

Definition at line 81 of file intmatcher.h.


The documentation for this class was generated from the following files:
  • intmatcher.h
  • intmatcher.cpp
ClipToRange
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:106
SE_TABLE_BITS
#define SE_TABLE_BITS
Definition: intmatcher.h:54
DisplayProtoMatchesOn
#define DisplayProtoMatchesOn(D)
Definition: intproto.h:197
ScratchEvidence
Definition: intmatcher.h:57
IntegerMatcher::kEvidenceTableBits
static const int kEvidenceTableBits
Definition: intmatcher.h:75
cprintf
void cprintf(const char *format,...)
Definition: callcpp.cpp:32
INT_CLASS_STRUCT::NumProtos
uint16_t NumProtos
Definition: intproto.h:105
IntegerMatcher::kIntEvidenceTruncBits
static const int kIntEvidenceTruncBits
Definition: intmatcher.h:77
IntegerMatcher::kSimilarityCenter
static const float kSimilarityCenter
Definition: intmatcher.h:81
DisplayFeatureMatchesOn
#define DisplayFeatureMatchesOn(D)
Definition: intproto.h:196
MAX_NUM_CONFIGS
#define MAX_NUM_CONFIGS
Definition: intproto.h:46
INT_CLASS_STRUCT::ProtoLengths
uint8_t * ProtoLengths
Definition: intproto.h:109
MAX_PROTO_INDEX
#define MAX_PROTO_INDEX
Definition: intproto.h:43
tesseract::UnicharRating::Print
void Print() const
Definition: shapetable.h:48
SE_TABLE_SIZE
#define SE_TABLE_SIZE
Definition: intmatcher.h:55
PrintMatchSummaryOn
#define PrintMatchSummaryOn(D)
Definition: intproto.h:195
PrintProtoMatchesOn
#define PrintProtoMatchesOn(D)
Definition: intproto.h:199
MatchDebuggingOn
#define MatchDebuggingOn(D)
Definition: intproto.h:194
IntegerMatcher::kSEExponentialMultiplier
static const float kSEExponentialMultiplier
Definition: intmatcher.h:79
INT_CLASS_STRUCT::NumConfigs
uint8_t NumConfigs
Definition: intproto.h:107
tesseract::UnicharRating::feature_misses
uint16_t feature_misses
Definition: shapetable.h:83