tesseract  5.0.0-alpha-619-ge9db
tesseract::Classify Class Reference

#include <classify.h>

Inheritance diagram for tesseract::Classify:
tesseract::CCStruct tesseract::CCUtil tesseract::Wordrec tesseract::Tesseract

Public Member Functions

 Classify ()
 
 ~Classify () override
 
virtual Dict & getDict ()
 
const ShapeTable * shape_table () const
 
void SetStaticClassifier (ShapeClassifier *static_classifier)
 
void AddLargeSpeckleTo (int blob_length, BLOB_CHOICE_LIST *choices)
 
bool LargeSpeckle (const TBLOB &blob)
 
ADAPT_TEMPLATES NewAdaptedTemplates (bool InitFromUnicharset)
 
int GetFontinfoId (ADAPT_CLASS Class, uint8_t ConfigId)
 
int PruneClasses (const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uint8_t *normalization_factors, const uint16_t *expected_num_features, GenericVector< CP_RESULT_STRUCT > *results)
 
void ReadNewCutoffs (TFile *fp, uint16_t *Cutoffs)
 
void PrintAdaptedTemplates (FILE *File, ADAPT_TEMPLATES Templates)
 
void WriteAdaptedTemplates (FILE *File, ADAPT_TEMPLATES Templates)
 
ADAPT_TEMPLATES ReadAdaptedTemplates (TFile *File)
 
float ComputeNormMatch (CLASS_ID ClassId, const FEATURE_STRUCT &feature, bool DebugMatch)
 
void FreeNormProtos ()
 
NORM_PROTOS * ReadNormProtos (TFile *fp)
 
void ConvertProto (PROTO Proto, int ProtoId, INT_CLASS Class)
 
INT_TEMPLATES CreateIntTemplates (CLASSES FloatProtos, const UNICHARSET &target_unicharset)
 
void LearnWord (const char *fontname, WERD_RES *word)
 
void LearnPieces (const char *fontname, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word)
 
void InitAdaptiveClassifier (TessdataManager *mgr)
 
void InitAdaptedClass (TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates)
 
void AmbigClassifier (const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, const TBLOB *blob, INT_TEMPLATES templates, ADAPT_CLASS *classes, UNICHAR_ID *ambiguities, ADAPT_RESULTS *results)
 
void MasterMatcher (INT_TEMPLATES templates, int16_t num_features, const INT_FEATURE_STRUCT *features, const uint8_t *norm_factors, ADAPT_CLASS *classes, int debug, int matcher_multiplier, const TBOX &blob_box, const GenericVector< CP_RESULT_STRUCT > &results, ADAPT_RESULTS *final_results)
 
void ExpandShapesAndApplyCorrections (ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, const uint8_t *cn_factors, UnicharRating *int_result, ADAPT_RESULTS *final_results)
 
double ComputeCorrectedRating (bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, int matcher_multiplier, const uint8_t *cn_factors)
 
void ConvertMatchesToChoices (const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices)
 
void AddNewResult (const UnicharRating &new_result, ADAPT_RESULTS *results)
 
int GetAdaptiveFeatures (TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures)
 
void DebugAdaptiveClassifier (TBLOB *Blob, ADAPT_RESULTS *Results)
 
PROTO_ID MakeNewTempProtos (FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS IClass, ADAPT_CLASS Class, BIT_VECTOR TempProtoMask)
 
int MakeNewTemporaryConfig (ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures)
 
void MakePermanent (ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob)
 
void PrintAdaptiveMatchResults (const ADAPT_RESULTS &results)
 
void RemoveExtraPuncs (ADAPT_RESULTS *Results)
 
void RemoveBadMatches (ADAPT_RESULTS *Results)
 
void SetAdaptiveThreshold (float Threshold)
 
void ShowBestMatchFor (int shape_id, const INT_FEATURE_STRUCT *features, int num_features)
 
STRING ClassIDToDebugStr (const INT_TEMPLATES_STRUCT *templates, int class_id, int config_id) const
 
int ClassAndConfigIDToFontOrShapeID (int class_id, int int_result_config) const
 
int ShapeIDToClassID (int shape_id) const
 
UNICHAR_ID * BaselineClassifier (TBLOB *Blob, const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results)
 
int CharNormClassifier (TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results)
 
int CharNormTrainingSample (bool pruner_only, int keep_this, const TrainingSample &sample, GenericVector< UnicharRating > *results)
 
UNICHAR_ID * GetAmbiguities (TBLOB *Blob, CLASS_ID CorrectClass)
 
void DoAdaptiveMatch (TBLOB *Blob, ADAPT_RESULTS *Results)
 
void AdaptToChar (TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, float Threshold, ADAPT_TEMPLATES adaptive_templates)
 
void DisplayAdaptedChar (TBLOB *blob, INT_CLASS_STRUCT *int_class)
 
bool AdaptableWord (WERD_RES *word)
 
void EndAdaptiveClassifier ()
 
void SettupPass1 ()
 
void SettupPass2 ()
 
void AdaptiveClassifier (TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
 
void ClassifyAsNoise (ADAPT_RESULTS *Results)
 
void ResetAdaptiveClassifierInternal ()
 
void SwitchAdaptiveClassifier ()
 
void StartBackupAdaptiveClassifier ()
 
int GetCharNormFeature (const INT_FX_RESULT_STRUCT &fx_info, INT_TEMPLATES templates, uint8_t *pruner_norm_array, uint8_t *char_norm_array)
 
void ComputeCharNormArrays (FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uint8_t *char_norm_array, uint8_t *pruner_array)
 
bool TempConfigReliable (CLASS_ID class_id, const TEMP_CONFIG &config)
 
void UpdateAmbigsGroup (CLASS_ID class_id, TBLOB *Blob)
 
bool AdaptiveClassifierIsFull () const
 
bool AdaptiveClassifierIsEmpty () const
 
bool LooksLikeGarbage (TBLOB *blob)
 
void RefreshDebugWindow (ScrollView **win, const char *msg, int y_offset, const TBOX &wbox)
 
void ClearCharNormArray (uint8_t *char_norm_array)
 
void ComputeIntCharNormArray (const FEATURE_STRUCT &norm_feature, uint8_t *char_norm_array)
 
void ComputeIntFeatures (FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures)
 
INT_TEMPLATES ReadIntTemplates (TFile *fp)
 
void WriteIntTemplates (FILE *File, INT_TEMPLATES Templates, const UNICHARSET &target_unicharset)
 
CLASS_ID GetClassToDebug (const char *Prompt, bool *adaptive_on, bool *pretrained_on, int *shape_id)
 
void ShowMatchDisplay ()
 
UnicityTable< FontInfo > & get_fontinfo_table ()
 
const UnicityTable< FontInfo > & get_fontinfo_table () const
 
UnicityTable< FontSet > & get_fontset_table ()
 
void NormalizeOutlines (LIST Outlines, float *XScale, float *YScale)
 
FEATURE_SET ExtractOutlineFeatures (TBLOB *Blob)
 
FEATURE_SET ExtractPicoFeatures (TBLOB *Blob)
 
FEATURE_SET ExtractIntCNFeatures (const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
 
FEATURE_SET ExtractIntGeoFeatures (const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
 
void LearnBlob (const STRING &fontname, TBLOB *Blob, const DENORM &cn_denorm, const INT_FX_RESULT_STRUCT &fx_info, const char *blob_text)
 
bool WriteTRFile (const STRING &filename)
 
- Public Member Functions inherited from tesseract::CCStruct
 CCStruct ()=default
 
 ~CCStruct () override
 
- Public Member Functions inherited from tesseract::CCUtil
 CCUtil ()
 
virtual ~CCUtil ()
 
void main_setup (const char *argv0, const char *basename)
 CCUtil::main_setup - set location of tessdata and name of image. More...
 
ParamsVectors * params ()
 

Static Public Member Functions

static void SetupBLCNDenorms (const TBLOB &blob, bool nonlinear_norm, DENORM *bl_denorm, DENORM *cn_denorm, INT_FX_RESULT_STRUCT *fx_info)
 
static void ExtractFeatures (const TBLOB &blob, bool nonlinear_norm, GenericVector< INT_FEATURE_STRUCT > *bl_features, GenericVector< INT_FEATURE_STRUCT > *cn_features, INT_FX_RESULT_STRUCT *results, GenericVector< int > *outline_cn_counts)
 

Public Attributes

bool allow_blob_division = true
 
bool prioritize_division = false
 
bool classify_enable_learning = true
 
int classify_debug_level = 0
 
int classify_norm_method = character
 
double classify_char_norm_range = 0.2
 
double classify_max_rating_ratio = 1.5
 
double classify_max_certainty_margin = 5.5
 
bool tess_cn_matching = 0
 
bool tess_bn_matching = 0
 
bool classify_enable_adaptive_matcher = 1
 
bool classify_use_pre_adapted_templates = 0
 
bool classify_save_adapted_templates = 0
 
bool classify_enable_adaptive_debugger = 0
 
bool classify_nonlinear_norm = 0
 
int matcher_debug_level = 0
 
int matcher_debug_flags = 0
 
int classify_learning_debug_level = 0
 
double matcher_good_threshold = 0.125
 
double matcher_reliable_adaptive_result = 0.0
 
double matcher_perfect_threshold = 0.02
 
double matcher_bad_match_pad = 0.15
 
double matcher_rating_margin = 0.1
 
double matcher_avg_noise_size = 12.0
 
int matcher_permanent_classes_min = 1
 
int matcher_min_examples_for_prototyping = 3
 
int matcher_sufficient_examples_for_prototyping = 5
 
double matcher_clustering_max_angle_delta = 0.015
 
double classify_misfit_junk_penalty = 0.0
 
double rating_scale = 1.5
 
double certainty_scale = 20.0
 
double tessedit_class_miss_scale = 0.00390625
 
double classify_adapted_pruning_factor = 2.5
 
double classify_adapted_pruning_threshold = -1.0
 
int classify_adapt_proto_threshold = 230
 
int classify_adapt_feature_threshold = 230
 
bool disable_character_fragments = true
 
double classify_character_fragments_garbage_certainty_threshold = -3.0
 
bool classify_debug_character_fragments = false
 
bool matcher_debug_separate_windows = false
 
char * classify_learn_debug_str = ""
 
int classify_class_pruner_threshold = 229
 
int classify_class_pruner_multiplier = 15
 
int classify_cp_cutoff_strength = 7
 
int classify_integer_matcher_multiplier = 10
 
bool classify_bln_numeric_mode = 0
 
double speckle_large_max_size = 0.30
 
double speckle_rating_penalty = 10.0
 
INT_TEMPLATES PreTrainedTemplates = nullptr
 
ADAPT_TEMPLATES AdaptedTemplates = nullptr
 
ADAPT_TEMPLATES BackupAdaptedTemplates = nullptr
 
BIT_VECTOR AllProtosOn = nullptr
 
BIT_VECTOR AllConfigsOn = nullptr
 
BIT_VECTOR AllConfigsOff = nullptr
 
BIT_VECTOR TempProtoMask = nullptr
 
NORM_PROTOS * NormProtos = nullptr
 
UnicityTable< FontInfo > fontinfo_table_
 
UnicityTable< FontSet > fontset_table_
 
bool EnableLearning = true
 
- Public Attributes inherited from tesseract::CCUtil
STRING datadir
 
STRING imagebasename
 
STRING lang
 
STRING language_data_path_prefix
 
UNICHARSET unicharset
 
UnicharAmbigs unichar_ambigs
 
STRING imagefile
 
STRING directory
 
int ambigs_debug_level = 0
 
bool use_ambigs_for_adaption = false
 

Protected Attributes

IntegerMatcher im_
 
FEATURE_DEFS_STRUCT feature_defs_
 
ShapeTable * shape_table_ = nullptr
 

Additional Inherited Members

- Static Public Attributes inherited from tesseract::CCStruct
static const double kDescenderFraction = 0.25
 
static const double kXHeightFraction = 0.5
 
static const double kAscenderFraction = 0.25
 
static const double kXHeightCapRatio
 

Detailed Description

Definition at line 103 of file classify.h.

Constructor & Destructor Documentation

◆ Classify()

tesseract::Classify::Classify ( )

Definition at line 60 of file classify.cpp.

61  : BOOL_MEMBER(allow_blob_division, true, "Use divisible blobs chopping",
62  this->params()),
64  "Prioritize blob division over chopping", this->params()),
65  BOOL_MEMBER(classify_enable_learning, true, "Enable adaptive classifier",
66  this->params()),
67  INT_MEMBER(classify_debug_level, 0, "Classify debug level",
68  this->params()),
69  INT_MEMBER(classify_norm_method, character, "Normalization Method ...",
70  this->params()),
72  "Character Normalization Range ...", this->params()),
74  "Veto ratio between classifier ratings", this->params()),
76  "Veto difference between classifier certainties",
77  this->params()),
78  BOOL_MEMBER(tess_cn_matching, 0, "Character Normalized Matching",
79  this->params()),
80  BOOL_MEMBER(tess_bn_matching, 0, "Baseline Normalized Matching",
81  this->params()),
83  "Enable adaptive classifier", this->params()),
85  "Use pre-adapted classifier templates", this->params()),
87  "Save adapted templates to a file", this->params()),
88  BOOL_MEMBER(classify_enable_adaptive_debugger, 0, "Enable match debugger",
89  this->params()),
91  "Non-linear stroke-density normalization", this->params()),
92  INT_MEMBER(matcher_debug_level, 0, "Matcher Debug Level", this->params()),
93  INT_MEMBER(matcher_debug_flags, 0, "Matcher Debug Flags", this->params()),
94  INT_MEMBER(classify_learning_debug_level, 0, "Learning Debug Level: ",
95  this->params()),
96  double_MEMBER(matcher_good_threshold, 0.125, "Good Match (0-1)",
97  this->params()),
98  double_MEMBER(matcher_reliable_adaptive_result, 0.0, "Great Match (0-1)",
99  this->params()),
100  double_MEMBER(matcher_perfect_threshold, 0.02, "Perfect Match (0-1)",
101  this->params()),
102  double_MEMBER(matcher_bad_match_pad, 0.15, "Bad Match Pad (0-1)",
103  this->params()),
104  double_MEMBER(matcher_rating_margin, 0.1, "New template margin (0-1)",
105  this->params()),
106  double_MEMBER(matcher_avg_noise_size, 12.0, "Avg. noise blob length",
107  this->params()),
108  INT_MEMBER(matcher_permanent_classes_min, 1, "Min # of permanent classes",
109  this->params()),
111  "Reliable Config Threshold", this->params()),
113  "Enable adaption even if the ambiguities have not been seen",
114  this->params()),
116  "Maximum angle delta for prototype clustering",
117  this->params()),
119  "Penalty to apply when a non-alnum is vertically out of "
120  "its expected textline position",
121  this->params()),
122  double_MEMBER(rating_scale, 1.5, "Rating scaling factor", this->params()),
123  double_MEMBER(certainty_scale, 20.0, "Certainty scaling factor",
124  this->params()),
126  "Scale factor for features not used", this->params()),
129  "Prune poor adapted results this much worse than best result",
130  this->params()),
132  "Threshold at which classify_adapted_pruning_factor starts",
133  this->params()),
135  "Threshold for good protos during adaptive 0-255",
136  this->params()),
138  "Threshold for good features during adaptive 0-255",
139  this->params()),
141  "Do not include character fragments in the"
142  " results of the classifier",
143  this->params()),
145  -3.0,
146  "Exclude fragments that do not look like whole"
147  " characters from training and adaption",
148  this->params()),
150  "Bring up graphical debugging windows for fragments training",
151  this->params()),
153  "Use two different windows for debugging the matching: "
154  "One for the protos and one for the features.",
155  this->params()),
156  STRING_MEMBER(classify_learn_debug_str, "", "Class str to debug learning",
157  this->params()),
159  "Class Pruner Threshold 0-255", this->params()),
161  "Class Pruner Multiplier 0-255: ", this->params()),
163  "Class Pruner CutoffStrength: ", this->params()),
165  "Integer Matcher Multiplier 0-255: ", this->params()),
167  "Assume the input is numbers [0-9].", this->params()),
168  double_MEMBER(speckle_large_max_size, 0.30, "Max large speckle size",
169  this->params()),
171  "Penalty to add to worst rating for noise", this->params()),
173  dict_(this) {
174  using namespace std::placeholders; // for _1, _2
175  fontinfo_table_.set_compare_callback(std::bind(CompareFontInfo, _1, _2));
176  fontinfo_table_.set_clear_callback(std::bind(FontInfoDeleteCallback, _1));
177  fontset_table_.set_compare_callback(std::bind(CompareFontSet, _1, _2));
178  fontset_table_.set_clear_callback(std::bind(FontSetDeleteCallback, _1));
179 
181 }

◆ ~Classify()

tesseract::Classify::~Classify ( )
override

Definition at line 183 of file classify.cpp.

183  {
185  delete learn_debug_win_;
186  delete learn_fragmented_word_debug_win_;
187  delete learn_fragments_debug_win_;
188 }

Member Function Documentation

◆ AdaptableWord()

bool tesseract::Classify::AdaptableWord ( WERD_RES *  word)

Return true if the specified word is acceptable for adaptation.

Globals: none

Parameters
word: current word
Returns
true or false

Definition at line 821 of file adaptmatch.cpp.

821  {
822  if (word->best_choice == nullptr) return false;
823  int BestChoiceLength = word->best_choice->length();
824  float adaptable_score =
826  return // rules that apply in general - simplest to compute first
827  BestChoiceLength > 0 &&
828  BestChoiceLength == word->rebuild_word->NumBlobs() &&
829  BestChoiceLength <= MAX_ADAPTABLE_WERD_SIZE &&
830  // This basically ensures that the word is at least a dictionary match
831  // (freq word, user word, system dawg word, etc).
832  // Since all the other adjustments will make adjust factor higher
833  // than higher than adaptable_score=1.1+0.05=1.15
834  // Since these are other flags that ensure that the word is dict word,
835  // this check could be at times redundant.
836  word->best_choice->adjust_factor() <= adaptable_score &&
837  // Make sure that alternative choices are not dictionary words.
838  word->AlternativeChoiceAdjustmentsWorseThan(adaptable_score);
839 }

◆ AdaptiveClassifier()

void tesseract::Classify::AdaptiveClassifier ( TBLOB *  Blob,
BLOB_CHOICE_LIST *  Choices 
)

This routine calls the adaptive matcher which returns (in an array) the class id of each class matched.

It also returns the number of classes matched. For each class matched it places the best rating found for that class into the Ratings array.

Bad matches are then removed so that they don't need to be sorted. The remaining good matches are then sorted and converted to choices.

This routine also performs some simple speckle filtering.

Parameters
Blob: blob to be classified
[out] Choices: List of choices found by the adaptive matcher. Filled on return with the choices found by the class pruner and the ratings therefrom. Also contains the detailed results of the integer matcher.

Definition at line 191 of file adaptmatch.cpp.

191  {
192  assert(Choices != nullptr);
193  auto *Results = new ADAPT_RESULTS;
194  Results->Initialize();
195 
196  ASSERT_HOST(AdaptedTemplates != nullptr);
197 
198  DoAdaptiveMatch(Blob, Results);
199 
200  RemoveBadMatches(Results);
201  Results->match.sort(&UnicharRating::SortDescendingRating);
202  RemoveExtraPuncs(Results);
203  Results->ComputeBest();
204  ConvertMatchesToChoices(Blob->denorm(), Blob->bounding_box(), Results,
205  Choices);
206 
207  // TODO(rays) Move to before ConvertMatchesToChoices!
208  if (LargeSpeckle(*Blob) || Choices->length() == 0)
209  AddLargeSpeckleTo(Results->BlobLength, Choices);
210 
211  if (matcher_debug_level >= 1) {
212  tprintf("AD Matches = ");
213  PrintAdaptiveMatchResults(*Results);
214  }
215 
216 #ifndef GRAPHICS_DISABLED
218  DebugAdaptiveClassifier(Blob, Results);
219 #endif
220 
221  delete Results;
222 } /* AdaptiveClassifier */

◆ AdaptiveClassifierIsEmpty()

bool tesseract::Classify::AdaptiveClassifierIsEmpty ( ) const
inline

Definition at line 326 of file classify.h.

326  {
327  return AdaptedTemplates->NumPermClasses == 0;
328  }

◆ AdaptiveClassifierIsFull()

bool tesseract::Classify::AdaptiveClassifierIsFull ( ) const
inline

Definition at line 325 of file classify.h.

325 { return NumAdaptationsFailed > 0; }

◆ AdaptToChar()

void tesseract::Classify::AdaptToChar ( TBLOB *  Blob,
CLASS_ID  ClassId,
int  FontinfoId,
float  Threshold,
ADAPT_TEMPLATES  adaptive_templates 
)
Parameters
Blob: blob to add to templates for ClassId
ClassId: class to add blob to
FontinfoId: font information from pre-trained templates
Threshold: minimum match rating to existing template
adaptive_templates: current set of adapted templates

Globals:

  • AllProtosOn dummy mask to match against all protos
  • AllConfigsOn dummy mask to match against all configs

Definition at line 853 of file adaptmatch.cpp.

855  {
856  int NumFeatures;
857  INT_FEATURE_ARRAY IntFeatures;
858  UnicharRating int_result;
859  INT_CLASS IClass;
860  ADAPT_CLASS Class;
861  TEMP_CONFIG TempConfig;
862  FEATURE_SET FloatFeatures;
863  int NewTempConfigId;
864 
865  if (!LegalClassId (ClassId))
866  return;
867 
868  int_result.unichar_id = ClassId;
869  Class = adaptive_templates->Class[ClassId];
870  assert(Class != nullptr);
871  if (IsEmptyAdaptedClass(Class)) {
872  InitAdaptedClass(Blob, ClassId, FontinfoId, Class, adaptive_templates);
873  } else {
874  IClass = ClassForClassId(adaptive_templates->Templates, ClassId);
875 
876  NumFeatures = GetAdaptiveFeatures(Blob, IntFeatures, &FloatFeatures);
877  if (NumFeatures <= 0) {
878  return; // Features already freed by GetAdaptiveFeatures.
879  }
880 
881  // Only match configs with the matching font.
882  BIT_VECTOR MatchingFontConfigs = NewBitVector(MAX_NUM_PROTOS);
883  for (int cfg = 0; cfg < IClass->NumConfigs; ++cfg) {
884  if (GetFontinfoId(Class, cfg) == FontinfoId) {
885  SET_BIT(MatchingFontConfigs, cfg);
886  } else {
887  reset_bit(MatchingFontConfigs, cfg);
888  }
889  }
890  im_.Match(IClass, AllProtosOn, MatchingFontConfigs,
891  NumFeatures, IntFeatures,
894  FreeBitVector(MatchingFontConfigs);
895 
896  SetAdaptiveThreshold(Threshold);
897 
898  if (1.0f - int_result.rating <= Threshold) {
899  if (ConfigIsPermanent(Class, int_result.config)) {
901  tprintf("Found good match to perm config %d = %4.1f%%.\n",
902  int_result.config, int_result.rating * 100.0);
903  FreeFeatureSet(FloatFeatures);
904  return;
905  }
906 
907  TempConfig = TempConfigFor(Class, int_result.config);
908  IncreaseConfidence(TempConfig);
909  if (TempConfig->NumTimesSeen > Class->MaxNumTimesSeen) {
910  Class->MaxNumTimesSeen = TempConfig->NumTimesSeen;
911  }
913  tprintf("Increasing reliability of temp config %d to %d.\n",
914  int_result.config, TempConfig->NumTimesSeen);
915 
916  if (TempConfigReliable(ClassId, TempConfig)) {
917  MakePermanent(adaptive_templates, ClassId, int_result.config, Blob);
918  UpdateAmbigsGroup(ClassId, Blob);
919  }
920  } else {
922  tprintf("Found poor match to temp config %d = %4.1f%%.\n",
923  int_result.config, int_result.rating * 100.0);
925  DisplayAdaptedChar(Blob, IClass);
926  }
927  NewTempConfigId =
928  MakeNewTemporaryConfig(adaptive_templates, ClassId, FontinfoId,
929  NumFeatures, IntFeatures, FloatFeatures);
930  if (NewTempConfigId >= 0 &&
931  TempConfigReliable(ClassId, TempConfigFor(Class, NewTempConfigId))) {
932  MakePermanent(adaptive_templates, ClassId, NewTempConfigId, Blob);
933  UpdateAmbigsGroup(ClassId, Blob);
934  }
935 
936 #ifndef GRAPHICS_DISABLED
938  DisplayAdaptedChar(Blob, IClass);
939  }
940 #endif
941  }
942  FreeFeatureSet(FloatFeatures);
943  }
944 } /* AdaptToChar */

◆ AddLargeSpeckleTo()

void tesseract::Classify::AddLargeSpeckleTo ( int  blob_length,
BLOB_CHOICE_LIST *  choices 
)

Definition at line 201 of file classify.cpp.

201  {
202  BLOB_CHOICE_IT bc_it(choices);
203  // If there is no classifier result, we will use the worst possible certainty
204  // and corresponding rating.
205  float certainty = -getDict().certainty_scale;
206  float rating = rating_scale * blob_length;
207  if (!choices->empty() && blob_length > 0) {
208  bc_it.move_to_last();
209  BLOB_CHOICE* worst_choice = bc_it.data();
210  // Add speckle_rating_penalty to worst rating, matching old value.
211  rating = worst_choice->rating() + speckle_rating_penalty;
212  // Compute the rating to correspond to the certainty. (Used to be kept
213  // the same, but that messes up the language model search.)
214  certainty = -rating * getDict().certainty_scale /
215  (rating_scale * blob_length);
216  }
217  auto* blob_choice = new BLOB_CHOICE(UNICHAR_SPACE, rating, certainty,
218  -1, 0.0f, FLT_MAX, 0,
220  bc_it.add_to_end(blob_choice);
221 }

◆ AddNewResult()

void tesseract::Classify::AddNewResult ( const UnicharRating &  new_result,
ADAPT_RESULTS *  results 
)

This routine adds the result of a classification into Results. If the new rating is much worse than the current best rating, it is not entered into results because it would end up being stripped later anyway. If the new rating is better than the old rating for the class, it replaces the old rating. If this is the first rating for the class, the class is added to the list of matched classes in Results. If the new rating is better than the best so far, it becomes the best so far.

Globals:

Parameters
new_result: new result to add
[out] results: results to add new result to

Definition at line 994 of file adaptmatch.cpp.

995  {
996  int old_match = FindScoredUnichar(new_result.unichar_id, *results);
997 
998  if (new_result.rating + matcher_bad_match_pad < results->best_rating ||
999  (old_match < results->match.size() &&
1000  new_result.rating <= results->match[old_match].rating))
1001  return; // New one not good enough.
1002 
1003  if (!unicharset.get_fragment(new_result.unichar_id))
1004  results->HasNonfragment = true;
1005 
1006  if (old_match < results->match.size()) {
1007  results->match[old_match].rating = new_result.rating;
1008  } else {
1009  results->match.push_back(new_result);
1010  }
1011 
1012  if (new_result.rating > results->best_rating &&
1013  // Ensure that fragments do not affect best rating, class and config.
1014  // This is needed so that at least one non-fragmented character is
1015  // always present in the results.
1016  // TODO(daria): verify that this helps accuracy and does not
1017  // hurt performance.
1018  !unicharset.get_fragment(new_result.unichar_id)) {
1019  results->best_match_index = old_match;
1020  results->best_rating = new_result.rating;
1021  results->best_unichar_id = new_result.unichar_id;
1022  }
1023 } /* AddNewResult */

◆ AmbigClassifier()

void tesseract::Classify::AmbigClassifier ( const GenericVector< INT_FEATURE_STRUCT > &  int_features,
const INT_FX_RESULT_STRUCT &  fx_info,
const TBLOB *  blob,
INT_TEMPLATES  templates,
ADAPT_CLASS *  classes,
UNICHAR_ID *  ambiguities,
ADAPT_RESULTS *  results 
)

This routine is identical to CharNormClassifier() except that it does no class pruning. It simply matches the unknown blob against the classes listed in Ambiguities.

Globals:

Parameters
blob: blob to be classified
templates: built-in templates to classify against
classes: adapted class templates
ambiguities: array of unichar id's to match against
[out] results: place to put match results
int_features
fx_info

Definition at line 1045 of file adaptmatch.cpp.

1052  {
1053  if (int_features.empty()) return;
1054  auto* CharNormArray = new uint8_t[unicharset.size()];
1055  UnicharRating int_result;
1056 
1057  results->BlobLength = GetCharNormFeature(fx_info, templates, nullptr,
1058  CharNormArray);
1059  bool debug = matcher_debug_level >= 2 || classify_debug_level > 1;
1060  if (debug)
1061  tprintf("AM Matches = ");
1062 
1063  int top = blob->bounding_box().top();
1064  int bottom = blob->bounding_box().bottom();
1065  while (*ambiguities >= 0) {
1066  CLASS_ID class_id = *ambiguities;
1067 
1068  int_result.unichar_id = class_id;
1069  im_.Match(ClassForClassId(templates, class_id),
1071  int_features.size(), &int_features[0],
1072  &int_result,
1075 
1076  ExpandShapesAndApplyCorrections(nullptr, debug, class_id, bottom, top, 0,
1077  results->BlobLength,
1079  CharNormArray, &int_result, results);
1080  ambiguities++;
1081  }
1082  delete [] CharNormArray;
1083 } /* AmbigClassifier */

◆ BaselineClassifier()

UNICHAR_ID * tesseract::Classify::BaselineClassifier ( TBLOB *  Blob,
const GenericVector< INT_FEATURE_STRUCT > &  int_features,
const INT_FX_RESULT_STRUCT &  fx_info,
ADAPT_TEMPLATES  Templates,
ADAPT_RESULTS *  Results 
)

This routine extracts baseline normalized features from the unknown character and matches them against the specified set of templates. The classes which match are added to Results.

Globals:

  • BaselineCutoffs expected num features for each class
Parameters
Blob: blob to be classified
Templates: current set of adapted templates
Results: place to put match results
int_features
fx_info
Returns
Array of possible ambiguous chars that should be checked.

Definition at line 1265 of file adaptmatch.cpp.

1268  {
1269  if (int_features.empty()) return nullptr;
1270  auto* CharNormArray = new uint8_t[unicharset.size()];
1271  ClearCharNormArray(CharNormArray);
1272 
1274  PruneClasses(Templates->Templates, int_features.size(), -1, &int_features[0],
1275  CharNormArray, BaselineCutoffs, &Results->CPResults);
1276 
1277  if (matcher_debug_level >= 2 || classify_debug_level > 1)
1278  tprintf("BL Matches = ");
1279 
1280  MasterMatcher(Templates->Templates, int_features.size(), &int_features[0],
1281  CharNormArray,
1282  Templates->Class, matcher_debug_flags, 0,
1283  Blob->bounding_box(), Results->CPResults, Results);
1284 
1285  delete [] CharNormArray;
1286  CLASS_ID ClassId = Results->best_unichar_id;
1287  if (ClassId == INVALID_UNICHAR_ID || Results->best_match_index < 0)
1288  return nullptr;
1289 
1290  return Templates->Class[ClassId]->
1291  Config[Results->match[Results->best_match_index].config].Perm->Ambigs;
1292 } /* BaselineClassifier */

◆ CharNormClassifier()

int tesseract::Classify::CharNormClassifier ( TBLOB *  blob,
const TrainingSample &  sample,
ADAPT_RESULTS *  adapt_results 
)

This routine extracts character normalized features from the unknown character and matches them against the specified set of templates. The classes which match are added to Results.

Parameters
blob: blob to be classified
sample: templates to classify unknown against
adapt_results: place to put match results

Globals:

  • CharNormCutoffs expected num features for each class
  • AllProtosOn mask that enables all protos
  • AllConfigsOn mask that enables all configs

Definition at line 1311 of file adaptmatch.cpp.

1313  {
1314  // This is the length that is used for scaling ratings vs certainty.
1315  adapt_results->BlobLength =
1316  IntCastRounded(sample.outline_length() / kStandardFeatureLength);
1317  GenericVector<UnicharRating> unichar_results;
1318  static_classifier_->UnicharClassifySample(sample, blob->denorm().pix(), 0,
1319  -1, &unichar_results);
1320  // Convert results to the format used internally by AdaptiveClassifier.
1321  for (int r = 0; r < unichar_results.size(); ++r) {
1322  AddNewResult(unichar_results[r], adapt_results);
1323  }
1324  return sample.num_features();
1325 } /* CharNormClassifier */

◆ CharNormTrainingSample()

int tesseract::Classify::CharNormTrainingSample ( bool  pruner_only,
int  keep_this,
const TrainingSample &  sample,
GenericVector< UnicharRating > *  results 
)

Definition at line 1329 of file adaptmatch.cpp.

1332  {
1333  results->clear();
1334  auto* adapt_results = new ADAPT_RESULTS();
1335  adapt_results->Initialize();
1336  // Compute the bounding box of the features.
1337  uint32_t num_features = sample.num_features();
1338  // Only the top and bottom of the blob_box are used by MasterMatcher, so
1339  // fabricate right and left using top and bottom.
1340  TBOX blob_box(sample.geo_feature(GeoBottom), sample.geo_feature(GeoBottom),
1341  sample.geo_feature(GeoTop), sample.geo_feature(GeoTop));
1342  // Compute the char_norm_array from the saved cn_feature.
1343  FEATURE norm_feature = sample.GetCNFeature();
1344  auto* char_norm_array = new uint8_t[unicharset.size()];
1345  int num_pruner_classes = std::max(unicharset.size(),
1347  auto* pruner_norm_array = new uint8_t[num_pruner_classes];
1348  adapt_results->BlobLength =
1349  static_cast<int>(ActualOutlineLength(norm_feature) * 20 + 0.5);
1350  ComputeCharNormArrays(norm_feature, PreTrainedTemplates, char_norm_array,
1351  pruner_norm_array);
1352 
1353  PruneClasses(PreTrainedTemplates, num_features, keep_this, sample.features(),
1354  pruner_norm_array,
1355  shape_table_ != nullptr ? &shapetable_cutoffs_[0] : CharNormCutoffs,
1356  &adapt_results->CPResults);
1357  delete [] pruner_norm_array;
1358  if (keep_this >= 0) {
1359  adapt_results->CPResults[0].Class = keep_this;
1360  adapt_results->CPResults.truncate(1);
1361  }
1362  if (pruner_only) {
1363  // Convert pruner results to output format.
1364  for (int i = 0; i < adapt_results->CPResults.size(); ++i) {
1365  int class_id = adapt_results->CPResults[i].Class;
1366  results->push_back(
1367  UnicharRating(class_id, 1.0f - adapt_results->CPResults[i].Rating));
1368  }
1369  } else {
1370  MasterMatcher(PreTrainedTemplates, num_features, sample.features(),
1371  char_norm_array,
1372  nullptr, matcher_debug_flags,
1374  blob_box, adapt_results->CPResults, adapt_results);
1375  // Convert master matcher results to output format.
1376  for (int i = 0; i < adapt_results->match.size(); i++) {
1377  results->push_back(adapt_results->match[i]);
1378  }
1380  }
1381  delete [] char_norm_array;
1382  delete adapt_results;
1383  return num_features;
1384 } /* CharNormTrainingSample */

◆ ClassAndConfigIDToFontOrShapeID()

int tesseract::Classify::ClassAndConfigIDToFontOrShapeID ( int  class_id,
int  int_result_config 
) const

Definition at line 2207 of file adaptmatch.cpp.

// Looks up the font set of class_id in PreTrainedTemplates and returns the
// entry stored at index int_result_config. Returns kBlankFontinfoId when the
// class has no font set (font_set_id < 0).
2208  {
2209  int font_set_id = PreTrainedTemplates->Class[class_id]->font_set_id;
2210  // Older inttemps have no font_ids.
2211  if (font_set_id < 0)
2212  return kBlankFontinfoId;
2213  const FontSet &fs = fontset_table_.get(font_set_id);
// The config index must address a valid slot of the font set.
2214  ASSERT_HOST(int_result_config >= 0 && int_result_config < fs.size);
2215  return fs.configs[int_result_config];
2216 }

◆ ClassIDToDebugStr()

STRING tesseract::Classify::ClassIDToDebugStr ( const INT_TEMPLATES_STRUCT templates,
int  class_id,
int  config_id 
) const

Definition at line 2194 of file adaptmatch.cpp.

// Builds a debug string for class_id. When templates is PreTrainedTemplates
// and a shape table is loaded, (class_id, config_id) is first mapped through
// ClassAndConfigIDToFontOrShapeID and the DebugStr of the shape table is used;
// otherwise unicharset.debug_str(class_id) is returned.
2195  {
2196  STRING class_string;
2197  if (templates == PreTrainedTemplates && shape_table_ != nullptr) {
2198  int shape_id = ClassAndConfigIDToFontOrShapeID(class_id, config_id);
2199  class_string = shape_table_->DebugStr(shape_id);
2200  } else {
2201  class_string = unicharset.debug_str(class_id);
2202  }
2203  return class_string;
2204 }

◆ ClassifyAsNoise()

void tesseract::Classify::ClassifyAsNoise ( ADAPT_RESULTS results)

This routine computes a rating which reflects the likelihood that the blob being classified is a noise blob. NOTE: assumes that the blob length has already been computed and placed into Results.

Parameters
results: results to add noise classification to

Globals:

  • matcher_avg_noise_size avg. length of a noise blob

Definition at line 1399 of file adaptmatch.cpp.

1399  {
// With x = BlobLength / matcher_avg_noise_size, the next three statements
// compute x^2 / (1 + x^2).
1400  float rating = results->BlobLength / matcher_avg_noise_size;
1401  rating *= rating;
1402  rating /= 1.0 + rating;
1403 
// The result is recorded under UNICHAR_SPACE with rating 1 - x^2 / (1 + x^2).
1404  AddNewResult(UnicharRating(UNICHAR_SPACE, 1.0f - rating), results);
1405 } /* ClassifyAsNoise */

◆ ClearCharNormArray()

void tesseract::Classify::ClearCharNormArray ( uint8_t *  char_norm_array)

For each class in the unicharset, clears the corresponding entry in char_norm_array. char_norm_array is indexed by unichar_id.

Globals:

  • none
Parameters
char_norm_array: array to be cleared

Definition at line 44 of file float2int.cpp.

44  {
// Zeroes unicharset.size() entries, so char_norm_array must hold at least
// that many elements.
45  memset(char_norm_array, 0, sizeof(*char_norm_array) * unicharset.size());
46 } /* ClearCharNormArray */

◆ ComputeCharNormArrays()

void tesseract::Classify::ComputeCharNormArrays ( FEATURE_STRUCT norm_feature,
INT_TEMPLATES_STRUCT templates,
uint8_t *  char_norm_array,
uint8_t *  pruner_array 
)

Definition at line 1698 of file adaptmatch.cpp.

// Fills char_norm_array from norm_feature via ComputeIntCharNormArray and,
// when pruner_array is non-null, fills pruner_array as well:
//  - without a shape table, pruner_array receives the same per-unichar values;
//  - with a shape table, pruner_array has one entry per template class, set to
//    the minimum char_norm_array value over the unichars of every shape in the
//    font set of that class.
// NOTE: norm_feature is passed to FreeFeature before returning; presumably the
// caller must not use it afterwards.
1701  {
1702  ComputeIntCharNormArray(*norm_feature, char_norm_array);
1703  if (pruner_array != nullptr) {
1704  if (shape_table_ == nullptr) {
1705  ComputeIntCharNormArray(*norm_feature, pruner_array);
1706  } else {
// Start every entry at the maximum so the min-reduction below can lower it.
1707  memset(pruner_array, UINT8_MAX,
1708  templates->NumClasses * sizeof(pruner_array[0]));
1709  // Each entry in the pruner norm array is the MIN of all the entries of
1710  // the corresponding unichars in the CharNormArray.
1711  for (int id = 0; id < templates->NumClasses; ++id) {
// NOTE(review): unlike ClassAndConfigIDToFontOrShapeID, font_set_id is not
// checked for a negative value here - confirm that cannot occur when a shape
// table is loaded.
1712  int font_set_id = templates->Class[id]->font_set_id;
1713  const FontSet &fs = fontset_table_.get(font_set_id);
1714  for (int config = 0; config < fs.size; ++config) {
1715  const Shape& shape = shape_table_->GetShape(fs.configs[config]);
1716  for (int c = 0; c < shape.size(); ++c) {
1717  if (char_norm_array[shape[c].unichar_id] < pruner_array[id])
1718  pruner_array[id] = char_norm_array[shape[c].unichar_id];
1719  }
1720  }
1721  }
1722  }
1723  }
1724  FreeFeature(norm_feature);
1725 }

◆ ComputeCorrectedRating()

double tesseract::Classify::ComputeCorrectedRating ( bool  debug,
int  unichar_id,
double  cp_rating,
double  im_rating,
int  feature_misses,
int  bottom,
int  top,
int  blob_length,
int  matcher_multiplier,
const uint8_t *  cn_factors 
)

Definition at line 1202 of file adaptmatch.cpp.

// Combines the integer matcher rating with penalties into a final rating:
//   result = 1 - (cn_corrected + miss_penalty + vertical_penalty),
// clamped from below at WORST_POSSIBLE_RATING.
//  - cn_corrected is im_.ApplyCNCorrection applied to (1 - im_rating).
//  - miss_penalty is tessedit_class_miss_scale * feature_misses.
//  - vertical_penalty is classify_misfit_junk_penalty, applied only to
//    unichars that are neither alpha nor digit, have a non-zero cn_factors
//    entry, and whose top or bottom lies outside the range reported by
//    unicharset.get_top_bottom.
// cp_rating is only used in the debug printout.
1207  {
1208  // Compute class feature corrections.
1209  double cn_corrected = im_.ApplyCNCorrection(1.0 - im_rating, blob_length,
1210  cn_factors[unichar_id],
1211  matcher_multiplier);
1212  double miss_penalty = tessedit_class_miss_scale * feature_misses;
1213  double vertical_penalty = 0.0;
1214  // Penalize non-alnums for being vertical misfits.
1215  if (!unicharset.get_isalpha(unichar_id) &&
1216  !unicharset.get_isdigit(unichar_id) &&
1217  cn_factors[unichar_id] != 0 && classify_misfit_junk_penalty > 0.0) {
1218  int min_bottom, max_bottom, min_top, max_top;
1219  unicharset.get_top_bottom(unichar_id, &min_bottom, &max_bottom,
1220  &min_top, &max_top);
1221  if (debug) {
1222  tprintf("top=%d, vs [%d, %d], bottom=%d, vs [%d, %d]\n",
1223  top, min_top, max_top, bottom, min_bottom, max_bottom);
1224  }
1225  if (top < min_top || top > max_top ||
1226  bottom < min_bottom || bottom > max_bottom) {
1227  vertical_penalty = classify_misfit_junk_penalty;
1228  }
1229  }
1230  double result = 1.0 - (cn_corrected + miss_penalty + vertical_penalty);
// Never report anything worse than the defined floor.
1231  if (result < WORST_POSSIBLE_RATING)
1232  result = WORST_POSSIBLE_RATING;
1233  if (debug) {
1234  tprintf("%s: %2.1f%%(CP%2.1f, IM%2.1f + CN%.2f(%d) + MP%2.1f + VP%2.1f)\n",
1235  unicharset.id_to_unichar(unichar_id),
1236  result * 100.0,
1237  cp_rating * 100.0,
1238  (1.0 - im_rating) * 100.0,
1239  (cn_corrected - (1.0 - im_rating)) * 100.0,
1240  cn_factors[unichar_id],
1241  miss_penalty * 100.0,
1242  vertical_penalty * 100.0);
1243  }
1244  return result;
1245 }

◆ ComputeIntCharNormArray()

void tesseract::Classify::ComputeIntCharNormArray ( const FEATURE_STRUCT norm_feature,
uint8_t *  char_norm_array 
)

For each class in unicharset, computes the match between norm_feature and the normalization protos for that class. Converts this number to the range from 0 - 255 and stores it into char_norm_array. CharNormArray is indexed by unichar_id.

Globals:

  • PreTrainedTemplates current set of built-in templates
Parameters
norm_feature: character normalization feature
[out] char_norm_array: place to put results of size unicharset.size()

Definition at line 62 of file float2int.cpp.

63  {
64  for (int i = 0; i < unicharset.size(); i++) {
65  if (i < PreTrainedTemplates->NumClasses) {
// Scale the ComputeNormMatch result by INT_CHAR_NORM_RANGE and clip it to
// [0, MAX_INT_CHAR_NORM] before storing it.
66  int norm_adjust = static_cast<int>(INT_CHAR_NORM_RANGE *
67  ComputeNormMatch(i, norm_feature, false));
68  char_norm_array[i] = ClipToRange(norm_adjust, 0, MAX_INT_CHAR_NORM);
69  } else {
70  // Classes with no templates (eg. ambigs & ligatures) default
71  // to worst match.
72  char_norm_array[i] = MAX_INT_CHAR_NORM;
73  }
74  }
75 } /* ComputeIntCharNormArray */

◆ ComputeIntFeatures()

void tesseract::Classify::ComputeIntFeatures ( FEATURE_SET  Features,
INT_FEATURE_ARRAY  IntFeatures 
)

This routine converts each floating point pico-feature in Features into integer format and saves it into IntFeatures.

Globals:

  • none
Parameters
Features: floating point pico-features to be converted
[out] IntFeatures: array to put converted features into

Definition at line 90 of file float2int.cpp.

91  {
92  float YShift;
93 
95  YShift = BASELINE_Y_SHIFT;
96  else
97  YShift = Y_SHIFT;
98 
99  for (int Fid = 0; Fid < Features->NumFeatures; Fid++) {
100  FEATURE Feature = Features->Features[Fid];
101 
102  IntFeatures[Fid].X =
104  IntFeatures[Fid].Y =
105  Bucket8For(Feature->Params[PicoFeatY], YShift, INT_FEAT_RANGE);
106  IntFeatures[Fid].Theta = CircBucketFor(Feature->Params[PicoFeatDir],
108  IntFeatures[Fid].CP_misses = 0;
109  }
110 } /* ComputeIntFeatures */

◆ ComputeNormMatch()

float tesseract::Classify::ComputeNormMatch ( CLASS_ID  ClassId,
const FEATURE_STRUCT feature,
bool  DebugMatch 
)

This routine compares feature against each character normalization proto for ClassId and returns the match rating of the best match.

Parameters
ClassId: id of class to match against
feature: character normalization feature
DebugMatch: controls dump of debug info

Globals: NormProtos character normalization prototypes

Returns
Best match rating for Feature against protos of ClassId.

Definition at line 93 of file normmatch.cpp.

96  {
97  LIST Protos;
98  float BestMatch;
99  float Match;
100  float Delta;
101  PROTOTYPE *Proto;
102  int ProtoId;
103 
104  if (ClassId >= NormProtos->NumProtos) {
105  ClassId = NO_CLASS;
106  }
107 
108  /* handle requests for classification as noise */
109  if (ClassId == NO_CLASS) {
110  /* kludge - clean up constants and make into control knobs later */
111  Match = (feature.Params[CharNormLength] *
112  feature.Params[CharNormLength] * 500.0 +
113  feature.Params[CharNormRx] *
114  feature.Params[CharNormRx] * 8000.0 +
115  feature.Params[CharNormRy] *
116  feature.Params[CharNormRy] * 8000.0);
117  return (1.0 - NormEvidenceOf(Match));
118  }
119 
120  BestMatch = FLT_MAX;
121  Protos = NormProtos->Protos[ClassId];
122 
123  if (DebugMatch) {
124  tprintf("\nChar norm for class %s\n", unicharset.id_to_unichar(ClassId));
125  }
126 
127  ProtoId = 0;
128  iterate(Protos) {
129  Proto = reinterpret_cast<PROTOTYPE *>first_node (Protos);
130  Delta = feature.Params[CharNormY] - Proto->Mean[CharNormY];
131  Match = Delta * Delta * Proto->Weight.Elliptical[CharNormY];
132  if (DebugMatch) {
133  tprintf("YMiddle: Proto=%g, Delta=%g, Var=%g, Dist=%g\n",
134  Proto->Mean[CharNormY], Delta,
135  Proto->Weight.Elliptical[CharNormY], Match);
136  }
137  Delta = feature.Params[CharNormRx] - Proto->Mean[CharNormRx];
138  Match += Delta * Delta * Proto->Weight.Elliptical[CharNormRx];
139  if (DebugMatch) {
140  tprintf("Height: Proto=%g, Delta=%g, Var=%g, Dist=%g\n",
141  Proto->Mean[CharNormRx], Delta,
142  Proto->Weight.Elliptical[CharNormRx], Match);
143  }
144  // Ry is width! See intfx.cpp.
145  Delta = feature.Params[CharNormRy] - Proto->Mean[CharNormRy];
146  if (DebugMatch) {
147  tprintf("Width: Proto=%g, Delta=%g, Var=%g\n",
148  Proto->Mean[CharNormRy], Delta,
149  Proto->Weight.Elliptical[CharNormRy]);
150  }
151  Delta = Delta * Delta * Proto->Weight.Elliptical[CharNormRy];
152  Delta *= kWidthErrorWeighting;
153  Match += Delta;
154  if (DebugMatch) {
155  tprintf("Total Dist=%g, scaled=%g, sigmoid=%g, penalty=%g\n",
156  Match, Match / classify_norm_adj_midpoint,
157  NormEvidenceOf(Match), 256 * (1 - NormEvidenceOf(Match)));
158  }
159 
160  if (Match < BestMatch)
161  BestMatch = Match;
162 
163  ProtoId++;
164  }
165  return 1.0 - NormEvidenceOf(BestMatch);

◆ ConvertMatchesToChoices()

void tesseract::Classify::ConvertMatchesToChoices ( const DENORM denorm,
const TBOX box,
ADAPT_RESULTS Results,
BLOB_CHOICE_LIST *  Choices 
)

The function converts the given match ratings to the list of blob choices with ratings and certainties (used by the context checkers). If character fragments are present in the results, this function also makes sure that there is at least one non-fragmented classification included. For each classification result check the unicharset for "definite" ambiguities and modify the resulting Choices accordingly.

Definition at line 1413 of file adaptmatch.cpp.

1415  {
1416  assert(Choices != nullptr);
1417  float Rating;
1418  float Certainty;
1419  BLOB_CHOICE_IT temp_it;
1420  bool contains_nonfrag = false;
1421  temp_it.set_to_list(Choices);
1422  int choices_length = 0;
1423  // With no shape_table_ maintain the previous MAX_MATCHES as the maximum
1424  // number of returned results, but with a shape_table_ we want to have room
1425  // for at least the biggest shape (which might contain hundreds of Indic
1426  // grapheme fragments) and more, so use double the size of the biggest shape
1427  // if that is more than the default.
1428  int max_matches = MAX_MATCHES;
1429  if (shape_table_ != nullptr) {
1430  max_matches = shape_table_->MaxNumUnichars() * 2;
1431  if (max_matches < MAX_MATCHES)
1432  max_matches = MAX_MATCHES;
1433  }
1434 
1435  float best_certainty = -FLT_MAX;
1436  for (int i = 0; i < Results->match.size(); i++) {
1437  const UnicharRating& result = Results->match[i];
1438  bool adapted = result.adapted;
1439  bool current_is_frag = (unicharset.get_fragment(result.unichar_id) != nullptr);
1440  if (temp_it.length()+1 == max_matches &&
1441  !contains_nonfrag && current_is_frag) {
1442  continue; // look for a non-fragmented character to fill the
1443  // last spot in Choices if only fragments are present
1444  }
1445  // BlobLength can never be legally 0, this means recognition failed.
1446  // But we must return a classification result because some invoking
1447  // functions (chopper/permuter) do not anticipate a null blob choice.
1448  // So we need to assign a poor, but not infinitely bad score.
1449  if (Results->BlobLength == 0) {
1450  Certainty = -20;
1451  Rating = 100; // should be -certainty * real_blob_length
1452  } else {
1453  Rating = Certainty = (1.0f - result.rating);
1454  Rating *= rating_scale * Results->BlobLength;
1455  Certainty *= -(getDict().certainty_scale);
1456  }
1457  // Adapted results, by their very nature, should have good certainty.
1458  // Those that don't are at best misleading, and often lead to errors,
1459  // so don't accept adapted results that are too far behind the best result,
1460  // whether adapted or static.
1461  // TODO(rays) find some way of automatically tuning these constants.
1462  if (Certainty > best_certainty) {
1463  best_certainty = std::min(Certainty, static_cast<float>(classify_adapted_pruning_threshold));
1464  } else if (adapted &&
1465  Certainty / classify_adapted_pruning_factor < best_certainty) {
1466  continue; // Don't accept bad adapted results.
1467  }
1468 
1469  float min_xheight, max_xheight, yshift;
1470  denorm.XHeightRange(result.unichar_id, unicharset, box,
1471  &min_xheight, &max_xheight, &yshift);
1472  auto* choice =
1473  new BLOB_CHOICE(result.unichar_id, Rating, Certainty,
1475  min_xheight, max_xheight, yshift,
1476  adapted ? BCC_ADAPTED_CLASSIFIER
1478  choice->set_fonts(result.fonts);
1479  temp_it.add_to_end(choice);
1480  contains_nonfrag |= !current_is_frag; // update contains_nonfrag
1481  choices_length++;
1482  if (choices_length >= max_matches) break;
1483  }
1484  Results->match.truncate(choices_length);
1485 } // ConvertMatchesToChoices

◆ ConvertProto()

void tesseract::Classify::ConvertProto ( PROTO  Proto,
int  ProtoId,
INT_CLASS  Class 
)

This routine converts Proto to integer format and installs it as ProtoId in Class.

Parameters
Proto: floating-pt proto to be converted to integer format
ProtoId: id of proto
Class: integer class to add converted proto to

Definition at line 487 of file intproto.cpp.

488  {
489  INT_PROTO P;
490  float Param;
491 
492  assert(ProtoId < Class->NumProtos);
493 
494  P = ProtoForProtoId(Class, ProtoId);
495 
496  Param = Proto->A * 128;
497  P->A = TruncateParam(Param, -128, 127, nullptr);
498 
499  Param = -Proto->B * 256;
500  P->B = TruncateParam(Param, 0, 255, nullptr);
501 
502  Param = Proto->C * 128;
503  P->C = TruncateParam(Param, -128, 127, nullptr);
504 
505  Param = Proto->Angle * 256;
506  if (Param < 0 || Param >= 256)
507  P->Angle = 0;
508  else
509  P->Angle = static_cast<uint8_t>(Param);
510 
511  /* round proto length to nearest integer number of pico-features */
512  Param = (Proto->Length / GetPicoFeatureLength()) + 0.5;
513  Class->ProtoLengths[ProtoId] = TruncateParam(Param, 1, 255, nullptr);
515  cprintf("Converted ffeat to (A=%d,B=%d,C=%d,L=%d)",
516  P->A, P->B, P->C, Class->ProtoLengths[ProtoId]);

◆ CreateIntTemplates()

INT_TEMPLATES tesseract::Classify::CreateIntTemplates ( CLASSES  FloatProtos,
const UNICHARSET target_unicharset 
)

This routine converts from the old floating point format to the new integer format.

Parameters
FloatProtos: prototypes in old floating pt format
target_unicharset: the UNICHARSET to use
Returns
New set of training templates in integer format.
Note
Globals: none

Definition at line 526 of file intproto.cpp.

529  {
530  INT_TEMPLATES IntTemplates;
531  CLASS_TYPE FClass;
532  INT_CLASS IClass;
533  int ClassId;
534  int ProtoId;
535  int ConfigId;
536 
537  IntTemplates = NewIntTemplates();
538 
539  for (ClassId = 0; ClassId < target_unicharset.size(); ClassId++) {
540  FClass = &(FloatProtos[ClassId]);
541  if (FClass->NumProtos == 0 && FClass->NumConfigs == 0 &&
542  strcmp(target_unicharset.id_to_unichar(ClassId), " ") != 0) {
543  cprintf("Warning: no protos/configs for %s in CreateIntTemplates()\n",
544  target_unicharset.id_to_unichar(ClassId));
545  }
546  assert(UnusedClassIdIn(IntTemplates, ClassId));
547  IClass = NewIntClass(FClass->NumProtos, FClass->NumConfigs);
548  FontSet fs;
549  fs.size = FClass->font_set.size();
550  fs.configs = new int[fs.size];
551  for (int i = 0; i < fs.size; ++i) {
552  fs.configs[i] = FClass->font_set.get(i);
553  }
554  if (this->fontset_table_.contains(fs)) {
555  IClass->font_set_id = this->fontset_table_.get_id(fs);
556  delete[] fs.configs;
557  } else {
558  IClass->font_set_id = this->fontset_table_.push_back(fs);
559  }
560  AddIntClass(IntTemplates, ClassId, IClass);
561 
562  for (ProtoId = 0; ProtoId < FClass->NumProtos; ProtoId++) {
563  AddIntProto(IClass);
564  ConvertProto(ProtoIn(FClass, ProtoId), ProtoId, IClass);
565  AddProtoToProtoPruner(ProtoIn(FClass, ProtoId), ProtoId, IClass,
567  AddProtoToClassPruner(ProtoIn(FClass, ProtoId), ClassId, IntTemplates);
568  }
569 
570  for (ConfigId = 0; ConfigId < FClass->NumConfigs; ConfigId++) {
571  AddIntConfig(IClass);
572  ConvertConfig(FClass->Configurations[ConfigId], ConfigId, IClass);
573  }
574  }
575  return (IntTemplates);

◆ DebugAdaptiveClassifier()

void tesseract::Classify::DebugAdaptiveClassifier ( TBLOB blob,
ADAPT_RESULTS Results 
)
Parameters
blob: blob whose classification is being debugged
Results: results of match being debugged

Globals: none

Definition at line 1497 of file adaptmatch.cpp.

1498  {
1499  if (static_classifier_ == nullptr) return;
1500  INT_FX_RESULT_STRUCT fx_info;
1502  TrainingSample* sample =
1503  BlobToTrainingSample(*blob, false, &fx_info, &bl_features);
1504  if (sample == nullptr) return;
1505  static_classifier_->DebugDisplay(*sample, blob->denorm().pix(),
1506  Results->best_unichar_id);
1507 } /* DebugAdaptiveClassifier */

◆ DisplayAdaptedChar()

void tesseract::Classify::DisplayAdaptedChar ( TBLOB blob,
INT_CLASS_STRUCT int_class 
)

Definition at line 946 of file adaptmatch.cpp.

946  {
947 #ifndef GRAPHICS_DISABLED
948  INT_FX_RESULT_STRUCT fx_info;
950  TrainingSample* sample =
952  &bl_features);
953  if (sample == nullptr) return;
954 
955  UnicharRating int_result;
956  im_.Match(int_class, AllProtosOn, AllConfigsOn,
957  bl_features.size(), &bl_features[0],
960  tprintf("Best match to temp config %d = %4.1f%%.\n",
961  int_result.config, int_result.rating * 100.0);
963  uint32_t ConfigMask;
964  ConfigMask = 1 << int_result.config;
966  im_.Match(int_class, AllProtosOn, static_cast<BIT_VECTOR>(&ConfigMask),
967  bl_features.size(), &bl_features[0],
971  }
972 
973  delete sample;
974 #endif
975 }

◆ DoAdaptiveMatch()

void tesseract::Classify::DoAdaptiveMatch ( TBLOB Blob,
ADAPT_RESULTS Results 
)

This routine performs an adaptive classification. If we have not yet adapted to enough classes, a simple classification to the pre-trained templates is performed. Otherwise, we match the blob against the adapted templates. If the adapted templates do not match well, we try a match against the pre-trained templates. If an adapted template match is found, we do a match to any pre-trained templates which could be ambiguous. The results from all of these classifications are merged together into Results.

Parameters
Blob: blob to be classified
Results: place to put match results

Globals:

  • PreTrainedTemplates built-in training templates
  • AdaptedTemplates templates adapted for this page
  • matcher_reliable_adaptive_result rating limit for a great match

Definition at line 1530 of file adaptmatch.cpp.

1530  {
1531  UNICHAR_ID *Ambiguities;
1532 
1533  INT_FX_RESULT_STRUCT fx_info;
1535  TrainingSample* sample =
1537  &bl_features);
1538  if (sample == nullptr) return;
1539 
1540  // TODO: With LSTM, static_classifier_ is nullptr.
1541  // Return to avoid crash in CharNormClassifier.
1542  if (static_classifier_ == nullptr) {
1543  delete sample;
1544  return;
1545  }
1546 
1548  tess_cn_matching) {
1549  CharNormClassifier(Blob, *sample, Results);
1550  } else {
1551  Ambiguities = BaselineClassifier(Blob, bl_features, fx_info,
1552  AdaptedTemplates, Results);
1553  if ((!Results->match.empty() &&
1554  MarginalMatch(Results->best_rating,
1556  !tess_bn_matching) ||
1557  Results->match.empty()) {
1558  CharNormClassifier(Blob, *sample, Results);
1559  } else if (Ambiguities && *Ambiguities >= 0 && !tess_bn_matching) {
1560  AmbigClassifier(bl_features, fx_info, Blob,
1563  Ambiguities,
1564  Results);
1565  }
1566  }
1567 
1568  // Force the blob to be classified as noise
1569  // if the results contain only fragments.
1570  // TODO(daria): verify that this is better than
1571  // just adding a nullptr classification.
1572  if (!Results->HasNonfragment || Results->match.empty())
1573  ClassifyAsNoise(Results);
1574  delete sample;
1575 } /* DoAdaptiveMatch */

◆ EndAdaptiveClassifier()

void tesseract::Classify::EndAdaptiveClassifier ( )

This routine performs cleanup operations on the adaptive classifier. It should be called before the program is terminated. Its main function is to save the adapted templates to a file.

Globals:

Definition at line 459 of file adaptmatch.cpp.

459  {
460  STRING Filename;
461  FILE *File;
462 
463  if (AdaptedTemplates != nullptr &&
465  Filename = imagefile + ADAPT_TEMPLATE_SUFFIX;
466  File = fopen (Filename.c_str(), "wb");
467  if (File == nullptr)
468  cprintf ("Unable to save adapted templates to %s!\n", Filename.c_str());
469  else {
470  cprintf ("\nSaving adapted templates to %s ...", Filename.c_str());
471  fflush(stdout);
473  cprintf ("\n");
474  fclose(File);
475  }
476  }
477 
478  if (AdaptedTemplates != nullptr) {
480  AdaptedTemplates = nullptr;
481  }
482  if (BackupAdaptedTemplates != nullptr) {
484  BackupAdaptedTemplates = nullptr;
485  }
486 
487  if (PreTrainedTemplates != nullptr) {
489  PreTrainedTemplates = nullptr;
490  }
492  FreeNormProtos();
493  if (AllProtosOn != nullptr) {
494  FreeBitVector(AllProtosOn);
495  FreeBitVector(AllConfigsOn);
496  FreeBitVector(AllConfigsOff);
497  FreeBitVector(TempProtoMask);
498  AllProtosOn = nullptr;
499  AllConfigsOn = nullptr;
500  AllConfigsOff = nullptr;
501  TempProtoMask = nullptr;
502  }
503  delete shape_table_;
504  shape_table_ = nullptr;
505  delete static_classifier_;
506  static_classifier_ = nullptr;
507 } /* EndAdaptiveClassifier */

◆ ExpandShapesAndApplyCorrections()

void tesseract::Classify::ExpandShapesAndApplyCorrections ( ADAPT_CLASS classes,
bool  debug,
int  class_id,
int  bottom,
int  top,
float  cp_rating,
int  blob_length,
int  matcher_multiplier,
const uint8_t *  cn_factors,
UnicharRating int_result,
ADAPT_RESULTS final_results 
)

Definition at line 1128 of file adaptmatch.cpp.

1132  {
1133  if (classes != nullptr) {
1134  // Adapted result. Convert configs to fontinfo_ids.
1135  int_result->adapted = true;
1136  for (int f = 0; f < int_result->fonts.size(); ++f) {
1137  int_result->fonts[f].fontinfo_id =
1138  GetFontinfoId(classes[class_id], int_result->fonts[f].fontinfo_id);
1139  }
1140  } else {
1141  // Pre-trained result. Map fonts using font_sets_.
1142  int_result->adapted = false;
1143  for (int f = 0; f < int_result->fonts.size(); ++f) {
1144  int_result->fonts[f].fontinfo_id =
1146  int_result->fonts[f].fontinfo_id);
1147  }
1148  if (shape_table_ != nullptr) {
1149  // Two possible cases:
1150  // 1. Flat shapetable. All unichar-ids of the shapes referenced by
1151  // int_result->fonts are the same. In this case build a new vector of
1152  // mapped fonts and replace the fonts in int_result.
1153  // 2. Multi-unichar shapetable. Variable unichars in the shapes referenced
1154  // by int_result. In this case, build a vector of UnicharRating to
1155  // gather together different font-ids for each unichar. Also covers case1.
1156  GenericVector<UnicharRating> mapped_results;
1157  for (int f = 0; f < int_result->fonts.size(); ++f) {
1158  int shape_id = int_result->fonts[f].fontinfo_id;
1159  const Shape& shape = shape_table_->GetShape(shape_id);
1160  for (int c = 0; c < shape.size(); ++c) {
1161  int unichar_id = shape[c].unichar_id;
1162  if (!unicharset.get_enabled(unichar_id)) continue;
1163  // Find the mapped_result for unichar_id.
1164  int r = 0;
1165  for (r = 0; r < mapped_results.size() &&
1166  mapped_results[r].unichar_id != unichar_id; ++r) {}
1167  if (r == mapped_results.size()) {
1168  mapped_results.push_back(*int_result);
1169  mapped_results[r].unichar_id = unichar_id;
1170  mapped_results[r].fonts.truncate(0);
1171  }
1172  for (int i = 0; i < shape[c].font_ids.size(); ++i) {
1173  mapped_results[r].fonts.push_back(
1174  ScoredFont(shape[c].font_ids[i], int_result->fonts[f].score));
1175  }
1176  }
1177  }
1178  for (int m = 0; m < mapped_results.size(); ++m) {
1179  mapped_results[m].rating =
1180  ComputeCorrectedRating(debug, mapped_results[m].unichar_id,
1181  cp_rating, int_result->rating,
1182  int_result->feature_misses, bottom, top,
1183  blob_length, matcher_multiplier, cn_factors);
1184  AddNewResult(mapped_results[m], final_results);
1185  }
1186  return;
1187  }
1188  }
1189  if (unicharset.get_enabled(class_id)) {
1190  int_result->rating = ComputeCorrectedRating(debug, class_id, cp_rating,
1191  int_result->rating,
1192  int_result->feature_misses,
1193  bottom, top, blob_length,
1194  matcher_multiplier, cn_factors);
1195  AddNewResult(*int_result, final_results);
1196  }
1197 }

◆ ExtractFeatures()

void tesseract::Classify::ExtractFeatures ( const TBLOB blob,
bool  nonlinear_norm,
GenericVector< INT_FEATURE_STRUCT > *  bl_features,
GenericVector< INT_FEATURE_STRUCT > *  cn_features,
INT_FX_RESULT_STRUCT results,
GenericVector< int > *  outline_cn_counts 
)
static

Definition at line 440 of file intfx.cpp.

446  {
447  DENORM bl_denorm, cn_denorm;
448  tesseract::Classify::SetupBLCNDenorms(blob, nonlinear_norm,
449  &bl_denorm, &cn_denorm, results);
450  if (outline_cn_counts != nullptr)
451  outline_cn_counts->truncate(0);
452  // Iterate the outlines.
453  for (TESSLINE* ol = blob.outlines; ol != nullptr; ol = ol->next) {
454  // Iterate the polygon.
455  EDGEPT* loop_pt = ol->FindBestStartPt();
456  EDGEPT* pt = loop_pt;
457  if (pt == nullptr) continue;
458  do {
459  if (pt->IsHidden()) continue;
460  // Find a run of equal src_outline.
461  EDGEPT* last_pt = pt;
462  do {
463  last_pt = last_pt->next;
464  } while (last_pt != loop_pt && !last_pt->IsHidden() &&
465  last_pt->src_outline == pt->src_outline);
466  last_pt = last_pt->prev;
467  // Until the adaptive classifier can be weaned off polygon segments,
468  // we have to force extraction from the polygon for the bl_features.
469  ExtractFeaturesFromRun(pt, last_pt, bl_denorm, kStandardFeatureLength,
470  true, bl_features);
471  ExtractFeaturesFromRun(pt, last_pt, cn_denorm, kStandardFeatureLength,
472  false, cn_features);
473  pt = last_pt;
474  } while ((pt = pt->next) != loop_pt);
475  if (outline_cn_counts != nullptr)
476  outline_cn_counts->push_back(cn_features->size());
477  }
478  results->NumBL = bl_features->size();
479  results->NumCN = cn_features->size();
480  results->YBottom = blob.bounding_box().bottom();
481  results->YTop = blob.bounding_box().top();
482  results->Width = blob.bounding_box().width();

◆ ExtractIntCNFeatures()

FEATURE_SET tesseract::Classify::ExtractIntCNFeatures ( const TBLOB blob,
const INT_FX_RESULT_STRUCT fx_info 
)
Parameters
blob: blob to extract features from
fx_info
Returns
Integer character-normalized features for blob.

Definition at line 216 of file picofeat.cpp.

218  {
219  INT_FX_RESULT_STRUCT local_fx_info(fx_info);
222  blob, false, &local_fx_info, &bl_features);
223  if (sample == nullptr) return nullptr;
224 
225  uint32_t num_features = sample->num_features();
226  const INT_FEATURE_STRUCT* features = sample->features();
227  FEATURE_SET feature_set = NewFeatureSet(num_features);
228  for (uint32_t f = 0; f < num_features; ++f) {
229  FEATURE feature = NewFeature(&IntFeatDesc);
230 
231  feature->Params[IntX] = features[f].X;
232  feature->Params[IntY] = features[f].Y;
233  feature->Params[IntDir] = features[f].Theta;
234  AddFeature(feature_set, feature);
235  }
236  delete sample;
237 
238  return feature_set;

◆ ExtractIntGeoFeatures()

FEATURE_SET tesseract::Classify::ExtractIntGeoFeatures ( const TBLOB blob,
const INT_FX_RESULT_STRUCT fx_info 
)
Parameters
blobblob to extract features from
fx_info
Returns
Geometric (top/bottom/width) features for blob.

Definition at line 246 of file picofeat.cpp.

248  {
249  INT_FX_RESULT_STRUCT local_fx_info(fx_info);
252  blob, false, &local_fx_info, &bl_features);
253  if (sample == nullptr) return nullptr;
254 
255  FEATURE_SET feature_set = NewFeatureSet(1);
256  FEATURE feature = NewFeature(&IntFeatDesc);
257 
258  feature->Params[GeoBottom] = sample->geo_feature(GeoBottom);
259  feature->Params[GeoTop] = sample->geo_feature(GeoTop);
260  feature->Params[GeoWidth] = sample->geo_feature(GeoWidth);
261  AddFeature(feature_set, feature);
262  delete sample;
263 
264  return feature_set;

◆ ExtractOutlineFeatures()

FEATURE_SET tesseract::Classify::ExtractOutlineFeatures ( TBLOB Blob)

Convert each segment in the outline to a feature and return the features.

Parameters
Blob: blob to extract pico-features from
Returns
Outline-features for Blob.
Note
Globals: none

Definition at line 54 of file outfeat.cpp.

56  {
57  Outline = static_cast<MFOUTLINE>first_node (RemainingOutlines);
58  ConvertToOutlineFeatures(Outline, FeatureSet);
59  }
61  NormalizeOutlineX(FeatureSet);
62  FreeOutlines(Outlines);
63  return (FeatureSet);
64 } /* ExtractOutlineFeatures */
65 } // namespace tesseract
66 
67 /*----------------------------------------------------------------------------
68  Private Code
69 ----------------------------------------------------------------------------*/
70 /*---------------------------------------------------------------------------*/

◆ ExtractPicoFeatures()

FEATURE_SET tesseract::Classify::ExtractPicoFeatures ( TBLOB Blob)

Operation: Dummy for now.

Globals:

  • classify_norm_method normalization method currently specified
Parameters
Blob: blob to extract pico-features from
Returns
Pico-features for Blob.

Definition at line 62 of file picofeat.cpp.

63  {
64  LIST Outlines;
65  LIST RemainingOutlines;
66  MFOUTLINE Outline;
67  FEATURE_SET FeatureSet;
68  float XScale, YScale;
69 
70  FeatureSet = NewFeatureSet(MAX_PICO_FEATURES);
71  Outlines = ConvertBlob(Blob);
72  NormalizeOutlines(Outlines, &XScale, &YScale);
73  RemainingOutlines = Outlines;
74  iterate(RemainingOutlines) {
75  Outline = static_cast<MFOUTLINE>first_node (RemainingOutlines);
76  ConvertToPicoFeatures2(Outline, FeatureSet);
77  }
79  NormalizePicoX(FeatureSet);
80  FreeOutlines(Outlines);
81  return (FeatureSet);
82 

◆ FreeNormProtos()

void tesseract::Classify::FreeNormProtos ( )

Definition at line 167 of file normmatch.cpp.

168  {
169  if (NormProtos != nullptr) {
170  for (int i = 0; i < NormProtos->NumProtos; i++)
174  Efree(NormProtos);
175  NormProtos = nullptr;
176  }

◆ get_fontinfo_table() [1/2]

UnicityTable<FontInfo>& tesseract::Classify::get_fontinfo_table ( )
inline

Definition at line 386 of file classify.h.

// Non-const overload: returns a mutable reference to fontinfo_table_.
386  {
387  return fontinfo_table_;
388  }

◆ get_fontinfo_table() [2/2]

const UnicityTable<FontInfo>& tesseract::Classify::get_fontinfo_table ( ) const
inline

Definition at line 389 of file classify.h.

// Const overload: returns a read-only reference to fontinfo_table_.
389  {
390  return fontinfo_table_;
391  }

◆ get_fontset_table()

UnicityTable<FontSet>& tesseract::Classify::get_fontset_table ( )
inline

Definition at line 392 of file classify.h.

// Returns a mutable reference to fontset_table_.
392  {
393  return fontset_table_;
394  }

◆ GetAdaptiveFeatures()

int tesseract::Classify::GetAdaptiveFeatures ( TBLOB Blob,
INT_FEATURE_ARRAY  IntFeatures,
FEATURE_SET FloatFeatures 
)

This routine sets up the feature extractor to extract baseline normalized pico-features.

The extracted pico-features are converted to integer form and placed in IntFeatures. The original floating-pt. features are returned in FloatFeatures.

Globals: none

Parameters
Blob: blob to extract features from
[out] IntFeatures: array to fill with integer features
[out] FloatFeatures: place to return actual floating-pt features
Returns
Number of pico-features returned (0 if an error occurred)

Definition at line 786 of file adaptmatch.cpp.

788  {
789  FEATURE_SET Features;
790  int NumFeatures;
791 
792  classify_norm_method.set_value(baseline);
793  Features = ExtractPicoFeatures(Blob);
794 
795  NumFeatures = Features->NumFeatures;
796  if (NumFeatures == 0 || NumFeatures > UNLIKELY_NUM_FEAT) {
797  FreeFeatureSet(Features);
798  return 0;
799  }
800 
801  ComputeIntFeatures(Features, IntFeatures);
802  *FloatFeatures = Features;
803 
804  return NumFeatures;
805 } /* GetAdaptiveFeatures */

◆ GetAmbiguities()

UNICHAR_ID * tesseract::Classify::GetAmbiguities ( TBLOB Blob,
CLASS_ID  CorrectClass 
)

This routine matches blob to the built-in templates to find out if there are any classes other than the correct class which are potential ambiguities.

Parameters
Blob: blob to get classification ambiguities for
CorrectClass: correct class for Blob

Globals:

  • CurrentRatings used by qsort compare routine
  • PreTrainedTemplates built-in templates
Returns
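Array ("string") of UNICHAR_IDs containing all possible ambiguous classes, terminated by -1; nullptr if no training sample could be obtained for Blob.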

Definition at line 1592 of file adaptmatch.cpp.

1593  {
1594  auto *Results = new ADAPT_RESULTS();
1595  UNICHAR_ID *Ambiguities;
1596  int i;
1597 
1598  Results->Initialize();
1599  INT_FX_RESULT_STRUCT fx_info;
1601  TrainingSample* sample =
1603  &bl_features);
1604  if (sample == nullptr) {
1605  delete Results;
1606  return nullptr;
1607  }
1608 
1609  CharNormClassifier(Blob, *sample, Results);
1610  delete sample;
1611  RemoveBadMatches(Results);
1612  Results->match.sort(&UnicharRating::SortDescendingRating);
1613 
1614  /* copy the class id's into an string of ambiguities - don't copy if
1615  the correct class is the only class id matched */
1616  Ambiguities = new UNICHAR_ID[Results->match.size() + 1];
1617  if (Results->match.size() > 1 ||
1618  (Results->match.size() == 1 &&
1619  Results->match[0].unichar_id != CorrectClass)) {
1620  for (i = 0; i < Results->match.size(); i++)
1621  Ambiguities[i] = Results->match[i].unichar_id;
1622  Ambiguities[i] = -1;
1623  } else {
1624  Ambiguities[0] = -1;
1625  }
1626 
1627  delete Results;
1628  return Ambiguities;
1629 } /* GetAmbiguities */

◆ GetCharNormFeature()

int tesseract::Classify::GetCharNormFeature ( const INT_FX_RESULT_STRUCT fx_info,
INT_TEMPLATES  templates,
uint8_t *  pruner_norm_array,
uint8_t *  char_norm_array 
)

This routine calls the integer (Hardware) feature extractor if it has not been called before for this blob.

The results from the feature extractor are placed into globals so that they can be used in other routines without re-extracting the features.

It then copies the char norm features into the IntFeatures array provided by the caller.

Parameters
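templates: used to compute char norm adjustments
pruner_norm_array: Array of factors from blob normalization process
char_norm_array: array to fill with dummy char norm adjustments
fx_info: feature-extraction results for the blob (its Ymean, Length, Rx and Ry fields are read)

Globals: (none documented)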
Returns
Number of features extracted or 0 if an error occurred.

Definition at line 1678 of file adaptmatch.cpp.

1681  {
1682  FEATURE norm_feature = NewFeature(&CharNormDesc);
1683  float baseline = kBlnBaselineOffset;
1684  float scale = MF_SCALE_FACTOR;
1685  norm_feature->Params[CharNormY] = (fx_info.Ymean - baseline) * scale;
1686  norm_feature->Params[CharNormLength] =
1687  fx_info.Length * scale / LENGTH_COMPRESSION;
1688  norm_feature->Params[CharNormRx] = fx_info.Rx * scale;
1689  norm_feature->Params[CharNormRy] = fx_info.Ry * scale;
1690  // Deletes norm_feature.
1691  ComputeCharNormArrays(norm_feature, templates, char_norm_array,
1692  pruner_norm_array);
1693  return IntCastRounded(fx_info.Length / kStandardFeatureLength);
1694 } /* GetCharNormFeature */

◆ GetClassToDebug()

CLASS_ID tesseract::Classify::GetClassToDebug ( const char *  Prompt,
bool *  adaptive_on,
bool *  pretrained_on,
int *  shape_id 
)

This routine prompts the user with Prompt and waits for the user to enter something in the debug window.

Parameters
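Prompt: prompt to print while waiting for input from window
[out] adaptive_on: set according to the popup command selected in the debug window
[out] pretrained_on: set according to the popup command selected in the debug window
[out] shape_id: set to the shape index entered for a shape-index command, or to -1 for an adaptive-only command or when no shape table is loaded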
Returns
Character entered in the debug window.
Note
Globals: none

Definition at line 1256 of file intproto.cpp.

1258  {
1259  tprintf("%s\n", Prompt);
1260  SVEvent* ev;
1261  SVEventType ev_type;
1262  int unichar_id = INVALID_UNICHAR_ID;
1263  // Wait until a click or popup event.
1264  do {
1265  ev = IntMatchWindow->AwaitEvent(SVET_ANY);
1266  ev_type = ev->type;
1267  if (ev_type == SVET_POPUP) {
1268  if (ev->command_id == IDA_SHAPE_INDEX) {
1269  if (shape_table_ != nullptr) {
1270  *shape_id = atoi(ev->parameter);
1271  *adaptive_on = false;
1272  *pretrained_on = true;
1273  if (*shape_id >= 0 && *shape_id < shape_table_->NumShapes()) {
1274  int font_id;
1275  shape_table_->GetFirstUnicharAndFont(*shape_id, &unichar_id,
1276  &font_id);
1277  tprintf("Shape %d, first unichar=%d, font=%d\n",
1278  *shape_id, unichar_id, font_id);
1279  return unichar_id;
1280  }
1281  tprintf("Shape index '%s' not found in shape table\n", ev->parameter);
1282  } else {
1283  tprintf("No shape table loaded!\n");
1284  }
1285  } else {
1287  unichar_id = unicharset.unichar_to_id(ev->parameter);
1288  if (ev->command_id == IDA_ADAPTIVE) {
1289  *adaptive_on = true;
1290  *pretrained_on = false;
1291  *shape_id = -1;
1292  } else if (ev->command_id == IDA_STATIC) {
1293  *adaptive_on = false;
1294  *pretrained_on = true;
1295  } else {
1296  *adaptive_on = true;
1297  *pretrained_on = true;
1298  }
1299  if (ev->command_id == IDA_ADAPTIVE || shape_table_ == nullptr) {
1300  *shape_id = -1;
1301  return unichar_id;
1302  }
1303  for (int s = 0; s < shape_table_->NumShapes(); ++s) {
1304  if (shape_table_->GetShape(s).ContainsUnichar(unichar_id)) {
1305  tprintf("%s\n", shape_table_->DebugStr(s).c_str());
1306  }
1307  }
1308  } else {
1309  tprintf("Char class '%s' not found in unicharset",
1310  ev->parameter);
1311  }
1312  }
1313  }
1314  delete ev;
1315  } while (ev_type != SVET_CLICK);
1316  return 0;

◆ getDict()

virtual Dict& tesseract::Classify::getDict ( )
inline virtual

Reimplemented in tesseract::Tesseract.

Definition at line 107 of file classify.h.

107  {
108  return dict_;
109  }

◆ GetFontinfoId()

int tesseract::Classify::GetFontinfoId ( ADAPT_CLASS  Class,
uint8_t  ConfigId 
)

Definition at line 173 of file adaptive.cpp.

173  {
174  return (ConfigIsPermanent(Class, ConfigId) ?
175  PermConfigFor(Class, ConfigId)->FontinfoId :
176  TempConfigFor(Class, ConfigId)->FontinfoId);
177 }

◆ InitAdaptedClass()

void tesseract::Classify::InitAdaptedClass ( TBLOB Blob,
CLASS_ID  ClassId,
int  FontinfoId,
ADAPT_CLASS  Class,
ADAPT_TEMPLATES  Templates 
)

This routine creates a new adapted class and uses Blob as the model for the first config in that class.

Parameters
Blob: blob to model new class after
ClassId: id of the class to be initialized
FontinfoId: font information inferred from pre-trained templates
Class: adapted class to be initialized
Templates: adapted templates to add new class to

Globals:

Definition at line 693 of file adaptmatch.cpp.

697  {
698  FEATURE_SET Features;
699  int Fid, Pid;
700  FEATURE Feature;
701  int NumFeatures;
702  TEMP_PROTO TempProto;
703  PROTO Proto;
704  INT_CLASS IClass;
706 
707  classify_norm_method.set_value(baseline);
708  Features = ExtractOutlineFeatures(Blob);
709  NumFeatures = Features->NumFeatures;
710  if (NumFeatures > UNLIKELY_NUM_FEAT || NumFeatures <= 0) {
711  FreeFeatureSet(Features);
712  return;
713  }
714 
715  Config = NewTempConfig(NumFeatures - 1, FontinfoId);
716  TempConfigFor(Class, 0) = Config;
717 
718  /* this is a kludge to construct cutoffs for adapted templates */
719  if (Templates == AdaptedTemplates)
720  BaselineCutoffs[ClassId] = CharNormCutoffs[ClassId];
721 
722  IClass = ClassForClassId (Templates->Templates, ClassId);
723 
724  for (Fid = 0; Fid < Features->NumFeatures; Fid++) {
725  Pid = AddIntProto (IClass);
726  assert (Pid != NO_PROTO);
727 
728  Feature = Features->Features[Fid];
729  TempProto = NewTempProto ();
730  Proto = &(TempProto->Proto);
731 
732  /* compute proto params - NOTE that Y_DIM_OFFSET must be used because
733  ConvertProto assumes that the Y dimension varies from -0.5 to 0.5
734  instead of the -0.25 to 0.75 used in baseline normalization */
735  Proto->Angle = Feature->Params[OutlineFeatDir];
736  Proto->X = Feature->Params[OutlineFeatX];
737  Proto->Y = Feature->Params[OutlineFeatY] - Y_DIM_OFFSET;
738  Proto->Length = Feature->Params[OutlineFeatLength];
739  FillABC(Proto);
740 
741  TempProto->ProtoId = Pid;
742  SET_BIT (Config->Protos, Pid);
743 
744  ConvertProto(Proto, Pid, IClass);
745  AddProtoToProtoPruner(Proto, Pid, IClass,
747 
748  Class->TempProtos = push (Class->TempProtos, TempProto);
749  }
750  FreeFeatureSet(Features);
751 
752  AddIntConfig(IClass);
753  ConvertConfig (AllProtosOn, 0, IClass);
754 
756  tprintf("Added new class '%s' with class id %d and %d protos.\n",
757  unicharset.id_to_unichar(ClassId), ClassId, NumFeatures);
759  DisplayAdaptedChar(Blob, IClass);
760  }
761 
762  if (IsEmptyAdaptedClass(Class))
763  (Templates->NumNonEmptyClasses)++;
764 } /* InitAdaptedClass */

◆ InitAdaptiveClassifier()

void tesseract::Classify::InitAdaptiveClassifier ( TessdataManager mgr)

This routine reads in the training information needed by the adaptive classifier and saves it into global variables.

Parameters (as written in the source comment; note that the signature shown above takes only mgr):
load_pre_trained_templates: Indicates whether the pre-trained templates (inttemp, normproto and pffmtable components) should be loaded. Should only be set to true if the necessary classifier components are present in the [lang].traineddata file.

Globals:

  • BuiltInTemplatesFile file to get built-in temps from
  • BuiltInCutoffsFile file to get avg. feat per class from
  • classify_use_pre_adapted_templates enables use of pre-adapted templates

Definition at line 527 of file adaptmatch.cpp.

527  {
529  return;
530  if (AllProtosOn != nullptr)
531  EndAdaptiveClassifier(); // Don't leak with multiple inits.
532 
533  // If there is no language_data_path_prefix, the classifier will be
534  // adaptive only.
535  if (language_data_path_prefix.length() > 0 && mgr != nullptr) {
536  TFile fp;
537  ASSERT_HOST(mgr->GetComponent(TESSDATA_INTTEMP, &fp));
539 
540  if (mgr->GetComponent(TESSDATA_SHAPE_TABLE, &fp)) {
541  shape_table_ = new ShapeTable(unicharset);
542  if (!shape_table_->DeSerialize(&fp)) {
543  tprintf("Error loading shape table!\n");
544  delete shape_table_;
545  shape_table_ = nullptr;
546  }
547  }
548 
549  ASSERT_HOST(mgr->GetComponent(TESSDATA_PFFMTABLE, &fp));
550  ReadNewCutoffs(&fp, CharNormCutoffs);
551 
552  ASSERT_HOST(mgr->GetComponent(TESSDATA_NORMPROTO, &fp));
553  NormProtos = ReadNormProtos(&fp);
554  static_classifier_ = new TessClassifier(false, this);
555  }
556 
557  InitIntegerFX();
558 
559  AllProtosOn = NewBitVector(MAX_NUM_PROTOS);
560  AllConfigsOn = NewBitVector(MAX_NUM_CONFIGS);
561  AllConfigsOff = NewBitVector(MAX_NUM_CONFIGS);
562  TempProtoMask = NewBitVector(MAX_NUM_PROTOS);
563  set_all_bits(AllProtosOn, WordsInVectorOfSize(MAX_NUM_PROTOS));
564  set_all_bits(AllConfigsOn, WordsInVectorOfSize(MAX_NUM_CONFIGS));
565  zero_all_bits(AllConfigsOff, WordsInVectorOfSize(MAX_NUM_CONFIGS));
566 
567  for (uint16_t& BaselineCutoff : BaselineCutoffs) {
568  BaselineCutoff = 0;
569  }
570 
572  TFile fp;
573  STRING Filename;
574 
575  Filename = imagefile;
576  Filename += ADAPT_TEMPLATE_SUFFIX;
577  if (!fp.Open(Filename.c_str(), nullptr)) {
579  } else {
580  cprintf("\nReading pre-adapted templates from %s ...\n",
581  Filename.c_str());
582  fflush(stdout);
584  cprintf("\n");
586 
587  for (int i = 0; i < AdaptedTemplates->Templates->NumClasses; i++) {
588  BaselineCutoffs[i] = CharNormCutoffs[i];
589  }
590  }
591  } else {
592  if (AdaptedTemplates != nullptr)
595  }
596 } /* InitAdaptiveClassifier */

◆ LargeSpeckle()

bool tesseract::Classify::LargeSpeckle ( const TBLOB blob)

Definition at line 224 of file classify.cpp.

224  {
225  double speckle_size = kBlnXHeight * speckle_large_max_size;
226  TBOX bbox = blob.bounding_box();
227  return bbox.width() < speckle_size && bbox.height() < speckle_size;
228 }

◆ LearnBlob()

void tesseract::Classify::LearnBlob ( const STRING fontname,
TBLOB Blob,
const DENORM cn_denorm,
const INT_FX_RESULT_STRUCT fx_info,
const char *  blob_text 
)

Definition at line 70 of file blobclass.cpp.

73  {
75  CharDesc->FeatureSets[0] = ExtractMicros(blob, cn_denorm);
76  CharDesc->FeatureSets[1] = ExtractCharNormFeatures(fx_info);
77  CharDesc->FeatureSets[2] = ExtractIntCNFeatures(*blob, fx_info);
78  CharDesc->FeatureSets[3] = ExtractIntGeoFeatures(*blob, fx_info);
79 
80  if (ValidCharDescription(feature_defs_, CharDesc)) {
81  // Label the features with a class name and font name.
82  tr_file_data_ += "\n";
83  tr_file_data_ += fontname;
84  tr_file_data_ += " ";
85  tr_file_data_ += blob_text;
86  tr_file_data_ += "\n";
87 
88  // write micro-features to file and clean up
89  WriteCharDescription(feature_defs_, CharDesc, &tr_file_data_);
90  } else {
91  tprintf("Blob learned was invalid!\n");
92  }
93  FreeCharDescription(CharDesc);
94 } // LearnBlob

◆ LearnPieces()

void tesseract::Classify::LearnPieces ( const char *  fontname,
int  start,
int  length,
float  threshold,
CharSegmentationType  segmentation,
const char *  correct_text,
WERD_RES word 
)

Definition at line 374 of file adaptmatch.cpp.

376  {
377  // TODO(daria) Remove/modify this if/when we want
378  // to train and/or adapt to n-grams.
379  if (segmentation != CST_WHOLE &&
380  (segmentation != CST_FRAGMENT || disable_character_fragments))
381  return;
382 
383  if (length > 1) {
384  SEAM::JoinPieces(word->seam_array, word->chopped_word->blobs, start,
385  start + length - 1);
386  }
387  TBLOB* blob = word->chopped_word->blobs[start];
388  // Rotate the blob if needed for classification.
389  TBLOB* rotated_blob = blob->ClassifyNormalizeIfNeeded();
390  if (rotated_blob == nullptr)
391  rotated_blob = blob;
392 
393  #ifndef GRAPHICS_DISABLED
394  // Draw debug windows showing the blob that is being learned if needed.
395  if (strcmp(classify_learn_debug_str.c_str(), correct_text) == 0) {
396  RefreshDebugWindow(&learn_debug_win_, "LearnPieces", 600,
397  word->chopped_word->bounding_box());
398  rotated_blob->plot(learn_debug_win_, ScrollView::GREEN, ScrollView::BROWN);
399  learn_debug_win_->Update();
400  window_wait(learn_debug_win_);
401  }
402  if (classify_debug_character_fragments && segmentation == CST_FRAGMENT) {
403  ASSERT_HOST(learn_fragments_debug_win_ != nullptr); // set up in LearnWord
404  blob->plot(learn_fragments_debug_win_,
406  learn_fragments_debug_win_->Update();
407  }
408  #endif // GRAPHICS_DISABLED
409 
410  if (fontname != nullptr) {
411  classify_norm_method.set_value(character); // force char norm spc 30/11/93
412  tess_bn_matching.set_value(false); // turn it off
413  tess_cn_matching.set_value(false);
414  DENORM bl_denorm, cn_denorm;
415  INT_FX_RESULT_STRUCT fx_info;
417  &bl_denorm, &cn_denorm, &fx_info);
418  LearnBlob(fontname, rotated_blob, cn_denorm, fx_info, correct_text);
419  } else if (unicharset.contains_unichar(correct_text)) {
420  UNICHAR_ID class_id = unicharset.unichar_to_id(correct_text);
421  int font_id = word->fontinfo != nullptr
422  ? fontinfo_table_.get_id(*word->fontinfo)
423  : 0;
425  tprintf("Adapting to char = %s, thr= %g font_id= %d\n",
426  unicharset.id_to_unichar(class_id), threshold, font_id);
427  // If filename is not nullptr we are doing recognition
428  // (as opposed to training), so we must have already set word fonts.
429  AdaptToChar(rotated_blob, class_id, font_id, threshold, AdaptedTemplates);
430  if (BackupAdaptedTemplates != nullptr) {
431  // Adapt the backup templates too. They will be used if the primary gets
432  // too full.
433  AdaptToChar(rotated_blob, class_id, font_id, threshold,
435  }
436  } else if (classify_debug_level >= 1) {
437  tprintf("Can't adapt to %s not in unicharset\n", correct_text);
438  }
439  if (rotated_blob != blob) {
440  delete rotated_blob;
441  }
442 
443  SEAM::BreakPieces(word->seam_array, word->chopped_word->blobs, start,
444  start + length - 1);
445 } // LearnPieces.

◆ LearnWord()

void tesseract::Classify::LearnWord ( const char *  fontname,
WERD_RES word 
)

Definition at line 250 of file adaptmatch.cpp.

250  {
251  int word_len = word->correct_text.size();
252  if (word_len == 0) return;
253 
254  float* thresholds = nullptr;
255  if (fontname == nullptr) {
256  // Adaption mode.
257  if (!EnableLearning || word->best_choice == nullptr)
258  return; // Can't or won't adapt.
259 
261  tprintf("\n\nAdapting to word = %s\n",
262  word->best_choice->debug_string().c_str());
263  thresholds = new float[word_len];
267  matcher_rating_margin, thresholds);
268  }
269  int start_blob = 0;
270 
271  #ifndef GRAPHICS_DISABLED
273  if (learn_fragmented_word_debug_win_ != nullptr) {
274  window_wait(learn_fragmented_word_debug_win_);
275  }
276  RefreshDebugWindow(&learn_fragments_debug_win_, "LearnPieces", 400,
277  word->chopped_word->bounding_box());
278  RefreshDebugWindow(&learn_fragmented_word_debug_win_, "LearnWord", 200,
279  word->chopped_word->bounding_box());
280  word->chopped_word->plot(learn_fragmented_word_debug_win_);
282  }
283  #endif // GRAPHICS_DISABLED
284 
285  for (int ch = 0; ch < word_len; ++ch) {
287  tprintf("\nLearning %s\n", word->correct_text[ch].c_str());
288  }
289  if (word->correct_text[ch].length() > 0) {
290  float threshold = thresholds != nullptr ? thresholds[ch] : 0.0f;
291 
292  LearnPieces(fontname, start_blob, word->best_state[ch], threshold,
293  CST_WHOLE, word->correct_text[ch].c_str(), word);
294 
295  if (word->best_state[ch] > 1 && !disable_character_fragments) {
296  // Check that the character breaks into meaningful fragments
297  // that each match a whole character with at least
298  // classify_character_fragments_garbage_certainty_threshold
299  bool garbage = false;
300  int frag;
301  for (frag = 0; frag < word->best_state[ch]; ++frag) {
302  TBLOB* frag_blob = word->chopped_word->blobs[start_blob + frag];
304  garbage |= LooksLikeGarbage(frag_blob);
305  }
306  }
307  // Learn the fragments.
308  if (!garbage) {
309  bool pieces_all_natural = word->PiecesAllNatural(start_blob,
310  word->best_state[ch]);
311  if (pieces_all_natural || !prioritize_division) {
312  for (frag = 0; frag < word->best_state[ch]; ++frag) {
313  GenericVector<STRING> tokens;
314  word->correct_text[ch].split(' ', &tokens);
315 
316  tokens[0] = CHAR_FRAGMENT::to_string(
317  tokens[0].c_str(), frag, word->best_state[ch],
318  pieces_all_natural);
319 
320  STRING full_string;
321  for (int i = 0; i < tokens.size(); i++) {
322  full_string += tokens[i];
323  if (i != tokens.size() - 1)
324  full_string += ' ';
325  }
326  LearnPieces(fontname, start_blob + frag, 1, threshold,
327  CST_FRAGMENT, full_string.c_str(), word);
328  }
329  }
330  }
331  }
332 
333  // TODO(rays): re-enable this part of the code when we switch to the
334  // new classifier that needs to see examples of garbage.
335  /*
336  if (word->best_state[ch] > 1) {
337  // If the next blob is good, make junk with the rightmost fragment.
338  if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) {
339  LearnPieces(fontname, start_blob + word->best_state[ch] - 1,
340  word->best_state[ch + 1] + 1,
341  threshold, CST_IMPROPER, INVALID_UNICHAR, word);
342  }
343  // If the previous blob is good, make junk with the leftmost fragment.
344  if (ch > 0 && word->correct_text[ch - 1].length() > 0) {
345  LearnPieces(fontname, start_blob - word->best_state[ch - 1],
346  word->best_state[ch - 1] + 1,
347  threshold, CST_IMPROPER, INVALID_UNICHAR, word);
348  }
349  }
350  // If the next blob is good, make a join with it.
351  if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) {
352  STRING joined_text = word->correct_text[ch];
353  joined_text += word->correct_text[ch + 1];
354  LearnPieces(fontname, start_blob,
355  word->best_state[ch] + word->best_state[ch + 1],
356  threshold, CST_NGRAM, joined_text.c_str(), word);
357  }
358  */
359  }
360  start_blob += word->best_state[ch];
361  }
362  delete [] thresholds;
363 } // LearnWord.

◆ LooksLikeGarbage()

bool tesseract::Classify::LooksLikeGarbage ( TBLOB blob)

Definition at line 1633 of file adaptmatch.cpp.

1633  {
1634  auto *ratings = new BLOB_CHOICE_LIST();
1635  AdaptiveClassifier(blob, ratings);
1636  BLOB_CHOICE_IT ratings_it(ratings);
1639  print_ratings_list("======================\nLooksLikeGarbage() got ",
1640  ratings, unicharset);
1641  }
1642  for (ratings_it.mark_cycle_pt(); !ratings_it.cycled_list();
1643  ratings_it.forward()) {
1644  if (unicharset.get_fragment(ratings_it.data()->unichar_id()) != nullptr) {
1645  continue;
1646  }
1647  float certainty = ratings_it.data()->certainty();
1648  delete ratings;
1649  return certainty <
1651  }
1652  delete ratings;
1653  return true; // no whole characters in ratings
1654 }

◆ MakeNewTemporaryConfig()

int tesseract::Classify::MakeNewTemporaryConfig ( ADAPT_TEMPLATES  Templates,
CLASS_ID  ClassId,
int  FontinfoId,
int  NumFeatures,
INT_FEATURE_ARRAY  Features,
FEATURE_SET  FloatFeatures 
)
Parameters
Templates: adapted templates to add new config to
ClassId: class id to associate with new config
FontinfoId: font information inferred from pre-trained templates
NumFeatures: number of features in IntFeatures
Features: features describing model for new config
FloatFeatures: floating-pt representation of features
Returns
The id of the new config created, a negative integer in case of error.

Definition at line 1740 of file adaptmatch.cpp.

1745  {
1746  INT_CLASS IClass;
1747  ADAPT_CLASS Class;
1748  PROTO_ID OldProtos[MAX_NUM_PROTOS];
1749  FEATURE_ID BadFeatures[MAX_NUM_INT_FEATURES];
1750  int NumOldProtos;
1751  int NumBadFeatures;
1752  int MaxProtoId, OldMaxProtoId;
1753  int MaskSize;
1754  int ConfigId;
1756  int i;
1757  int debug_level = NO_DEBUG;
1758 
1760  debug_level =
1762 
1763  IClass = ClassForClassId(Templates->Templates, ClassId);
1764  Class = Templates->Class[ClassId];
1765 
1766  if (IClass->NumConfigs >= MAX_NUM_CONFIGS) {
1767  ++NumAdaptationsFailed;
1769  cprintf("Cannot make new temporary config: maximum number exceeded.\n");
1770  return -1;
1771  }
1772 
1773  OldMaxProtoId = IClass->NumProtos - 1;
1774 
1775  NumOldProtos = im_.FindGoodProtos(IClass, AllProtosOn, AllConfigsOff,
1776  NumFeatures, Features,
1777  OldProtos, classify_adapt_proto_threshold,
1778  debug_level);
1779 
1780  MaskSize = WordsInVectorOfSize(MAX_NUM_PROTOS);
1781  zero_all_bits(TempProtoMask, MaskSize);
1782  for (i = 0; i < NumOldProtos; i++)
1783  SET_BIT(TempProtoMask, OldProtos[i]);
1784 
1785  NumBadFeatures = im_.FindBadFeatures(IClass, TempProtoMask, AllConfigsOn,
1786  NumFeatures, Features,
1787  BadFeatures,
1789  debug_level);
1790 
1791  MaxProtoId = MakeNewTempProtos(FloatFeatures, NumBadFeatures, BadFeatures,
1792  IClass, Class, TempProtoMask);
1793  if (MaxProtoId == NO_PROTO) {
1794  ++NumAdaptationsFailed;
1796  cprintf("Cannot make new temp protos: maximum number exceeded.\n");
1797  return -1;
1798  }
1799 
1800  ConfigId = AddIntConfig(IClass);
1801  ConvertConfig(TempProtoMask, ConfigId, IClass);
1802  Config = NewTempConfig(MaxProtoId, FontinfoId);
1803  TempConfigFor(Class, ConfigId) = Config;
1804  copy_all_bits(TempProtoMask, Config->Protos, Config->ProtoVectorSize);
1805 
1807  cprintf("Making new temp config %d fontinfo id %d"
1808  " using %d old and %d new protos.\n",
1809  ConfigId, Config->FontinfoId,
1810  NumOldProtos, MaxProtoId - OldMaxProtoId);
1811 
1812  return ConfigId;
1813 } /* MakeNewTemporaryConfig */

◆ MakeNewTempProtos()

PROTO_ID tesseract::Classify::MakeNewTempProtos ( FEATURE_SET  Features,
int  NumBadFeat,
FEATURE_ID  BadFeat[],
INT_CLASS  IClass,
ADAPT_CLASS  Class,
BIT_VECTOR  TempProtoMask 
)

This routine finds sets of sequential bad features that all have the same angle and converts each set into a new temporary proto. The temp proto is added to the proto pruner for IClass, pushed onto the list of temp protos in Class, and added to TempProtoMask.

Parameters
Features: floating-pt features describing new character
NumBadFeat: number of bad features to turn into protos
BadFeat: feature id's of bad features
IClass: integer class templates to add new protos to
Class: adapted class templates to add new protos to
TempProtoMask: proto mask to add new protos to

Globals: none

Returns
Max proto id in class after all protos have been added.

Definition at line 1834 of file adaptmatch.cpp.

1839  {
1840  FEATURE_ID *ProtoStart;
1841  FEATURE_ID *ProtoEnd;
1842  FEATURE_ID *LastBad;
1843  TEMP_PROTO TempProto;
1844  PROTO Proto;
1845  FEATURE F1, F2;
1846  float X1, X2, Y1, Y2;
1847  float A1, A2, AngleDelta;
1848  float SegmentLength;
1849  PROTO_ID Pid;
1850 
1851  for (ProtoStart = BadFeat, LastBad = ProtoStart + NumBadFeat;
1852  ProtoStart < LastBad; ProtoStart = ProtoEnd) {
1853  F1 = Features->Features[*ProtoStart];
1854  X1 = F1->Params[PicoFeatX];
1855  Y1 = F1->Params[PicoFeatY];
1856  A1 = F1->Params[PicoFeatDir];
1857 
1858  for (ProtoEnd = ProtoStart + 1,
1859  SegmentLength = GetPicoFeatureLength();
1860  ProtoEnd < LastBad;
1861  ProtoEnd++, SegmentLength += GetPicoFeatureLength()) {
1862  F2 = Features->Features[*ProtoEnd];
1863  X2 = F2->Params[PicoFeatX];
1864  Y2 = F2->Params[PicoFeatY];
1865  A2 = F2->Params[PicoFeatDir];
1866 
1867  AngleDelta = fabs(A1 - A2);
1868  if (AngleDelta > 0.5)
1869  AngleDelta = 1.0 - AngleDelta;
1870 
1871  if (AngleDelta > matcher_clustering_max_angle_delta ||
1872  fabs(X1 - X2) > SegmentLength ||
1873  fabs(Y1 - Y2) > SegmentLength)
1874  break;
1875  }
1876 
1877  F2 = Features->Features[*(ProtoEnd - 1)];
1878  X2 = F2->Params[PicoFeatX];
1879  Y2 = F2->Params[PicoFeatY];
1880  A2 = F2->Params[PicoFeatDir];
1881 
1882  Pid = AddIntProto(IClass);
1883  if (Pid == NO_PROTO)
1884  return (NO_PROTO);
1885 
1886  TempProto = NewTempProto();
1887  Proto = &(TempProto->Proto);
1888 
1889  /* compute proto params - NOTE that Y_DIM_OFFSET must be used because
1890  ConvertProto assumes that the Y dimension varies from -0.5 to 0.5
1891  instead of the -0.25 to 0.75 used in baseline normalization */
1892  Proto->Length = SegmentLength;
1893  Proto->Angle = A1;
1894  Proto->X = (X1 + X2) / 2.0;
1895  Proto->Y = (Y1 + Y2) / 2.0 - Y_DIM_OFFSET;
1896  FillABC(Proto);
1897 
1898  TempProto->ProtoId = Pid;
1899  SET_BIT(TempProtoMask, Pid);
1900 
1901  ConvertProto(Proto, Pid, IClass);
1902  AddProtoToProtoPruner(Proto, Pid, IClass,
1904 
1905  Class->TempProtos = push(Class->TempProtos, TempProto);
1906  }
1907  return IClass->NumProtos - 1;
1908 } /* MakeNewTempProtos */

◆ MakePermanent()

void tesseract::Classify::MakePermanent ( ADAPT_TEMPLATES  Templates,
CLASS_ID  ClassId,
int  ConfigId,
TBLOB Blob 
)
Parameters
Templates: current set of adaptive templates
ClassId: class containing config to be made permanent
ConfigId: config to be made permanent
Blob: current blob being adapted to

Globals: none

Definition at line 1920 of file adaptmatch.cpp.

1923  {
1924  UNICHAR_ID *Ambigs;
1926  ADAPT_CLASS Class;
1927  PROTO_KEY ProtoKey;
1928 
1929  Class = Templates->Class[ClassId];
1930  Config = TempConfigFor(Class, ConfigId);
1931 
1932  MakeConfigPermanent(Class, ConfigId);
1933  if (Class->NumPermConfigs == 0)
1934  Templates->NumPermClasses++;
1935  Class->NumPermConfigs++;
1936 
1937  // Initialize permanent config.
1938  Ambigs = GetAmbiguities(Blob, ClassId);
1939  auto Perm = static_cast<PERM_CONFIG>(malloc(sizeof(PERM_CONFIG_STRUCT)));
1940  Perm->Ambigs = Ambigs;
1941  Perm->FontinfoId = Config->FontinfoId;
1942 
1943  // Free memory associated with temporary config (since ADAPTED_CONFIG
1944  // is a union we need to clean up before we record permanent config).
1945  ProtoKey.Templates = Templates;
1946  ProtoKey.ClassId = ClassId;
1947  ProtoKey.ConfigId = ConfigId;
1948  Class->TempProtos = delete_d(Class->TempProtos, &ProtoKey, MakeTempProtoPerm);
1950 
1951  // Record permanent config.
1952  PermConfigFor(Class, ConfigId) = Perm;
1953 
1954  if (classify_learning_debug_level >= 1) {
1955  tprintf("Making config %d for %s (ClassId %d) permanent:"
1956  " fontinfo id %d, ambiguities '",
1957  ConfigId, getDict().getUnicharset().debug_str(ClassId).c_str(),
1958  ClassId, PermConfigFor(Class, ConfigId)->FontinfoId);
1959  for (UNICHAR_ID *AmbigsPointer = Ambigs;
1960  *AmbigsPointer >= 0; ++AmbigsPointer)
1961  tprintf("%s", unicharset.id_to_unichar(*AmbigsPointer));
1962  tprintf("'.\n");
1963  }
1964 } /* MakePermanent */

◆ MasterMatcher()

void tesseract::Classify::MasterMatcher ( INT_TEMPLATES  templates,
int16_t  num_features,
const INT_FEATURE_STRUCT features,
const uint8_t *  norm_factors,
ADAPT_CLASS classes,
int  debug,
int  matcher_multiplier,
const TBOX blob_box,
const GenericVector< CP_RESULT_STRUCT > &  results,
ADAPT_RESULTS final_results 
)

Factored-out calls to IntegerMatcher based on class pruner results. Returns integer matcher results inside CLASS_PRUNER_RESULTS structure.

Definition at line 1088 of file adaptmatch.cpp.

1097  {
1098  int top = blob_box.top();
1099  int bottom = blob_box.bottom();
1100  UnicharRating int_result;
1101  for (int c = 0; c < results.size(); c++) {
1102  CLASS_ID class_id = results[c].Class;
1103  BIT_VECTOR protos = classes != nullptr ? classes[class_id]->PermProtos
1104  : AllProtosOn;
1105  BIT_VECTOR configs = classes != nullptr ? classes[class_id]->PermConfigs
1106  : AllConfigsOn;
1107 
1108  int_result.unichar_id = class_id;
1109  im_.Match(ClassForClassId(templates, class_id),
1110  protos, configs,
1111  num_features, features,
1112  &int_result, classify_adapt_feature_threshold, debug,
1114  bool is_debug = matcher_debug_level >= 2 || classify_debug_level > 1;
1115  ExpandShapesAndApplyCorrections(classes, is_debug, class_id, bottom, top,
1116  results[c].Rating,
1117  final_results->BlobLength,
1118  matcher_multiplier, norm_factors,
1119  &int_result, final_results);
1120  }
1121 }

◆ NewAdaptedTemplates()

ADAPT_TEMPLATES tesseract::Classify::NewAdaptedTemplates ( bool  InitFromUnicharset)

Allocates memory for adapted templates.

Parameters
InitFromUnicharset: if true, add an empty class for each char in unicharset to the newly created templates
Returns
Ptr to new adapted templates.
Note
Globals: none

Definition at line 151 of file adaptive.cpp.

151  {
152  ADAPT_TEMPLATES Templates;
153 
154  Templates = static_cast<ADAPT_TEMPLATES>(Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT)));
155 
156  Templates->Templates = NewIntTemplates ();
157  Templates->NumPermClasses = 0;
158  Templates->NumNonEmptyClasses = 0;
159 
160  /* Insert an empty class for each unichar id in unicharset */
161  for (int i = 0; i < MAX_NUM_CLASSES; i++) {
162  Templates->Class[i] = nullptr;
163  if (InitFromUnicharset && i < unicharset.size()) {
164  AddAdaptedClass(Templates, NewAdaptedClass(), i);
165  }
166  }
167 
168  return (Templates);
169 
170 } /* NewAdaptedTemplates */

◆ NormalizeOutlines()

void tesseract::Classify::NormalizeOutlines ( LIST  Outlines,
float *  XScale,
float *  YScale 
)

This routine normalizes every outline in Outlines according to the currently selected normalization method. It also returns the scale factors that it used to do this scaling. The scale factors returned represent the x and y sizes in the normalized coordinate system that correspond to 1 pixel in the original coordinate system. Outlines are changed and XScale and YScale are updated.

Globals:

  • classify_norm_method: method being used for normalization
  • classify_char_norm_range: map radius of gyration to this value

Parameters
Outlines: list of outlines to be normalized
XScale: x-direction scale factor used by routine
YScale: y-direction scale factor used by routine

Definition at line 275 of file mfoutline.cpp.

278  {
279  MFOUTLINE Outline;
280 
281  switch (classify_norm_method) {
282  case character:
283  ASSERT_HOST(!"How did NormalizeOutlines get called in character mode?");
284  break;
285 
286  case baseline:
287  iterate(Outlines) {
288  Outline = static_cast<MFOUTLINE>first_node(Outlines);
289  NormalizeOutline(Outline, 0.0);
290  }
291  *XScale = *YScale = MF_SCALE_FACTOR;
292  break;
293  }

◆ PrintAdaptedTemplates()

void tesseract::Classify::PrintAdaptedTemplates ( FILE *  File,
ADAPT_TEMPLATES  Templates 
)

This routine prints a summary of the adapted templates in Templates to File.

Parameters
File: open text file to print Templates to
Templates: adapted templates to print to File
Note
Globals: none

Definition at line 244 of file adaptive.cpp.

244  {
245  INT_CLASS IClass;
246  ADAPT_CLASS AClass;
247 
248  fprintf (File, "\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n");
249  fprintf (File, "Num classes = %d; Num permanent classes = %d\n\n",
250  Templates->NumNonEmptyClasses, Templates->NumPermClasses);
251  fprintf (File, " Id NC NPC NP NPP\n");
252  fprintf (File, "------------------------\n");
253 
254  for (int i = 0; i < (Templates->Templates)->NumClasses; i++) {
255  IClass = Templates->Templates->Class[i];
256  AClass = Templates->Class[i];
257  if (!IsEmptyAdaptedClass (AClass)) {
258  fprintf (File, "%5d %s %3d %3d %3d %3d\n",
260  IClass->NumConfigs, AClass->NumPermConfigs,
261  IClass->NumProtos,
262  IClass->NumProtos - count (AClass->TempProtos));
263  }
264  }
265  fprintf (File, "\n");
266 
267 } /* PrintAdaptedTemplates */

◆ PrintAdaptiveMatchResults()

void tesseract::Classify::PrintAdaptiveMatchResults ( const ADAPT_RESULTS results)

This routine prints the matches in results to the debug output (via tprintf).

Parameters
results: match results to print

Globals: none

Definition at line 2013 of file adaptmatch.cpp.

2013  {
2014  for (int i = 0; i < results.match.size(); ++i) {
2015  tprintf("%s ", unicharset.debug_str(results.match[i].unichar_id).c_str());
2016  results.match[i].Print();
2017  }
2018 } /* PrintAdaptiveMatchResults */

◆ PruneClasses()

int tesseract::Classify::PruneClasses ( const INT_TEMPLATES_STRUCT int_templates,
int  num_features,
int  keep_this,
const INT_FEATURE_STRUCT features,
const uint8_t *  normalization_factors,
const uint16_t *  expected_num_features,
GenericVector< CP_RESULT_STRUCT > *  results 
)

Runs the class pruner from int_templates on the given features, returning the number of classes output in results.

Parameters
int_templates: Class pruner tables
num_features: Number of features in blob
features: Array of features
normalization_factors: Array of fudge factors from blob normalization process (by CLASS_INDEX)
expected_num_features: Array of expected number of features for each class (by CLASS_INDEX)
results: Sorted Array of pruned classes. Must be an array of size at least int_templates->NumClasses.
keep_this: passed through to the pruner's PruneAndSort step together with classify_class_pruner_threshold; its meaning is not documented here

Definition at line 451 of file intmatcher.cpp.

457  {
458  ClassPruner pruner(int_templates->NumClasses);
459  // Compute initial match scores for all classes.
460  pruner.ComputeScores(int_templates, num_features, features);
461  // Adjust match scores for number of expected features.
462  pruner.AdjustForExpectedNumFeatures(expected_num_features,
464  // Apply disabled classes in unicharset - only works without a shape_table.
465  if (shape_table_ == nullptr)
466  pruner.DisableDisabledClasses(unicharset);
467  // If fragments are disabled, remove them, also only without a shape table.
468  if (disable_character_fragments && shape_table_ == nullptr)
469  pruner.DisableFragments(unicharset);
470 
471  // If we have good x-heights, apply the given normalization factors.
472  if (normalization_factors != nullptr) {
473  pruner.NormalizeForXheight(classify_class_pruner_multiplier,
474  normalization_factors);
475  } else {
476  pruner.NoNormalization();
477  }
478  // Do the actual pruning and sort the short-list.
479  pruner.PruneAndSort(classify_class_pruner_threshold, keep_this,
480  shape_table_ == nullptr, unicharset);
481 
482  if (classify_debug_level > 2) {
483  pruner.DebugMatch(*this, int_templates, features);
484  }
485  if (classify_debug_level > 1) {
486  pruner.SummarizeResult(*this, int_templates, expected_num_features,
488  normalization_factors);
489  }
490  // Convert to the expected output format.
491  return pruner.SetupResults(results);

◆ ReadAdaptedTemplates()

ADAPT_TEMPLATES tesseract::Classify::ReadAdaptedTemplates ( TFile fp)

Read a set of adapted templates from file and return a ptr to the templates.

Parameters
fp: open file to read adapted templates from
Returns
Ptr to adapted templates read from file.
Note
Globals: none

Definition at line 332 of file adaptive.cpp.

332  {
333  ADAPT_TEMPLATES Templates;
334 
335  /* first read the high level adaptive template struct */
336  Templates = static_cast<ADAPT_TEMPLATES>(Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT)));
337  fp->FRead(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1);
338 
339  /* then read in the basic integer templates */
340  Templates->Templates = ReadIntTemplates(fp);
341 
342  /* then read in the adaptive info for each class */
343  for (int i = 0; i < (Templates->Templates)->NumClasses; i++) {
344  Templates->Class[i] = ReadAdaptedClass(fp);
345  }
346  return (Templates);
347 
348 } /* ReadAdaptedTemplates */

◆ ReadIntTemplates()

INT_TEMPLATES tesseract::Classify::ReadIntTemplates ( TFile fp)

This routine reads a set of integer templates from fp. The file must already be open and must be in the correct binary format.

Parameters
fp: open file to read templates from
Returns
Pointer to integer templates read from fp.
Note
Globals: none

Definition at line 717 of file intproto.cpp.

718  {
719  int i, j, w, x, y, z;
720  int unicharset_size;
721  int version_id = 0;
722  INT_TEMPLATES Templates;
723  CLASS_PRUNER_STRUCT* Pruner;
724  INT_CLASS Class;
725  uint8_t *Lengths;
726  PROTO_SET ProtoSet;
727 
728  /* variables for conversion from older inttemp formats */
729  int b, bit_number, last_cp_bit_number, new_b, new_i, new_w;
730  CLASS_ID class_id, max_class_id;
731  auto *IndexFor = new int16_t[MAX_NUM_CLASSES];
732  auto *ClassIdFor = new CLASS_ID[MAX_NUM_CLASSES];
733  auto **TempClassPruner =
735  uint32_t SetBitsForMask = // word with NUM_BITS_PER_CLASS
736  (1 << NUM_BITS_PER_CLASS) - 1; // set starting at bit 0
737  uint32_t Mask, NewMask, ClassBits;
738  int MaxNumConfigs = MAX_NUM_CONFIGS;
739  int WerdsPerConfigVec = WERDS_PER_CONFIG_VEC;
740 
741  /* first read the high level template struct */
742  Templates = NewIntTemplates();
743  // Read Templates in parts for 64 bit compatibility.
744  if (fp->FReadEndian(&unicharset_size, sizeof(unicharset_size), 1) != 1)
745  tprintf("Bad read of inttemp!\n");
746  if (fp->FReadEndian(&Templates->NumClasses, sizeof(Templates->NumClasses),
747  1) != 1 ||
748  fp->FReadEndian(&Templates->NumClassPruners,
749  sizeof(Templates->NumClassPruners), 1) != 1)
750  tprintf("Bad read of inttemp!\n");
751  if (Templates->NumClasses < 0) {
752  // This file has a version id!
753  version_id = -Templates->NumClasses;
754  if (fp->FReadEndian(&Templates->NumClasses, sizeof(Templates->NumClasses),
755  1) != 1)
756  tprintf("Bad read of inttemp!\n");
757  }
758 
759  if (version_id < 3) {
760  MaxNumConfigs = OLD_MAX_NUM_CONFIGS;
761  WerdsPerConfigVec = OLD_WERDS_PER_CONFIG_VEC;
762  }
763 
764  if (version_id < 2) {
765  if (fp->FReadEndian(IndexFor, sizeof(IndexFor[0]), unicharset_size) !=
766  unicharset_size) {
767  tprintf("Bad read of inttemp!\n");
768  }
769  if (fp->FReadEndian(ClassIdFor, sizeof(ClassIdFor[0]),
770  Templates->NumClasses) != Templates->NumClasses) {
771  tprintf("Bad read of inttemp!\n");
772  }
773  }
774 
775  /* then read in the class pruners */
776  const int kNumBuckets =
778  for (i = 0; i < Templates->NumClassPruners; i++) {
779  Pruner = new CLASS_PRUNER_STRUCT;
780  if (fp->FReadEndian(Pruner, sizeof(Pruner->p[0][0][0][0]), kNumBuckets) !=
781  kNumBuckets) {
782  tprintf("Bad read of inttemp!\n");
783  }
784  if (version_id < 2) {
785  TempClassPruner[i] = Pruner;
786  } else {
787  Templates->ClassPruners[i] = Pruner;
788  }
789  }
790 
791  /* fix class pruners if they came from an old version of inttemp */
792  if (version_id < 2) {
793  // Allocate enough class pruners to cover all the class ids.
794  max_class_id = 0;
795  for (i = 0; i < Templates->NumClasses; i++)
796  if (ClassIdFor[i] > max_class_id)
797  max_class_id = ClassIdFor[i];
798  for (i = 0; i <= CPrunerIdFor(max_class_id); i++) {
799  Templates->ClassPruners[i] = new CLASS_PRUNER_STRUCT;
800  memset(Templates->ClassPruners[i], 0, sizeof(CLASS_PRUNER_STRUCT));
801  }
802  // Convert class pruners from the old format (indexed by class index)
803  // to the new format (indexed by class id).
804  last_cp_bit_number = NUM_BITS_PER_CLASS * Templates->NumClasses - 1;
805  for (i = 0; i < Templates->NumClassPruners; i++) {
806  for (x = 0; x < NUM_CP_BUCKETS; x++)
807  for (y = 0; y < NUM_CP_BUCKETS; y++)
808  for (z = 0; z < NUM_CP_BUCKETS; z++)
809  for (w = 0; w < WERDS_PER_CP_VECTOR; w++) {
810  if (TempClassPruner[i]->p[x][y][z][w] == 0)
811  continue;
812  for (b = 0; b < BITS_PER_WERD; b += NUM_BITS_PER_CLASS) {
813  bit_number = i * BITS_PER_CP_VECTOR + w * BITS_PER_WERD + b;
814  if (bit_number > last_cp_bit_number)
815  break; // the rest of the bits in this word are not used
816  class_id = ClassIdFor[bit_number / NUM_BITS_PER_CLASS];
817  // Single out NUM_BITS_PER_CLASS bits relating to class_id.
818  Mask = SetBitsForMask << b;
819  ClassBits = TempClassPruner[i]->p[x][y][z][w] & Mask;
820  // Move these bits to the new position in which they should
821  // appear (indexed corresponding to the class_id).
822  new_i = CPrunerIdFor(class_id);
823  new_w = CPrunerWordIndexFor(class_id);
824  new_b = CPrunerBitIndexFor(class_id) * NUM_BITS_PER_CLASS;
825  if (new_b > b) {
826  ClassBits <<= (new_b - b);
827  } else {
828  ClassBits >>= (b - new_b);
829  }
830  // Copy bits relating to class_id to the correct position
831  // in Templates->ClassPruner.
832  NewMask = SetBitsForMask << new_b;
833  Templates->ClassPruners[new_i]->p[x][y][z][new_w] &= ~NewMask;
834  Templates->ClassPruners[new_i]->p[x][y][z][new_w] |= ClassBits;
835  }
836  }
837  }
838  for (i = 0; i < Templates->NumClassPruners; i++) {
839  delete TempClassPruner[i];
840  }
841  }
842 
843  /* then read in each class */
844  for (i = 0; i < Templates->NumClasses; i++) {
845  /* first read in the high level struct for the class */
846  Class = static_cast<INT_CLASS>(Emalloc (sizeof (INT_CLASS_STRUCT)));
847  if (fp->FReadEndian(&Class->NumProtos, sizeof(Class->NumProtos), 1) != 1 ||
848  fp->FRead(&Class->NumProtoSets, sizeof(Class->NumProtoSets), 1) != 1 ||
849  fp->FRead(&Class->NumConfigs, sizeof(Class->NumConfigs), 1) != 1)
850  tprintf("Bad read of inttemp!\n");
851  if (version_id == 0) {
852  // Only version 0 writes 5 pointless pointers to the file.
853  for (j = 0; j < 5; ++j) {
854  int32_t junk;
855  if (fp->FRead(&junk, sizeof(junk), 1) != 1)
856  tprintf("Bad read of inttemp!\n");
857  }
858  }
859  int num_configs = version_id < 4 ? MaxNumConfigs : Class->NumConfigs;
860  ASSERT_HOST(num_configs <= MaxNumConfigs);
861  if (fp->FReadEndian(Class->ConfigLengths, sizeof(uint16_t), num_configs) !=
862  num_configs) {
863  tprintf("Bad read of inttemp!\n");
864  }
865  if (version_id < 2) {
866  ClassForClassId (Templates, ClassIdFor[i]) = Class;
867  } else {
868  ClassForClassId (Templates, i) = Class;
869  }
870 
871  /* then read in the proto lengths */
872  Lengths = nullptr;
873  if (MaxNumIntProtosIn (Class) > 0) {
874  Lengths = static_cast<uint8_t *>(Emalloc(sizeof(uint8_t) * MaxNumIntProtosIn(Class)));
875  if (fp->FRead(Lengths, sizeof(uint8_t), MaxNumIntProtosIn(Class)) !=
876  MaxNumIntProtosIn(Class))
877  tprintf("Bad read of inttemp!\n");
878  }
879  Class->ProtoLengths = Lengths;
880 
881  /* then read in the proto sets */
882  for (j = 0; j < Class->NumProtoSets; j++) {
883  ProtoSet = static_cast<PROTO_SET>(Emalloc(sizeof(PROTO_SET_STRUCT)));
884  int num_buckets = NUM_PP_PARAMS * NUM_PP_BUCKETS * WERDS_PER_PP_VECTOR;
885  if (fp->FReadEndian(&ProtoSet->ProtoPruner,
886  sizeof(ProtoSet->ProtoPruner[0][0][0]),
887  num_buckets) != num_buckets)
888  tprintf("Bad read of inttemp!\n");
889  for (x = 0; x < PROTOS_PER_PROTO_SET; x++) {
890  if (fp->FRead(&ProtoSet->Protos[x].A, sizeof(ProtoSet->Protos[x].A),
891  1) != 1 ||
892  fp->FRead(&ProtoSet->Protos[x].B, sizeof(ProtoSet->Protos[x].B),
893  1) != 1 ||
894  fp->FRead(&ProtoSet->Protos[x].C, sizeof(ProtoSet->Protos[x].C),
895  1) != 1 ||
896  fp->FRead(&ProtoSet->Protos[x].Angle,
897  sizeof(ProtoSet->Protos[x].Angle), 1) != 1)
898  tprintf("Bad read of inttemp!\n");
899  if (fp->FReadEndian(&ProtoSet->Protos[x].Configs,
900  sizeof(ProtoSet->Protos[x].Configs[0]),
901  WerdsPerConfigVec) != WerdsPerConfigVec)
902  cprintf("Bad read of inttemp!\n");
903  }
904  Class->ProtoSets[j] = ProtoSet;
905  }
906  if (version_id < 4) {
907  Class->font_set_id = -1;
908  } else {
909  fp->FReadEndian(&Class->font_set_id, sizeof(Class->font_set_id), 1);
910  }
911  }
912 
913  if (version_id < 2) {
914  /* add an empty nullptr class with class id 0 */
915  assert(UnusedClassIdIn (Templates, 0));
916  ClassForClassId (Templates, 0) = NewIntClass (1, 1);
917  ClassForClassId (Templates, 0)->font_set_id = -1;
918  Templates->NumClasses++;
919  /* make sure the classes are contiguous */
920  for (i = 0; i < MAX_NUM_CLASSES; i++) {
921  if (i < Templates->NumClasses) {
922  if (ClassForClassId (Templates, i) == nullptr) {
923  fprintf(stderr, "Non-contiguous class ids in inttemp\n");
924  exit(1);
925  }
926  } else {
927  if (ClassForClassId (Templates, i) != nullptr) {
928  fprintf(stderr, "Class id %d exceeds NumClassesIn (Templates) %d\n",
929  i, Templates->NumClasses);
930  exit(1);
931  }
932  }
933  }
934  }
935  if (version_id >= 4) {
936  using namespace std::placeholders; // for _1, _2
937  this->fontinfo_table_.read(fp, std::bind(read_info, _1, _2));
938  if (version_id >= 5) {
939  this->fontinfo_table_.read(fp,
940  std::bind(read_spacing_info, _1, _2));
941  }
942  this->fontset_table_.read(fp, std::bind(read_set, _1, _2));
943  }
944 
945  // Clean up.
946  delete[] IndexFor;
947  delete[] ClassIdFor;
948  delete[] TempClassPruner;
949 
950  return (Templates);

◆ ReadNewCutoffs()

void tesseract::Classify::ReadNewCutoffs ( TFile fp,
uint16_t *  Cutoffs 
)

Reads all of the class-id/cutoff pairs from the already-open file fp and inserts them into the Cutoffs array. Cutoffs are indexed in the array by class id. Unused entries in the array are set to an arbitrarily high cutoff value (MAX_CUTOFF). If a shape table is loaded, the shape-table cutoffs are deserialized from fp first.

Parameters
fp: file containing cutoff definitions
Cutoffs: array to put cutoffs into

Definition at line 40 of file cutoffs.cpp.

41  {
42  int Cutoff;
43 
44  if (shape_table_ != nullptr) {
45  if (!shapetable_cutoffs_.DeSerialize(fp)) {
46  tprintf("Error during read of shapetable pffmtable!\n");
47  }
48  }
49  for (int i = 0; i < MAX_NUM_CLASSES; i++)
50  Cutoffs[i] = MAX_CUTOFF;
51 
52  const int kMaxLineSize = 100;
53  char line[kMaxLineSize];
54  while (fp->FGets(line, kMaxLineSize) != nullptr) {
55  std::string Class;
56  CLASS_ID ClassId;
57  std::istringstream stream(line);
58  stream >> Class >> Cutoff;
59  if (stream.fail()) {
60  break;
61  }
62  if (Class.compare("NULL") == 0) {
63  ClassId = unicharset.unichar_to_id(" ");
64  } else {
65  ClassId = unicharset.unichar_to_id(Class.c_str());
66  }
67  ASSERT_HOST(ClassId >= 0 && ClassId < MAX_NUM_CLASSES);
68  Cutoffs[ClassId] = Cutoff;
69  }

◆ ReadNormProtos()

NORM_PROTOS * tesseract::Classify::ReadNormProtos ( TFile fp)

This routine allocates a new data structure to hold a set of character normalization protos. It then fills in the data structure by reading from fp.

Parameters
fp: open text file to read normalization protos from

Globals: none

Returns
Character normalization protos.

Definition at line 189 of file normmatch.cpp.

190  {
192  int i;
193  char unichar[2 * UNICHAR_LEN + 1];
194  UNICHAR_ID unichar_id;
195  LIST Protos;
196  int NumProtos;
197 
198  /* allocate and initialization data structure */
199  NormProtos = static_cast<NORM_PROTOS *>(Emalloc (sizeof (NORM_PROTOS)));
201  NormProtos->Protos = static_cast<LIST *>(Emalloc (NormProtos->NumProtos * sizeof(LIST)));
202  for (i = 0; i < NormProtos->NumProtos; i++)
203  NormProtos->Protos[i] = NIL_LIST;
204 
205  /* read file header and save in data structure */
208 
209  /* read protos for each class into a separate list */
210  const int kMaxLineSize = 100;
211  char line[kMaxLineSize];
212  while (fp->FGets(line, kMaxLineSize) != nullptr) {
213  std::istringstream stream(line);
214  stream >> unichar >> NumProtos;
215  if (stream.fail()) {
216  continue;
217  }
218  if (unicharset.contains_unichar(unichar)) {
219  unichar_id = unicharset.unichar_to_id(unichar);
220  Protos = NormProtos->Protos[unichar_id];
221  for (i = 0; i < NumProtos; i++)
222  Protos = push_last(Protos, ReadPrototype(fp, NormProtos->NumParams));
223  NormProtos->Protos[unichar_id] = Protos;
224  } else {
225  tprintf("Error: unichar %s in normproto file is not in unichar set.\n",
226  unichar);
227  for (i = 0; i < NumProtos; i++)
229  }
230  }
231  return (NormProtos);

◆ RefreshDebugWindow()

void tesseract::Classify::RefreshDebugWindow ( ScrollView **  win,
const char *  msg,
int  y_offset,
const TBOX wbox 
)

Definition at line 226 of file adaptmatch.cpp.

227  {
228  #ifndef GRAPHICS_DISABLED
229  const int kSampleSpaceWidth = 500;
230  if (*win == nullptr) {
231  *win = new ScrollView(msg, 100, y_offset, kSampleSpaceWidth * 2, 200,
232  kSampleSpaceWidth * 2, 200, true);
233  }
234  (*win)->Clear();
235  (*win)->Pen(64, 64, 64);
236  (*win)->Line(-kSampleSpaceWidth, kBlnBaselineOffset,
237  kSampleSpaceWidth, kBlnBaselineOffset);
238  (*win)->Line(-kSampleSpaceWidth, kBlnXHeight + kBlnBaselineOffset,
239  kSampleSpaceWidth, kBlnXHeight + kBlnBaselineOffset);
240  (*win)->ZoomToRectangle(wbox.left(), wbox.top(),
241  wbox.right(), wbox.bottom());
242  #endif // GRAPHICS_DISABLED
243 }

◆ RemoveBadMatches()

void tesseract::Classify::RemoveBadMatches ( ADAPT_RESULTS Results)

This routine steps through each match in Results and removes it from the match list if its rating is worse than the best rating minus a pad (i.e. below Results->best_rating - matcher_bad_match_pad). In other words, all good matches get moved to the front of the match list, which is then truncated.

Parameters
Results: contains matches to be filtered

Globals:

  • matcher_bad_match_pad defines a "bad match"

Definition at line 2033 of file adaptmatch.cpp.

2033  {
2034  int Next, NextGood;
2035  float BadMatchThreshold;
2036  static const char* romans = "i v x I V X";
2037  BadMatchThreshold = Results->best_rating - matcher_bad_match_pad;
2038 
2040  UNICHAR_ID unichar_id_one = unicharset.contains_unichar("1") ?
2041  unicharset.unichar_to_id("1") : -1;
2042  UNICHAR_ID unichar_id_zero = unicharset.contains_unichar("0") ?
2043  unicharset.unichar_to_id("0") : -1;
2044  float scored_one = ScoredUnichar(unichar_id_one, *Results);
2045  float scored_zero = ScoredUnichar(unichar_id_zero, *Results);
2046 
2047  for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
2048  const UnicharRating& match = Results->match[Next];
2049  if (match.rating >= BadMatchThreshold) {
2050  if (!unicharset.get_isalpha(match.unichar_id) ||
2051  strstr(romans,
2052  unicharset.id_to_unichar(match.unichar_id)) != nullptr) {
2053  } else if (unicharset.eq(match.unichar_id, "l") &&
2054  scored_one < BadMatchThreshold) {
2055  Results->match[Next].unichar_id = unichar_id_one;
2056  } else if (unicharset.eq(match.unichar_id, "O") &&
2057  scored_zero < BadMatchThreshold) {
2058  Results->match[Next].unichar_id = unichar_id_zero;
2059  } else {
2060  Results->match[Next].unichar_id = INVALID_UNICHAR_ID; // Don't copy.
2061  }
2062  if (Results->match[Next].unichar_id != INVALID_UNICHAR_ID) {
2063  if (NextGood == Next) {
2064  ++NextGood;
2065  } else {
2066  Results->match[NextGood++] = Results->match[Next];
2067  }
2068  }
2069  }
2070  }
2071  } else {
2072  for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
2073  if (Results->match[Next].rating >= BadMatchThreshold) {
2074  if (NextGood == Next) {
2075  ++NextGood;
2076  } else {
2077  Results->match[NextGood++] = Results->match[Next];
2078  }
2079  }
2080  }
2081  }
2082  Results->match.truncate(NextGood);
2083 } /* RemoveBadMatches */

◆ RemoveExtraPuncs()

void tesseract::Classify::RemoveExtraPuncs ( ADAPT_RESULTS Results)

This routine discards extra digits or punctuation from the results. We keep only the top 2 punctuation answers and the top 1 digit answer if present.

Parameters
Results: contains matches to be filtered

Definition at line 2093 of file adaptmatch.cpp.

2093  {
2094  int Next, NextGood;
2095  int punc_count; /*no of garbage characters */
2096  int digit_count;
2097  /*garbage characters */
2098  static char punc_chars[] = ". , ; : / ` ~ ' - = \\ | \" ! _ ^";
2099  static char digit_chars[] = "0 1 2 3 4 5 6 7 8 9";
2100 
2101  punc_count = 0;
2102  digit_count = 0;
2103  for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
2104  const UnicharRating& match = Results->match[Next];
2105  bool keep = true;
2106  if (strstr(punc_chars,
2107  unicharset.id_to_unichar(match.unichar_id)) != nullptr) {
2108  if (punc_count >= 2)
2109  keep = false;
2110  punc_count++;
2111  } else {
2112  if (strstr(digit_chars,
2113  unicharset.id_to_unichar(match.unichar_id)) != nullptr) {
2114  if (digit_count >= 1)
2115  keep = false;
2116  digit_count++;
2117  }
2118  }
2119  if (keep) {
2120  if (NextGood == Next) {
2121  ++NextGood;
2122  } else {
2123  Results->match[NextGood++] = match;
2124  }
2125  }
2126  }
2127  Results->match.truncate(NextGood);
2128 } /* RemoveExtraPuncs */

◆ ResetAdaptiveClassifierInternal()

void tesseract::Classify::ResetAdaptiveClassifierInternal ( )

Definition at line 598 of file adaptmatch.cpp.

598  {
600  tprintf("Resetting adaptive classifier (NumAdaptationsFailed=%d)\n",
601  NumAdaptationsFailed);
602  }
605  if (BackupAdaptedTemplates != nullptr)
607  BackupAdaptedTemplates = nullptr;
608  NumAdaptationsFailed = 0;
609 }

◆ SetAdaptiveThreshold()

void tesseract::Classify::SetAdaptiveThreshold ( float  Threshold)

This routine resets the internal thresholds inside the integer matcher to correspond to the specified threshold.

Parameters
Threshold: threshold for creating new templates

Globals:

  • matcher_good_threshold default good match rating

Definition at line 2141 of file adaptmatch.cpp.

2141  {
2142  Threshold = (Threshold == matcher_good_threshold) ? 0.9: (1.0 - Threshold);
2144  ClipToRange<int>(255 * Threshold, 0, 255));
2146  ClipToRange<int>(255 * Threshold, 0, 255));
2147 } /* SetAdaptiveThreshold */

◆ SetStaticClassifier()

void tesseract::Classify::SetStaticClassifier ( ShapeClassifier static_classifier)

Definition at line 193 of file classify.cpp.

193  {
194  delete static_classifier_;
195  static_classifier_ = static_classifier;
196 }

◆ SettupPass1()

void tesseract::Classify::SettupPass1 ( )

This routine prepares the adaptive matcher for the start of the first pass. Learning is enabled (unless it is disabled for the whole program).

Note
This is somewhat redundant: it simply says that if learning is enabled then it will remain enabled on the first pass, and if it is disabled then it will remain disabled. It is only put here to make it very clear that learning is controlled directly by the global setting of EnableLearning.

Globals:

Definition at line 652 of file adaptmatch.cpp.

652  {
654 
656 
657 } /* SettupPass1 */

◆ SettupPass2()

void tesseract::Classify::SettupPass2 ( )

This routine prepares the adaptive matcher for the start of the second pass. Further learning is disabled.

Globals:

  • EnableLearning: set to false by this routine

Definition at line 669 of file adaptmatch.cpp.

669  {
670  EnableLearning = false;
672 
673 } /* SettupPass2 */

◆ SetupBLCNDenorms()

void tesseract::Classify::SetupBLCNDenorms ( const TBLOB blob,
bool  nonlinear_norm,
DENORM bl_denorm,
DENORM cn_denorm,
INT_FX_RESULT_STRUCT fx_info 
)
static

Definition at line 127 of file intfx.cpp.

130  {
131  // Compute 1st and 2nd moments of the original outline.
132  FCOORD center, second_moments;
133  int length = blob.ComputeMoments(&center, &second_moments);
134  if (fx_info != nullptr) {
135  fx_info->Length = length;
136  fx_info->Rx = IntCastRounded(second_moments.y());
137  fx_info->Ry = IntCastRounded(second_moments.x());
138 
139  fx_info->Xmean = IntCastRounded(center.x());
140  fx_info->Ymean = IntCastRounded(center.y());
141  }
142  // Setup the denorm for Baseline normalization.
143  bl_denorm->SetupNormalization(nullptr, nullptr, &blob.denorm(), center.x(), 128.0f,
144  1.0f, 1.0f, 128.0f, 128.0f);
145  // Setup the denorm for character normalization.
146  if (nonlinear_norm) {
149  TBOX box;
150  blob.GetPreciseBoundingBox(&box);
151  box.pad(1, 1);
152  blob.GetEdgeCoords(box, &x_coords, &y_coords);
153  cn_denorm->SetupNonLinear(&blob.denorm(), box, UINT8_MAX, UINT8_MAX,
154  0.0f, 0.0f, x_coords, y_coords);
155  } else {
156  cn_denorm->SetupNormalization(nullptr, nullptr, &blob.denorm(),
157  center.x(), center.y(),
158  51.2f / second_moments.x(),
159  51.2f / second_moments.y(),
160  128.0f, 128.0f);
161  }

◆ shape_table()

const ShapeTable* tesseract::Classify::shape_table ( ) const
inline

Definition at line 111 of file classify.h.

111  {
112  return shape_table_;
113  }

◆ ShapeIDToClassID()

int tesseract::Classify::ShapeIDToClassID ( int  shape_id) const

Definition at line 2220 of file adaptmatch.cpp.

2220  {
2221  for (int id = 0; id < PreTrainedTemplates->NumClasses; ++id) {
2222  int font_set_id = PreTrainedTemplates->Class[id]->font_set_id;
2223  ASSERT_HOST(font_set_id >= 0);
2224  const FontSet &fs = fontset_table_.get(font_set_id);
2225  for (int config = 0; config < fs.size; ++config) {
2226  if (fs.configs[config] == shape_id)
2227  return id;
2228  }
2229  }
2230  tprintf("Shape %d not found\n", shape_id);
2231  return -1;
2232 }

◆ ShowBestMatchFor()

void tesseract::Classify::ShowBestMatchFor ( int  shape_id,
const INT_FEATURE_STRUCT features,
int  num_features 
)

This routine displays debug information for the best config of the given shape_id for the given set of features.

Parameters
shape_id: classifier id to work with
features: features of the unknown character
num_features: Number of features in the features array.

Definition at line 2159 of file adaptmatch.cpp.

2161  {
2162 #ifndef GRAPHICS_DISABLED
2163  uint32_t config_mask;
2164  if (UnusedClassIdIn(PreTrainedTemplates, shape_id)) {
2165  tprintf("No built-in templates for class/shape %d\n", shape_id);
2166  return;
2167  }
2168  if (num_features <= 0) {
2169  tprintf("Illegal blob (char norm features)!\n");
2170  return;
2171  }
2172  UnicharRating cn_result;
2173  classify_norm_method.set_value(character);
2176  num_features, features, &cn_result,
2179  tprintf("\n");
2180  config_mask = 1 << cn_result.config;
2181 
2182  tprintf("Static Shape ID: %d\n", shape_id);
2183  ShowMatchDisplay();
2185  &config_mask, num_features, features, &cn_result,
2189 #endif // GRAPHICS_DISABLED
2190 } /* ShowBestMatchFor */

◆ ShowMatchDisplay()

void tesseract::Classify::ShowMatchDisplay ( )

This routine sends the shapes in the global display lists to the match debugger window.

Globals:

  • FeatureShapes display list containing feature matches
  • ProtoShapes display list containing proto matches

Definition at line 962 of file intproto.cpp.

963  {
965  if (ProtoDisplayWindow) {
966  ProtoDisplayWindow->Clear();
967  }
968  if (FeatureDisplayWindow) {
969  FeatureDisplayWindow->Clear();
970  }
972  static_cast<NORM_METHOD>(static_cast<int>(classify_norm_method)),
973  IntMatchWindow);
974  IntMatchWindow->ZoomToRectangle(INT_MIN_X, INT_MIN_Y,
976  if (ProtoDisplayWindow) {
977  ProtoDisplayWindow->ZoomToRectangle(INT_MIN_X, INT_MIN_Y,
979  }
980  if (FeatureDisplayWindow) {
981  FeatureDisplayWindow->ZoomToRectangle(INT_MIN_X, INT_MIN_Y,
983  }

◆ StartBackupAdaptiveClassifier()

void tesseract::Classify::StartBackupAdaptiveClassifier ( )

Definition at line 629 of file adaptmatch.cpp.

◆ SwitchAdaptiveClassifier()

void tesseract::Classify::SwitchAdaptiveClassifier ( )

Definition at line 613 of file adaptmatch.cpp.

613  {
614  if (BackupAdaptedTemplates == nullptr) {
616  return;
617  }
619  tprintf("Switch to backup adaptive classifier (NumAdaptationsFailed=%d)\n",
620  NumAdaptationsFailed);
621  }
624  BackupAdaptedTemplates = nullptr;
625  NumAdaptationsFailed = 0;
626 }

◆ TempConfigReliable()

bool tesseract::Classify::TempConfigReliable ( CLASS_ID  class_id,
const TEMP_CONFIG config 
)

Definition at line 2236 of file adaptmatch.cpp.

2237  {
2238  if (classify_learning_debug_level >= 1) {
2239  tprintf("NumTimesSeen for config of %s is %d\n",
2240  getDict().getUnicharset().debug_str(class_id).c_str(),
2241  config->NumTimesSeen);
2242  }
2244  return true;
2245  } else if (config->NumTimesSeen < matcher_min_examples_for_prototyping) {
2246  return false;
2247  } else if (use_ambigs_for_adaption) {
2248  // Go through the ambigs vector and see whether we have already seen
2249  // enough times all the characters represented by the ambigs vector.
2250  const UnicharIdVector *ambigs =
2252  int ambigs_size = (ambigs == nullptr) ? 0 : ambigs->size();
2253  for (int ambig = 0; ambig < ambigs_size; ++ambig) {
2254  ADAPT_CLASS ambig_class = AdaptedTemplates->Class[(*ambigs)[ambig]];
2255  assert(ambig_class != nullptr);
2256  if (ambig_class->NumPermConfigs == 0 &&
2257  ambig_class->MaxNumTimesSeen <
2259  if (classify_learning_debug_level >= 1) {
2260  tprintf("Ambig %s has not been seen enough times,"
2261  " not making config for %s permanent\n",
2262  getDict().getUnicharset().debug_str(
2263  (*ambigs)[ambig]).c_str(),
2264  getDict().getUnicharset().debug_str(class_id).c_str());
2265  }
2266  return false;
2267  }
2268  }
2269  }
2270  return true;
2271 }

◆ UpdateAmbigsGroup()

void tesseract::Classify::UpdateAmbigsGroup ( CLASS_ID  class_id,
TBLOB Blob 
)

Definition at line 2273 of file adaptmatch.cpp.

2273  {
2274  const UnicharIdVector *ambigs =
2276  int ambigs_size = (ambigs == nullptr) ? 0 : ambigs->size();
2277  if (classify_learning_debug_level >= 1) {
2278  tprintf("Running UpdateAmbigsGroup for %s class_id=%d\n",
2279  getDict().getUnicharset().debug_str(class_id).c_str(), class_id);
2280  }
2281  for (int ambig = 0; ambig < ambigs_size; ++ambig) {
2282  CLASS_ID ambig_class_id = (*ambigs)[ambig];
2283  const ADAPT_CLASS ambigs_class = AdaptedTemplates->Class[ambig_class_id];
2284  for (int cfg = 0; cfg < MAX_NUM_CONFIGS; ++cfg) {
2285  if (ConfigIsPermanent(ambigs_class, cfg)) continue;
2286  const TEMP_CONFIG config =
2287  TempConfigFor(AdaptedTemplates->Class[ambig_class_id], cfg);
2288  if (config != nullptr && TempConfigReliable(ambig_class_id, config)) {
2289  if (classify_learning_debug_level >= 1) {
2290  tprintf("Making config %d of %s permanent\n", cfg,
2291  getDict().getUnicharset().debug_str(
2292  ambig_class_id).c_str());
2293  }
2294  MakePermanent(AdaptedTemplates, ambig_class_id, cfg, Blob);
2295  }
2296  }
2297  }
2298 }

◆ WriteAdaptedTemplates()

void tesseract::Classify::WriteAdaptedTemplates ( FILE *  File,
ADAPT_TEMPLATES  Templates 
)

This routine saves Templates to File in a binary format.

Parameters
File: open file to write Templates to
Templates: set of adapted templates to write to File
Note
Globals: none

Definition at line 453 of file adaptive.cpp.

453  {
454  int i;
455 
456  /* first write the high level adaptive template struct */
457  fwrite(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1, File);
458 
459  /* then write out the basic integer templates */
460  WriteIntTemplates (File, Templates->Templates, unicharset);
461 
462  /* then write out the adaptive info for each class */
463  for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
464  WriteAdaptedClass (File, Templates->Class[i],
465  Templates->Templates->Class[i]->NumConfigs);
466  }
467 } /* WriteAdaptedTemplates */

◆ WriteIntTemplates()

void tesseract::Classify::WriteIntTemplates ( FILE *  File,
INT_TEMPLATES  Templates,
const UNICHARSET & target_unicharset 
)

This routine writes Templates to File. The format is an efficient binary format. File must already be open for writing.

Parameters
File: open file to write templates to
Templates: templates to save into File
target_unicharset: the UNICHARSET to use

Definition at line 1017 of file intproto.cpp.

1019  {
1020  int i, j;
1021  INT_CLASS Class;
1022  int unicharset_size = target_unicharset.size();
1023  int version_id = -5; // When negated by the reader -1 becomes +1 etc.
1024 
1025  if (Templates->NumClasses != unicharset_size) {
1026  cprintf("Warning: executing WriteIntTemplates() with %d classes in"
1027  " Templates, while target_unicharset size is %d\n",
1028  Templates->NumClasses, unicharset_size);
1029  }
1030 
1031  /* first write the high level template struct */
1032  fwrite(&unicharset_size, sizeof(unicharset_size), 1, File);
1033  fwrite(&version_id, sizeof(version_id), 1, File);
1034  fwrite(&Templates->NumClassPruners, sizeof(Templates->NumClassPruners),
1035  1, File);
1036  fwrite(&Templates->NumClasses, sizeof(Templates->NumClasses), 1, File);
1037 
1038  /* then write out the class pruners */
1039  for (i = 0; i < Templates->NumClassPruners; i++)
1040  fwrite(Templates->ClassPruners[i],
1041  sizeof(CLASS_PRUNER_STRUCT), 1, File);
1042 
1043  /* then write out each class */
1044  for (i = 0; i < Templates->NumClasses; i++) {
1045  Class = Templates->Class[i];
1046 
1047  /* first write out the high level struct for the class */
1048  fwrite(&Class->NumProtos, sizeof(Class->NumProtos), 1, File);
1049  fwrite(&Class->NumProtoSets, sizeof(Class->NumProtoSets), 1, File);
1050  ASSERT_HOST(Class->NumConfigs == this->fontset_table_.get(Class->font_set_id).size);
1051  fwrite(&Class->NumConfigs, sizeof(Class->NumConfigs), 1, File);
1052  for (j = 0; j < Class->NumConfigs; ++j) {
1053  fwrite(&Class->ConfigLengths[j], sizeof(uint16_t), 1, File);
1054  }
1055 
1056  /* then write out the proto lengths */
1057  if (MaxNumIntProtosIn (Class) > 0) {
1058  fwrite(Class->ProtoLengths, sizeof(uint8_t),
1059  MaxNumIntProtosIn(Class), File);
1060  }
1061 
1062  /* then write out the proto sets */
1063  for (j = 0; j < Class->NumProtoSets; j++)
1064  fwrite(Class->ProtoSets[j], sizeof(PROTO_SET_STRUCT), 1, File);
1065 
1066  /* then write the fonts info */
1067  fwrite(&Class->font_set_id, sizeof(int), 1, File);
1068  }
1069 
1070  /* Write the fonts info tables */
1071  using namespace std::placeholders; // for _1, _2
1072  this->fontinfo_table_.write(File, std::bind(write_info, _1, _2));
1073  this->fontinfo_table_.write(File,
1074  std::bind(write_spacing_info, _1, _2));
1075  this->fontset_table_.write(File, std::bind(write_set, _1, _2));

◆ WriteTRFile()

bool tesseract::Classify::WriteTRFile ( const STRING & filename)

Definition at line 98 of file blobclass.cpp.

98  {
99  bool result = false;
100  STRING tr_filename = filename + ".tr";
101  FILE* fp = fopen(tr_filename.c_str(), "wb");
102  if (fp) {
103  result =
104  tesseract::Serialize(fp, &tr_file_data_[0], tr_file_data_.length());
105  fclose(fp);
106  }
107  tr_file_data_.truncate_at(0);
108  return result;
109 }

Member Data Documentation

◆ AdaptedTemplates

ADAPT_TEMPLATES tesseract::Classify::AdaptedTemplates = nullptr

Definition at line 515 of file classify.h.

◆ AllConfigsOff

BIT_VECTOR tesseract::Classify::AllConfigsOff = nullptr

Definition at line 524 of file classify.h.

◆ AllConfigsOn

BIT_VECTOR tesseract::Classify::AllConfigsOn = nullptr

Definition at line 523 of file classify.h.

◆ allow_blob_division

bool tesseract::Classify::allow_blob_division = true

"Use divisible blobs chopping"

Definition at line 423 of file classify.h.

◆ AllProtosOn

BIT_VECTOR tesseract::Classify::AllProtosOn = nullptr

Definition at line 522 of file classify.h.

◆ BackupAdaptedTemplates

ADAPT_TEMPLATES tesseract::Classify::BackupAdaptedTemplates = nullptr

Definition at line 519 of file classify.h.

◆ certainty_scale

double tesseract::Classify::certainty_scale = 20.0

"Certainty scaling factor"

Definition at line 473 of file classify.h.

◆ classify_adapt_feature_threshold

int tesseract::Classify::classify_adapt_feature_threshold = 230

"Threshold for good features during adaptive 0-255"

Definition at line 483 of file classify.h.

◆ classify_adapt_proto_threshold

int tesseract::Classify::classify_adapt_proto_threshold = 230

"Threshold for good protos during adaptive 0-255"

Definition at line 481 of file classify.h.

◆ classify_adapted_pruning_factor

double tesseract::Classify::classify_adapted_pruning_factor = 2.5

"Prune poor adapted results this much worse than best result"

Definition at line 477 of file classify.h.

◆ classify_adapted_pruning_threshold

double tesseract::Classify::classify_adapted_pruning_threshold = -1.0

"Threshold at which classify_adapted_pruning_factor starts"

Definition at line 479 of file classify.h.

◆ classify_bln_numeric_mode

bool tesseract::Classify::classify_bln_numeric_mode = 0

"Assume the input is numbers [0-9]."

Definition at line 508 of file classify.h.

◆ classify_char_norm_range

double tesseract::Classify::classify_char_norm_range = 0.2

"Character Normalization Range ..."

Definition at line 436 of file classify.h.

◆ classify_character_fragments_garbage_certainty_threshold

double tesseract::Classify::classify_character_fragments_garbage_certainty_threshold = -3.0

"Exclude fragments that do not match any whole character" " with at least this certainty"

Definition at line 489 of file classify.h.

◆ classify_class_pruner_multiplier

int tesseract::Classify::classify_class_pruner_multiplier = 15

"Class Pruner Multiplier 0-255: "

Definition at line 501 of file classify.h.

◆ classify_class_pruner_threshold

int tesseract::Classify::classify_class_pruner_threshold = 229

"Class Pruner Threshold 0-255"

Definition at line 499 of file classify.h.

◆ classify_cp_cutoff_strength

int tesseract::Classify::classify_cp_cutoff_strength = 7

"Class Pruner CutoffStrength: "

Definition at line 503 of file classify.h.

◆ classify_debug_character_fragments

bool tesseract::Classify::classify_debug_character_fragments = false

"Bring up graphical debugging windows for fragments training"

Definition at line 491 of file classify.h.

◆ classify_debug_level

int tesseract::Classify::classify_debug_level = 0

"Classify debug level"

Definition at line 430 of file classify.h.

◆ classify_enable_adaptive_debugger

bool tesseract::Classify::classify_enable_adaptive_debugger = 0

"Enable match debugger"

Definition at line 450 of file classify.h.

◆ classify_enable_adaptive_matcher

bool tesseract::Classify::classify_enable_adaptive_matcher = 1

"Enable adaptive classifier"

Definition at line 445 of file classify.h.

◆ classify_enable_learning

bool tesseract::Classify::classify_enable_learning = true

"Enable adaptive classifier"

Definition at line 429 of file classify.h.

◆ classify_integer_matcher_multiplier

int tesseract::Classify::classify_integer_matcher_multiplier = 10

"Integer Matcher Multiplier 0-255: "

Definition at line 505 of file classify.h.

◆ classify_learn_debug_str

char* tesseract::Classify::classify_learn_debug_str = ""

"Class str to debug learning"

Definition at line 495 of file classify.h.

◆ classify_learning_debug_level

int tesseract::Classify::classify_learning_debug_level = 0

"Learning Debug Level: "

Definition at line 455 of file classify.h.

◆ classify_max_certainty_margin

double tesseract::Classify::classify_max_certainty_margin = 5.5

"Veto difference between classifier certainties"

Definition at line 440 of file classify.h.

◆ classify_max_rating_ratio

double tesseract::Classify::classify_max_rating_ratio = 1.5

"Veto ratio between classifier ratings"

Definition at line 438 of file classify.h.

◆ classify_misfit_junk_penalty

double tesseract::Classify::classify_misfit_junk_penalty = 0.0

"Penalty to apply when a non-alnum is vertically out of " "its expected textline position"

Definition at line 471 of file classify.h.

◆ classify_nonlinear_norm

bool tesseract::Classify::classify_nonlinear_norm = 0

"Non-linear stroke-density normalization"

Definition at line 452 of file classify.h.

◆ classify_norm_method

int tesseract::Classify::classify_norm_method = character

"Normalization Method ..."

Definition at line 434 of file classify.h.

◆ classify_save_adapted_templates

bool tesseract::Classify::classify_save_adapted_templates = 0

"Save adapted templates to a file"

Definition at line 449 of file classify.h.

◆ classify_use_pre_adapted_templates

bool tesseract::Classify::classify_use_pre_adapted_templates = 0

"Use pre-adapted classifier templates"

Definition at line 447 of file classify.h.

◆ disable_character_fragments

bool tesseract::Classify::disable_character_fragments = true

"Do not include character fragments in the" " results of the classifier"

Definition at line 486 of file classify.h.

◆ EnableLearning

bool tesseract::Classify::EnableLearning = true

Definition at line 577 of file classify.h.

◆ feature_defs_

FEATURE_DEFS_STRUCT tesseract::Classify::feature_defs_
protected

Definition at line 541 of file classify.h.

◆ fontinfo_table_

UnicityTable<FontInfo> tesseract::Classify::fontinfo_table_

Definition at line 529 of file classify.h.

◆ fontset_table_

UnicityTable<FontSet> tesseract::Classify::fontset_table_

Definition at line 537 of file classify.h.

◆ im_

IntegerMatcher tesseract::Classify::im_
protected

Definition at line 540 of file classify.h.

◆ matcher_avg_noise_size

double tesseract::Classify::matcher_avg_noise_size = 12.0

"Avg. noise blob length: "

Definition at line 461 of file classify.h.

◆ matcher_bad_match_pad

double tesseract::Classify::matcher_bad_match_pad = 0.15

"Bad Match Pad (0-1)"

Definition at line 459 of file classify.h.

◆ matcher_clustering_max_angle_delta

double tesseract::Classify::matcher_clustering_max_angle_delta = 0.015

"Maximum angle delta for prototype clustering"

Definition at line 468 of file classify.h.

◆ matcher_debug_flags

int tesseract::Classify::matcher_debug_flags = 0

"Matcher Debug Flags"

Definition at line 454 of file classify.h.

◆ matcher_debug_level

int tesseract::Classify::matcher_debug_level = 0

"Matcher Debug Level"

Definition at line 453 of file classify.h.

◆ matcher_debug_separate_windows

bool tesseract::Classify::matcher_debug_separate_windows = false

"Use two different windows for debugging the matching: " "One for the protos and one for the features."

Definition at line 494 of file classify.h.

◆ matcher_good_threshold

double tesseract::Classify::matcher_good_threshold = 0.125

"Good Match (0-1)"

Definition at line 456 of file classify.h.

◆ matcher_min_examples_for_prototyping

int tesseract::Classify::matcher_min_examples_for_prototyping = 3

"Reliable Config Threshold"

Definition at line 464 of file classify.h.

◆ matcher_perfect_threshold

double tesseract::Classify::matcher_perfect_threshold = 0.02

"Perfect Match (0-1)"

Definition at line 458 of file classify.h.

◆ matcher_permanent_classes_min

int tesseract::Classify::matcher_permanent_classes_min = 1

"Min # of permanent classes"

Definition at line 462 of file classify.h.

◆ matcher_rating_margin

double tesseract::Classify::matcher_rating_margin = 0.1

"New template margin (0-1)"

Definition at line 460 of file classify.h.

◆ matcher_reliable_adaptive_result

double tesseract::Classify::matcher_reliable_adaptive_result = 0.0

"Great Match (0-1)"

Definition at line 457 of file classify.h.

◆ matcher_sufficient_examples_for_prototyping

int tesseract::Classify::matcher_sufficient_examples_for_prototyping = 5

"Enable adaption even if the ambiguities have not been seen"

Definition at line 466 of file classify.h.

◆ NormProtos

NORM_PROTOS* tesseract::Classify::NormProtos = nullptr

Definition at line 527 of file classify.h.

◆ PreTrainedTemplates

INT_TEMPLATES tesseract::Classify::PreTrainedTemplates = nullptr

Definition at line 514 of file classify.h.

◆ prioritize_division

bool tesseract::Classify::prioritize_division = false

"Prioritize blob division over chopping"

Definition at line 428 of file classify.h.

◆ rating_scale

double tesseract::Classify::rating_scale = 1.5

"Rating scaling factor"

Definition at line 472 of file classify.h.

◆ shape_table_

ShapeTable* tesseract::Classify::shape_table_ = nullptr
protected

Definition at line 546 of file classify.h.

◆ speckle_large_max_size

double tesseract::Classify::speckle_large_max_size = 0.30

"Max large speckle size"

Definition at line 509 of file classify.h.

◆ speckle_rating_penalty

double tesseract::Classify::speckle_rating_penalty = 10.0

"Penalty to add to worst rating for noise"

Definition at line 511 of file classify.h.

◆ TempProtoMask

BIT_VECTOR tesseract::Classify::TempProtoMask = nullptr

Definition at line 525 of file classify.h.

◆ tess_bn_matching

bool tesseract::Classify::tess_bn_matching = 0

"Baseline Normalized Matching"

Definition at line 444 of file classify.h.

◆ tess_cn_matching

bool tesseract::Classify::tess_cn_matching = 0

"Character Normalized Matching"

Definition at line 443 of file classify.h.

◆ tessedit_class_miss_scale

double tesseract::Classify::tessedit_class_miss_scale = 0.00390625

"Scale factor for features not used"

Definition at line 475 of file classify.h.


The documentation for this class was generated from the following files:
IntegerMatcher::Match
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:510
NO_CLASS
#define NO_CLASS
Definition: matchdefs.h:34
tesseract::Classify::tessedit_class_miss_scale
double tessedit_class_miss_scale
Definition: classify.h:475
INT_TEMPLATES_STRUCT
Definition: intproto.h:117
tesseract::FontInfoDeleteCallback
void FontInfoDeleteCallback(FontInfo f)
Definition: fontinfo.cpp:141
string
std::string string
Definition: equationdetect_test.cc:21
tesseract::Classify::AllProtosOn
BIT_VECTOR AllProtosOn
Definition: classify.h:522
INT_CLASS_STRUCT::font_set_id
int font_set_id
Definition: intproto.h:111
tesseract::Shape::ContainsUnichar
bool ContainsUnichar(int unichar_id) const
Definition: shapetable.cpp:147
tesseract::Classify::BaselineClassifier
UNICHAR_ID * BaselineClassifier(TBLOB *Blob, const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results)
Definition: adaptmatch.cpp:1265
TBLOB::ClassifyNormalizeIfNeeded
TBLOB * ClassifyNormalizeIfNeeded() const
Definition: blobs.cpp:345
ClipToRange
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:106
PROTO_STRUCT::Length
float Length
Definition: protos.h:41
tesseract::Classify::classify_enable_adaptive_matcher
bool classify_enable_adaptive_matcher
Definition: classify.h:445
tesseract::ShapeClassifier::DebugDisplay
virtual void DebugDisplay(const TrainingSample &sample, Pix *page_pix, UNICHAR_ID unichar_id)
Definition: shapeclassifier.cpp:96
INT_CLASS_STRUCT::ConfigLengths
uint16_t ConfigLengths[MAX_NUM_CONFIGS]
Definition: intproto.h:110
ScrollView
Definition: scrollview.h:97
WERD_RES::ComputeAdaptionThresholds
void ComputeAdaptionThresholds(float certainty_scale, float min_rating, float max_rating, float rating_margin, float *thresholds)
Definition: pageres.cpp:557
PROTOS_PER_PROTO_SET
#define PROTOS_PER_PROTO_SET
Definition: intproto.h:48
AddFeature
bool AddFeature(FEATURE_SET FeatureSet, FEATURE Feature)
Definition: ocrfeatures.cpp:39
tesseract::Classify::ClearCharNormArray
void ClearCharNormArray(uint8_t *char_norm_array)
Definition: float2int.cpp:44
MAX_INT_CHAR_NORM
#define MAX_INT_CHAR_NORM
Definition: float2int.cpp:27
BITS_PER_CP_VECTOR
#define BITS_PER_CP_VECTOR
Definition: intproto.h:58
CLASS_ID
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:33
tesseract::CCUtil::use_ambigs_for_adaption
bool use_ambigs_for_adaption
Definition: ccutil.h:73
ExtractMicros
FEATURE_SET ExtractMicros(TBLOB *Blob, const DENORM &cn_denorm)
Definition: mf.cpp:43
TEMP_PROTO_STRUCT::ProtoId
uint16_t ProtoId
Definition: adaptive.h:41
NewIntTemplates
INT_TEMPLATES NewIntTemplates()
Definition: intproto.cpp:681
SVEventType
SVEventType
Definition: scrollview.h:44
ADAPT_CLASS_STRUCT::MaxNumTimesSeen
uint8_t MaxNumTimesSeen
Definition: adaptive.h:56
tesseract::Classify::matcher_clustering_max_angle_delta
double matcher_clustering_max_angle_delta
Definition: classify.h:468
NORM_PROTOS::NumParams
int NumParams
Definition: normmatch.cpp:49
tesseract::Classify::ExpandShapesAndApplyCorrections
void ExpandShapesAndApplyCorrections(ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, const uint8_t *cn_factors, UnicharRating *int_result, ADAPT_RESULTS *final_results)
Definition: adaptmatch.cpp:1128
ADAPT_CLASS_STRUCT::PermConfigs
BIT_VECTOR PermConfigs
Definition: adaptive.h:59
InitFeatureDefs
void InitFeatureDefs(FEATURE_DEFS_STRUCT *featuredefs)
Definition: featdefs.cpp:111
tesseract::Classify::NormProtos
NORM_PROTOS * NormProtos
Definition: classify.h:527
IDA_SHAPE_INDEX
Definition: intproto.h:156
PROTO_SET_STRUCT::Protos
INT_PROTO_STRUCT Protos[PROTOS_PER_PROTO_SET]
Definition: intproto.h:96
tesseract::Classify::classify_norm_method
int classify_norm_method
Definition: classify.h:434
SVET_CLICK
Definition: scrollview.h:47
tesseract::Classify::matcher_debug_level
int matcher_debug_level
Definition: classify.h:453
NormalizePicoX
void NormalizePicoX(FEATURE_SET FeatureSet)
Definition: picofeat.cpp:192
kBlnXHeight
const int kBlnXHeight
Definition: normalis.h:23
tesseract::BlobToTrainingSample
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
Definition: intfx.cpp:75
WERD_RES::PiecesAllNatural
bool PiecesAllNatural(int start, int count) const
Definition: pageres.cpp:1074
first_node
#define first_node(l)
Definition: oldlist.h:84
TempConfigFor
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:90
ADAPT_RESULTS
Definition: adaptmatch.cpp:91
tesseract::Classify::classify_class_pruner_threshold
int classify_class_pruner_threshold
Definition: classify.h:499
tesseract::Classify::prioritize_division
bool prioritize_division
Definition: classify.h:428
CLASS_STRUCT::Configurations
CONFIGS Configurations
Definition: protos.h:58
PROTO_KEY::ConfigId
int ConfigId
Definition: adaptmatch.cpp:125
INT_FX_RESULT_STRUCT::NumBL
int16_t NumBL
Definition: intfx.h:38
Emalloc
void * Emalloc(int Size)
Definition: emalloc.cpp:31
INT_FX_RESULT_STRUCT::YBottom
uint8_t YBottom
Definition: intfx.h:40
Bucket8For
uint8_t Bucket8For(float param, float offset, int num_buckets)
Definition: intproto.cpp:417
WERD_RES::AlternativeChoiceAdjustmentsWorseThan
bool AlternativeChoiceAdjustmentsWorseThan(float threshold) const
Definition: pageres.cpp:435
NUM_PP_PARAMS
#define NUM_PP_PARAMS
Definition: intproto.h:50
UNICHARSET::get_isdigit
bool get_isdigit(UNICHAR_ID unichar_id) const
Definition: unicharset.h:502
BASELINE_Y_SHIFT
#define BASELINE_Y_SHIFT
Definition: float2int.h:27
WERD_RES::rebuild_word
TWERD * rebuild_word
Definition: pageres.h:260
tesseract::Classify::fontinfo_table_
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:529
EDGEPT::src_outline
C_OUTLINE * src_outline
Definition: blobs.h:192
UNICHARSET::get_isalpha
bool get_isalpha(UNICHAR_ID unichar_id) const
Definition: unicharset.h:481
tesseract::UnicharRating
Definition: shapetable.h:40
tesseract::UnicharRating::unichar_id
UNICHAR_ID unichar_id
Definition: shapetable.h:74
PERM_CONFIG_STRUCT
Definition: adaptive.h:43
ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:87
tesseract::Classify::classify_adapt_proto_threshold
int classify_adapt_proto_threshold
Definition: classify.h:481
SVET_POPUP
Definition: scrollview.h:53
list_rec
Definition: oldlist.h:73
ADAPT_RESULTS::best_rating
float best_rating
Definition: adaptmatch.cpp:96
INT_CLASS_STRUCT
Definition: intproto.h:104
tesseract::write_set
bool write_set(FILE *f, const FontSet &fs)
Definition: fontinfo.cpp:235
IntegerMatcher::ApplyCNCorrection
float ApplyCNCorrection(float rating, int blob_length, int normalization_factor, int matcher_multiplier)
Definition: intmatcher.cpp:1223
tesseract::Classify::MasterMatcher
void MasterMatcher(INT_TEMPLATES templates, int16_t num_features, const INT_FEATURE_STRUCT *features, const uint8_t *norm_factors, ADAPT_CLASS *classes, int debug, int matcher_multiplier, const TBOX &blob_box, const GenericVector< CP_RESULT_STRUCT > &results, ADAPT_RESULTS *final_results)
Definition: adaptmatch.cpp:1088
tesseract::Classify::MakeNewTemporaryConfig
int MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures)
Definition: adaptmatch.cpp:1740
NO_DEBUG
#define NO_DEBUG
Definition: adaptmatch.cpp:79
tesseract::Classify::classify_enable_learning
bool classify_enable_learning
Definition: classify.h:429
INT_MEMBER
#define INT_MEMBER(name, val, comment, vec)
Definition: params.h:312
TBLOB::outlines
TESSLINE * outlines
Definition: blobs.h:398
tesseract::UnicharRating::fonts
GenericVector< ScoredFont > fonts
Definition: shapetable.h:87
TBLOB::denorm
const DENORM & denorm() const
Definition: blobs.h:361
PROTO_STRUCT
Definition: protos.h:34
baseline
Definition: mfoutline.h:62
INT_FX_RESULT_STRUCT
Definition: intfx.h:34
tesseract::Dict::getUnicharAmbigs
const UnicharAmbigs & getUnicharAmbigs() const
Definition: dict.h:108
tesseract::Classify::EnableLearning
bool EnableLearning
Definition: classify.h:577
tesseract::Classify::CharNormClassifier
int CharNormClassifier(TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results)
Definition: adaptmatch.cpp:1311
tesseract::Classify::classify_enable_adaptive_debugger
bool classify_enable_adaptive_debugger
Definition: classify.h:450
IntY
Definition: picofeat.h:45
INT_FEAT_RANGE
#define INT_FEAT_RANGE
Definition: float2int.h:26
INT_FX_RESULT_STRUCT::YTop
uint8_t YTop
Definition: intfx.h:41
tesseract::Classify::EndAdaptiveClassifier
void EndAdaptiveClassifier()
Definition: adaptmatch.cpp:459
EDGEPT::IsHidden
bool IsHidden() const
Definition: blobs.h:174
NewIntClass
INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs)
Definition: intproto.cpp:625
TBLOB::plot
void plot(ScrollView *window, ScrollView::Color color, ScrollView::Color child_color)
Definition: blobs.cpp:508
FCOORD::y
float y() const
Definition: points.h:209
FreeProtoList
void FreeProtoList(LIST *ProtoList)
Definition: cluster.cpp:538
tesseract::Classify::TempProtoMask
BIT_VECTOR TempProtoMask
Definition: classify.h:525
tesseract::ShapeTable::NumShapes
int NumShapes() const
Definition: shapetable.h:274
INT_FEATURE_STRUCT::Theta
uint8_t Theta
Definition: intproto.h:141
tesseract::Classify::ComputeNormMatch
float ComputeNormMatch(CLASS_ID ClassId, const FEATURE_STRUCT &feature, bool DebugMatch)
Definition: normmatch.cpp:93
tesseract::write_info
bool write_info(FILE *f, const FontInfo &fi)
Definition: fontinfo.cpp:166
CLASS_STRUCT::NumProtos
int16_t NumProtos
Definition: protos.h:53
PROTO_ID
int16_t PROTO_ID
Definition: matchdefs.h:39
kStandardFeatureLength
const double kStandardFeatureLength
Definition: intfx.h:45
OutlineFeatLength
Definition: outfeat.h:44
tesseract::Classify::matcher_good_threshold
double matcher_good_threshold
Definition: classify.h:456
TESSLINE
Definition: blobs.h:201
FCOORD::x
float x() const
Definition: points.h:206
CHAR_DESC_STRUCT::FeatureSets
FEATURE_SET FeatureSets[NUM_FEATURE_TYPES]
Definition: featdefs.h:40
tesseract::Classify::classify_adapted_pruning_threshold
double classify_adapted_pruning_threshold
Definition: classify.h:479
TBOX::top
int16_t top() const
Definition: rect.h:57
tesseract::Classify::matcher_permanent_classes_min
int matcher_permanent_classes_min
Definition: classify.h:462
INT_PROTO_STRUCT
Definition: intproto.h:80
Config
CLUSTERCONFIG Config
Definition: commontraining.cpp:88
STRING
Definition: strngs.h:45
ScrollView::Clear
void Clear()
Definition: scrollview.cpp:588
INT_CHAR_NORM_RANGE
#define INT_CHAR_NORM_RANGE
Definition: intproto.h:129
INT_PROTO_STRUCT::Configs
uint32_t Configs[WERDS_PER_CONFIG_VEC]
Definition: intproto.h:85
tesseract::Classify::PrintAdaptedTemplates
void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
Definition: adaptive.cpp:244
BCC_ADAPTED_CLASSIFIER
Definition: ratngs.h:43
STRING::truncate_at
void truncate_at(int32_t index)
Definition: strngs.cpp:258
tesseract::UnicharAmbigs::ReverseAmbigsForAdaption
const UnicharIdVector * ReverseAmbigsForAdaption(UNICHAR_ID unichar_id) const
Definition: ambigs.h:192
ScrollView::BROWN
Definition: scrollview.h:120
FEATURE_STRUCT
Definition: ocrfeatures.h:58
tesseract::Classify::classify_cp_cutoff_strength
int classify_cp_cutoff_strength
Definition: classify.h:503
WERD_RES::fontinfo
const FontInfo * fontinfo
Definition: pageres.h:303
tesseract::Classify::UpdateAmbigsGroup
void UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob)
Definition: adaptmatch.cpp:2273
tesseract::Classify::NormalizeOutlines
void NormalizeOutlines(LIST Outlines, float *XScale, float *YScale)
Definition: mfoutline.cpp:275
cprintf
void cprintf(const char *format,...)
Definition: callcpp.cpp:32
tesseract::Dict::EndDangerousAmbigs
void EndDangerousAmbigs()
Definition: stopper.cpp:374
ADAPT_TEMPLATES_STRUCT
Definition: adaptive.h:65
tesseract::Classify::ConvertProto
void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class)
Definition: intproto.cpp:487
ConvertToPicoFeatures2
void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet)
Definition: picofeat.cpp:154
tesseract::CCUtil::imagefile
STRING imagefile
Definition: ccutil.h:61
INT_MIN_X
#define INT_MIN_X
Definition: intproto.cpp:58
ReadParamDesc
PARAM_DESC * ReadParamDesc(TFile *fp, uint16_t N)
Definition: clusttool.cpp:140
NORM_PROTOS
Definition: normmatch.cpp:33
PicoFeatY
Definition: picofeat.h:43
tesseract::Classify::ConvertMatchesToChoices
void ConvertMatchesToChoices(const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices)
Definition: adaptmatch.cpp:1413
tesseract::Classify::classify_max_certainty_margin
double classify_max_certainty_margin
Definition: classify.h:440
TBLOB::ComputeMoments
int ComputeMoments(FCOORD *center, FCOORD *second_moments) const
Definition: blobs.cpp:520
NUM_PP_BUCKETS
#define NUM_PP_BUCKETS
Definition: intproto.h:51
tesseract::Classify::classify_learning_debug_level
int classify_learning_debug_level
Definition: classify.h:455
IntCastRounded
int IntCastRounded(double x)
Definition: helpers.h:173
GeoWidth
Definition: picofeat.h:38
tesseract::Classify::classify_class_pruner_multiplier
int classify_class_pruner_multiplier
Definition: classify.h:501
CircBucketFor
uint8_t CircBucketFor(float param, float offset, int num_buckets)
Definition: intproto.cpp:431
tesseract::read_spacing_info
bool read_spacing_info(TFile *f, FontInfo *fi)
Definition: fontinfo.cpp:173
MF_SCALE_FACTOR
const float MF_SCALE_FACTOR
Definition: mfoutline.h:70
PROTO_SET_STRUCT::ProtoPruner
PROTO_PRUNER ProtoPruner
Definition: intproto.h:95
tesseract::Dict::SettupStopperPass1
void SettupStopperPass1()
Sets up stopper variables in preparation for the first pass.
Definition: stopper.cpp:378
tesseract::Classify::SetupBLCNDenorms
static void SetupBLCNDenorms(const TBLOB &blob, bool nonlinear_norm, DENORM *bl_denorm, DENORM *cn_denorm, INT_FX_RESULT_STRUCT *fx_info)
Definition: intfx.cpp:127
WERD_RES::best_state
GenericVector< int > best_state
Definition: pageres.h:279
tesseract::Classify::LooksLikeGarbage
bool LooksLikeGarbage(TBLOB *blob)
Definition: adaptmatch.cpp:1633
INT_CLASS_STRUCT::NumProtos
uint16_t NumProtos
Definition: intproto.h:105
tesseract::Classify::RemoveBadMatches
void RemoveBadMatches(ADAPT_RESULTS *Results)
Definition: adaptmatch.cpp:2033
CPrunerBitIndexFor
#define CPrunerBitIndexFor(c)
Definition: intproto.h:182
ADAPT_TEMPLATES_STRUCT::NumNonEmptyClasses
int NumNonEmptyClasses
Definition: adaptive.h:67
UNICHARSET::eq
bool eq(UNICHAR_ID unichar_id, const char *const unichar_repr) const
Definition: unicharset.cpp:686
PRINT_FEATURE_MATCHES
#define PRINT_FEATURE_MATCHES
Definition: intproto.h:190
OutlineFeatDir
Definition: outfeat.h:45
tesseract::CCUtil::language_data_path_prefix
STRING language_data_path_prefix
Definition: ccutil.h:56
TESSLINE::next
TESSLINE * next
Definition: blobs.h:279
tesseract::Classify::ComputeIntFeatures
void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures)
Definition: float2int.cpp:90
NIL_LIST
#define NIL_LIST
Definition: oldlist.h:68
ScrollView::ZoomToRectangle
void ZoomToRectangle(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:755
tesseract::Classify::InitAdaptedClass
void InitAdaptedClass(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates)
Definition: adaptmatch.cpp:693
ADAPT_TEMPLATES_STRUCT::NumPermClasses
uint8_t NumPermClasses
Definition: adaptive.h:68
FCOORD
Definition: points.h:187
tesseract::Classify::PrintAdaptiveMatchResults
void PrintAdaptiveMatchResults(const ADAPT_RESULTS &results)
Definition: adaptmatch.cpp:2013
tesseract::ShapeClassifier::UnicharClassifySample
virtual int UnicharClassifySample(const TrainingSample &sample, Pix *page_pix, int debug, UNICHAR_ID keep_this, GenericVector< UnicharRating > *results)
Definition: shapeclassifier.cpp:39
INT_TEMPLATES_STRUCT::NumClassPruners
int NumClassPruners
Definition: intproto.h:119
PicoFeatX
Definition: picofeat.h:43
tesseract::write_spacing_info
bool write_spacing_info(FILE *f, const FontInfo &fi)
Definition: fontinfo.cpp:201
ScrollView::BLUE
Definition: scrollview.h:108
PROTO_STRUCT::B
float B
Definition: protos.h:36
MAX_NUM_CLASS_PRUNERS
#define MAX_NUM_CLASS_PRUNERS
Definition: intproto.h:59
IntegerMatcher::FindGoodProtos
int FindGoodProtos(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray, int AdaptProtoThreshold, int Debug)
Definition: intmatcher.cpp:588
tesseract::CCUtil::unicharset
UNICHARSET unicharset
Definition: ccutil.h:57
tesseract::ShapeTable::DeSerialize
bool DeSerialize(TFile *fp)
Definition: shapetable.cpp:246
PROTOTYPE
Definition: cluster.h:62
InitIntegerFX
void InitIntegerFX()
Definition: intfx.cpp:48
tesseract::Classify::ExtractIntGeoFeatures
FEATURE_SET ExtractIntGeoFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
Definition: picofeat.cpp:246
CPrunerWordIndexFor
#define CPrunerWordIndexFor(c)
Definition: intproto.h:181
tesseract::Classify::SetAdaptiveThreshold
void SetAdaptiveThreshold(float Threshold)
Definition: adaptmatch.cpp:2141
ProtoIn
#define ProtoIn(Class, Pid)
Definition: protos.h:82
PicoFeatDir
Definition: picofeat.h:43
Y_SHIFT
#define Y_SHIFT
Definition: intproto.h:41
tesseract::Classify::matcher_reliable_adaptive_result
double matcher_reliable_adaptive_result
Definition: classify.h:457
TBOX::height
int16_t height() const
Definition: rect.h:107
UNICHARSET::get_script
int get_script(UNICHAR_ID unichar_id) const
Definition: unicharset.h:653
tesseract::UnicharAmbigs::AmbigsForAdaption
const UnicharIdVector * AmbigsForAdaption(UNICHAR_ID unichar_id) const
Definition: ambigs.h:183
AddIntProto
int AddIntProto(INT_CLASS Class)
Definition: intproto.cpp:281
tesseract::TESSDATA_SHAPE_TABLE
Definition: tessdatamanager.h:70
INT_CLASS_STRUCT::NumProtoSets
uint8_t NumProtoSets
Definition: intproto.h:106
INT_PROTO_STRUCT::B
uint8_t B
Definition: intproto.h:82
kWidthErrorWeighting
const double kWidthErrorWeighting
Definition: normmatch.cpp:73
tesseract::Classify::DisplayAdaptedChar
void DisplayAdaptedChar(TBLOB *blob, INT_CLASS_STRUCT *int_class)
Definition: adaptmatch.cpp:946
ADAPT_CLASS_STRUCT::NumPermConfigs
uint8_t NumPermConfigs
Definition: adaptive.h:55
tesseract::Classify::ClassifyAsNoise
void ClassifyAsNoise(ADAPT_RESULTS *Results)
Definition: adaptmatch.cpp:1399
tesseract::Classify::getDict
virtual Dict & getDict()
Definition: classify.h:107
ADAPT_RESULTS::Initialize
void Initialize()
Definition: adaptmatch.cpp:102
tesseract::TFile::FRead
int FRead(void *buffer, size_t size, int count)
Definition: serialis.cpp:284
AddIntClass
void AddIntClass(INT_TEMPLATES Templates, CLASS_ID ClassId, INT_CLASS Class)
Definition: intproto.cpp:230
IntX
Definition: picofeat.h:44
tesseract::Classify::matcher_avg_noise_size
double matcher_avg_noise_size
Definition: classify.h:461
IntDir
Definition: picofeat.h:46
EDGEPT::prev
EDGEPT * prev
Definition: blobs.h:191
tesseract::UnicharRating::config
uint8_t config
Definition: shapetable.h:81
tesseract::TESSDATA_INTTEMP
Definition: tessdatamanager.h:60
AddProtoToProtoPruner
void AddProtoToProtoPruner(PROTO Proto, int ProtoId, INT_CLASS Class, bool debug)
Definition: intproto.cpp:366
WriteCharDescription
void WriteCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, CHAR_DESC CharDesc, STRING *str)
Definition: featdefs.cpp:173
Y_DIM_OFFSET
#define Y_DIM_OFFSET
Definition: adaptmatch.cpp:84
tesseract::UnicharIdVector
GenericVector< UNICHAR_ID > UnicharIdVector
Definition: ambigs.h:35
reset_bit
#define reset_bit(array, bit)
Definition: bitvec.h:56
INT_PROTO_STRUCT::Angle
uint8_t Angle
Definition: intproto.h:84
MAX_MATCHES
#define MAX_MATCHES
Definition: adaptmatch.cpp:77
GenericVector::push_back
int push_back(T object)
Definition: genericvector.h:799
ADAPT_RESULTS::CPResults
GenericVector< CP_RESULT_STRUCT > CPResults
Definition: adaptmatch.cpp:98
free_adapted_templates
void free_adapted_templates(ADAPT_TEMPLATES templates)
Definition: adaptive.cpp:182
DENORM::SetupNormalization
void SetupNormalization(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift)
Definition: normalis.cpp:95
tesseract::ShapeTable::GetFirstUnicharAndFont
void GetFirstUnicharAndFont(int shape_id, int *unichar_id, int *font_id) const
Definition: shapetable.cpp:404
ADAPTABLE_WERD_ADJUSTMENT
#define ADAPTABLE_WERD_ADJUSTMENT
Definition: adaptmatch.cpp:82
ConvertConfig
void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class)
Definition: intproto.cpp:462
tesseract::Classify::matcher_min_examples_for_prototyping
int matcher_min_examples_for_prototyping
Definition: classify.h:464
SVEvent::parameter
char * parameter
Definition: scrollview.h:65
STRING_MEMBER
#define STRING_MEMBER(name, val, comment, vec)
Definition: params.h:318
DENORM::XHeightRange
void XHeightRange(int unichar_id, const UNICHARSET &unicharset, const TBOX &bbox, float *min_xht, float *max_xht, float *yshift) const
Definition: normalis.cpp:427
TEMP_CONFIG_STRUCT
Definition: adaptive.h:34
UNICHARSET::debug_str
STRING debug_str(UNICHAR_ID id) const
Definition: unicharset.cpp:342
INT_FX_RESULT_STRUCT::Ry
int16_t Ry
Definition: intfx.h:37
WERD_RES::best_choice
WERD_CHOICE * best_choice
Definition: pageres.h:235
BITS_PER_WERD
#define BITS_PER_WERD
Definition: intproto.h:44
INT_FX_RESULT_STRUCT::Ymean
int16_t Ymean
Definition: intfx.h:36
MAX_CUTOFF
#define MAX_CUTOFF
Definition: cutoffs.cpp:29
INT_FX_RESULT_STRUCT::Rx
int16_t Rx
Definition: intfx.h:37
tesseract::Classify::ShowMatchDisplay
void ShowMatchDisplay()
Definition: intproto.cpp:962
STRING::c_str
const char * c_str() const
Definition: strngs.cpp:192
tesseract::UnicharRating::rating
float rating
Definition: shapetable.h:77
UNICHARSET::get_top_bottom
void get_top_bottom(UNICHAR_ID unichar_id, int *min_bottom, int *max_bottom, int *min_top, int *max_top) const
Definition: unicharset.h:558
UNICHARSET::get_enabled
bool get_enabled(UNICHAR_ID unichar_id) const
Definition: unicharset.h:868
INT_MAX_Y
#define INT_MAX_Y
Definition: intproto.cpp:61
LegalClassId
#define LegalClassId(c)
Definition: intproto.h:175
tesseract::Classify::matcher_perfect_threshold
double matcher_perfect_threshold
Definition: classify.h:458
MAX_NUM_CONFIGS
#define MAX_NUM_CONFIGS
Definition: intproto.h:46
MAX_NUM_PROTOS
#define MAX_NUM_PROTOS
Definition: intproto.h:47
ADAPT_TEMPLATE_SUFFIX
#define ADAPT_TEMPLATE_SUFFIX
Definition: adaptmatch.cpp:75
GenericVector::DeSerialize
bool DeSerialize(bool swap, FILE *fp)
Definition: genericvector.h:954
MAX_NUM_CLASSES
#define MAX_NUM_CLASSES
Definition: matchdefs.h:29
MakeConfigPermanent
#define MakeConfigPermanent(Class, ConfigId)
Definition: adaptive.h:84
tesseract::Classify::ReadAdaptedTemplates
ADAPT_TEMPLATES ReadAdaptedTemplates(TFile *File)
Definition: adaptive.cpp:332
delete_d
LIST delete_d(LIST list, void *key, int_compare is_equal)
Definition: oldlist.cpp:93
tesseract::Classify::shape_table_
ShapeTable * shape_table_
Definition: classify.h:546
tesseract::Classify::PruneClasses
int PruneClasses(const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uint8_t *normalization_factors, const uint16_t *expected_num_features, GenericVector< CP_RESULT_STRUCT > *results)
Definition: intmatcher.cpp:451
FLOATUNION::Elliptical
float * Elliptical
Definition: cluster.h:59
CharNormLength
Definition: normfeat.h:29
NO_PROTO
#define NO_PROTO
Definition: matchdefs.h:40
INT_CLASS_STRUCT::ProtoLengths
uint8_t * ProtoLengths
Definition: intproto.h:109
tesseract::Classify::speckle_large_max_size
double speckle_large_max_size
Definition: classify.h:509
tesseract::Classify::disable_character_fragments
bool disable_character_fragments
Definition: classify.h:486
PROTO_STRUCT::Y
float Y
Definition: protos.h:39
INT_MAX_X
#define INT_MAX_X
Definition: intproto.cpp:60
tesseract::Classify::MakePermanent
void MakePermanent(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob)
Definition: adaptmatch.cpp:1920
FreeFeature
void FreeFeature(FEATURE Feature)
Definition: ocrfeatures.cpp:53
FreeTempConfig
void FreeTempConfig(TEMP_CONFIG Config)
Definition: adaptive.cpp:74
MaxNumIntProtosIn
#define MaxNumIntProtosIn(C)
Definition: intproto.h:164
NORM_PROTOS::ParamDesc
PARAM_DESC * ParamDesc
Definition: normmatch.cpp:50
LENGTH_COMPRESSION
#define LENGTH_COMPRESSION
Definition: normfeat.h:26
InitIntMatchWindowIfReqd
void InitIntMatchWindowIfReqd()
Definition: intproto.cpp:1723
tesseract::Classify::BackupAdaptedTemplates
ADAPT_TEMPLATES BackupAdaptedTemplates
Definition: classify.h:519
tesseract::Classify::classify_save_adapted_templates
bool classify_save_adapted_templates
Definition: classify.h:449
tesseract::Classify::classify_nonlinear_norm
bool classify_nonlinear_norm
Definition: classify.h:452
UNICHARSET::unichar_to_id
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:209
ANGLE_SHIFT
#define ANGLE_SHIFT
Definition: intproto.h:39
tesseract::FontSetDeleteCallback
void FontSetDeleteCallback(FontSet fs)
Definition: fontinfo.cpp:150
PROTO_STRUCT::C
float C
Definition: protos.h:37
UNICHAR_SPACE
Definition: unicharset.h:34
tesseract::Classify::AdaptiveClassifier
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
Definition: adaptmatch.cpp:191
PROTOTYPE::Weight
FLOATUNION Weight
Definition: cluster.h:78
ConvertBlob
LIST ConvertBlob(TBLOB *blob)
Definition: mfoutline.cpp:36
CLASS_STRUCT::NumConfigs
int16_t NumConfigs
Definition: protos.h:56
tesseract::Dict::segment_penalty_dict_case_ok
double segment_penalty_dict_case_ok
Definition: dict.h:605
TWERD::blobs
GenericVector< TBLOB * > blobs
Definition: blobs.h:457
tesseract::ScoredFont
Definition: fontinfo.h:38
IncreaseConfidence
#define IncreaseConfidence(TempConfig)
Definition: adaptive.h:94
tesseract::Dict::certainty_scale
double certainty_scale
Definition: dict.h:627
MAX_NUM_INT_FEATURES
#define MAX_NUM_INT_FEATURES
Definition: intproto.h:128
GenericVector::empty
bool empty() const
Definition: genericvector.h:86
WERD_CHOICE::adjust_factor
float adjust_factor() const
Definition: ratngs.h:294
TBOX::width
int16_t width() const
Definition: rect.h:114
tesseract::Classify::classify_max_rating_ratio
double classify_max_rating_ratio
Definition: classify.h:438
UNICHARSET
Definition: unicharset.h:145
tesseract::ShapeTable::DebugStr
STRING DebugStr(int shape_id) const
Definition: shapetable.cpp:281
PROTO_STRUCT::X
float X
Definition: protos.h:38
tesseract::Classify::classify_char_norm_range
double classify_char_norm_range
Definition: classify.h:436
tesseract::Classify::AmbigClassifier
void AmbigClassifier(const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, const TBLOB *blob, INT_TEMPLATES templates, ADAPT_CLASS *classes, UNICHAR_ID *ambiguities, ADAPT_RESULTS *results)
Definition: adaptmatch.cpp:1045
double_MEMBER
#define double_MEMBER(name, val, comment, vec)
Definition: params.h:321
tesseract::Classify::ReadNormProtos
NORM_PROTOS * ReadNormProtos(TFile *fp)
Definition: normmatch.cpp:189
ReadSampleSize
uint16_t ReadSampleSize(TFile *fp)
Definition: clusttool.cpp:120
INT_TEMPLATES_STRUCT::NumClasses
int NumClasses
Definition: intproto.h:118
tesseract::Classify::feature_defs_
FEATURE_DEFS_STRUCT feature_defs_
Definition: classify.h:541
TBOX::bottom
int16_t bottom() const
Definition: rect.h:64
CLASS_STRUCT::font_set
UnicityTableEqEq< int > font_set
Definition: protos.h:59
tesseract::ShapeTable::GetShape
const Shape & GetShape(int shape_id) const
Definition: shapetable.h:319
tesseract::Classify::tess_cn_matching
bool tess_cn_matching
Definition: classify.h:443
FEATURE_SET_STRUCT::Features
FEATURE Features[1]
Definition: ocrfeatures.h:67
OutlineFeatY
Definition: outfeat.h:43
tesseract::Classify::AdaptedTemplates
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:515
tesseract::Classify::matcher_sufficient_examples_for_prototyping
int matcher_sufficient_examples_for_prototyping
Definition: classify.h:466
tesseract::Classify::classify_learn_debug_str
char * classify_learn_debug_str
Definition: classify.h:495
tesseract::Classify::ExtractOutlineFeatures
FEATURE_SET ExtractOutlineFeatures(TBLOB *Blob)
Definition: outfeat.cpp:54
AddIntConfig
int AddIntConfig(INT_CLASS Class)
Definition: intproto.cpp:260
CLASS_STRUCT
Definition: protos.h:45
PROTO_STRUCT::Angle
float Angle
Definition: protos.h:40
SEAM::BreakPieces
static void BreakPieces(const GenericVector< SEAM * > &seams, const GenericVector< TBLOB * > &blobs, int first, int last)
Definition: seam.cpp:186
UnicityTable::get
const T & get(int id) const
Return the object from an id.
Definition: unicity_table.h:140
tesseract::TESSDATA_PFFMTABLE
Definition: tessdatamanager.h:61
TEMP_PROTO_STRUCT
Definition: adaptive.h:26
character
Definition: mfoutline.h:62
INT_FEATURE_STRUCT::Y
uint8_t Y
Definition: intproto.h:140
WERD_RES::chopped_word
TWERD * chopped_word
Definition: pageres.h:206
BIT_VECTOR
uint32_t * BIT_VECTOR
Definition: bitvec.h:27
TBLOB::GetPreciseBoundingBox
void GetPreciseBoundingBox(TBOX *precise_box) const
Definition: blobs.cpp:539
tesseract::Classify::AddLargeSpeckleTo
void AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices)
Definition: classify.cpp:201
ReadPrototype
PROTOTYPE * ReadPrototype(TFile *fp, uint16_t N)
Definition: clusttool.cpp:176
INT_FX_RESULT_STRUCT::NumCN
int16_t NumCN
Definition: intfx.h:38
WERD_CHOICE::debug_string
const STRING debug_string() const
Definition: ratngs.h:493
WERDS_PER_CONFIG_VEC
#define WERDS_PER_CONFIG_VEC
Definition: intproto.h:67
PROTO_KEY::Templates
ADAPT_TEMPLATES Templates
Definition: adaptmatch.cpp:123
tesseract::Classify::WriteIntTemplates
void WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, const UNICHARSET &target_unicharset)
Definition: intproto.cpp:1017
tesseract::CompareFontSet
bool CompareFontSet(const FontSet &fs1, const FontSet &fs2)
Definition: fontinfo.cpp:130
ConfigIsPermanent
#define ConfigIsPermanent(Class, ConfigId)
Definition: adaptive.h:81
push
LIST push(LIST list, void *element)
Definition: oldlist.cpp:172
FillABC
void FillABC(PROTO Proto)
Definition: protos.cpp:105
FEATURE_STRUCT::Params
float Params[1]
Definition: ocrfeatures.h:60
DENORM::SetupNonLinear
void SetupNonLinear(const DENORM *predecessor, const TBOX &box, float target_width, float target_height, float final_xshift, float final_yshift, const GenericVector< GenericVector< int > > &x_coords, const GenericVector< GenericVector< int > > &y_coords)
Definition: normalis.cpp:267
INT_FX_RESULT_STRUCT::Xmean
int16_t Xmean
Definition: intfx.h:36
tesseract::CCUtil::params
ParamsVectors * params()
Definition: ccutil.h:51
tesseract::Classify::LearnPieces
void LearnPieces(const char *fontname, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word)
Definition: adaptmatch.cpp:374
SVEvent::type
SVEventType type
Definition: scrollview.h:63
tesseract::Classify::GetCharNormFeature
int GetCharNormFeature(const INT_FX_RESULT_STRUCT &fx_info, INT_TEMPLATES templates, uint8_t *pruner_norm_array, uint8_t *char_norm_array)
Definition: adaptmatch.cpp:1678
NUM_CP_BUCKETS
#define NUM_CP_BUCKETS
Definition: intproto.h:52
CHAR_FRAGMENT::to_string
STRING to_string() const
Definition: unicharset.h:79
IDA_STATIC
Definition: intproto.h:155
tesseract::Classify::DoAdaptiveMatch
void DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results)
Definition: adaptmatch.cpp:1530
tesseract::Classify::ExtractIntCNFeatures
FEATURE_SET ExtractIntCNFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
Definition: picofeat.cpp:216
tesseract::Classify::GetFontinfoId
int GetFontinfoId(ADAPT_CLASS Class, uint8_t ConfigId)
Definition: adaptive.cpp:173
NewFeature
FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc)
Definition: ocrfeatures.cpp:77
WERD_RES::seam_array
GenericVector< SEAM * > seam_array
Definition: pageres.h:208
BLOB_CHOICE::rating
float rating() const
Definition: ratngs.h:78
NewTempConfig
TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId)
Definition: adaptive.cpp:203
tesseract::Classify::speckle_rating_penalty
double speckle_rating_penalty
Definition: classify.h:511
tesseract::Classify::ComputeCharNormArrays
void ComputeCharNormArrays(FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uint8_t *char_norm_array, uint8_t *pruner_array)
Definition: adaptmatch.cpp:1698
INT_CLASS_STRUCT::ProtoSets
PROTO_SET ProtoSets[MAX_NUM_PROTO_SETS]
Definition: intproto.h:108
UNICHAR_ID
int UNICHAR_ID
Definition: unichar.h:36
INT_FEATURE_ARRAY
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
Definition: intproto.h:151
sample
Definition: cluster.h:31
tesseract::Classify::AllConfigsOff
BIT_VECTOR AllConfigsOff
Definition: classify.h:524
TBLOB::bounding_box
TBOX bounding_box() const
Definition: blobs.cpp:466
GenericVector< UnicharRating >
FEATURE_SET_STRUCT
Definition: ocrfeatures.h:64
tesseract::Classify::im_
IntegerMatcher im_
Definition: classify.h:540
FreeOutlines
void FreeOutlines(LIST Outlines)
Definition: mfoutline.cpp:166
TruncateParam
int TruncateParam(float Param, int Min, int Max, char *Id)
Definition: intproto.cpp:1702
tesseract::Classify::classify_use_pre_adapted_templates
bool classify_use_pre_adapted_templates
Definition: classify.h:447
ADAPT_TEMPLATES_STRUCT::Class
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:69
ExtractCharNormFeatures
FEATURE_SET ExtractCharNormFeatures(const INT_FX_RESULT_STRUCT &fx_info)
Definition: normfeat.cpp:60
NORM_PROTOS::NumProtos
int NumProtos
Definition: normmatch.cpp:52
SVET_ANY
Definition: scrollview.h:55
tesseract::Classify::ExtractPicoFeatures
FEATURE_SET ExtractPicoFeatures(TBLOB *Blob)
Definition: picofeat.cpp:62
CHAR_DESC_STRUCT
Definition: featdefs.h:38
GeoTop
Definition: picofeat.h:37
tesseract::Classify::classify_adapt_feature_threshold
int classify_adapt_feature_threshold
Definition: classify.h:483
UnicityTable::size
int size() const
Return the size used.
Definition: unicity_table.h:127
tesseract::Classify::ReadNewCutoffs
void ReadNewCutoffs(TFile *fp, uint16_t *Cutoffs)
Definition: cutoffs.cpp:40
TEMP_CONFIG_STRUCT::NumTimesSeen
uint8_t NumTimesSeen
Definition: adaptive.h:35
PROTO_KEY::ClassId
CLASS_ID ClassId
Definition: adaptmatch.cpp:124
NewTempProto
TEMP_PROTO NewTempProto()
Definition: adaptive.cpp:228
ScrollView::AwaitEvent
SVEvent * AwaitEvent(SVEventType type)
Definition: scrollview.cpp:443
UNLIKELY_NUM_FEAT
#define UNLIKELY_NUM_FEAT
Definition: adaptmatch.cpp:78
SET_BIT
#define SET_BIT(array, bit)
Definition: bitvec.h:54
tesseract::Classify::classify_adapted_pruning_factor
double classify_adapted_pruning_factor
Definition: classify.h:477
UNICHAR_LEN
#define UNICHAR_LEN
Definition: unichar.h:32
CharNormDesc
const FEATURE_DESC_STRUCT CharNormDesc
classify_norm_adj_midpoint
double classify_norm_adj_midpoint
Definition: normmatch.cpp:70
STRING::length
int32_t length() const
Definition: strngs.cpp:187
PROTO_STRUCT::A
float A
Definition: protos.h:35
tesseract::UnicharRating::SortDescendingRating
static int SortDescendingRating(const void *t1, const void *t2)
Definition: shapetable.h:55
INT_FX_RESULT_STRUCT::Width
int16_t Width
Definition: intfx.h:39
CPrunerIdFor
#define CPrunerIdFor(c)
Definition: intproto.h:179
tesseract::read_info
bool read_info(TFile *f, FontInfo *fi)
Definition: fontinfo.cpp:156
WORST_POSSIBLE_RATING
#define WORST_POSSIBLE_RATING
Definition: adaptmatch.cpp:86
INT_TEMPLATES_STRUCT::Class
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:120
WERD_CHOICE::length
int length() const
Definition: ratngs.h:291
tesseract::Classify::ComputeIntCharNormArray
void ComputeIntCharNormArray(const FEATURE_STRUCT &norm_feature, uint8_t *char_norm_array)
Definition: float2int.cpp:62
TBOX::pad
void pad(int xpad, int ypad)
Definition: rect.h:130
NewFeatureSet
FEATURE_SET NewFeatureSet(int NumFeatures)
Definition: ocrfeatures.cpp:93
WriteAdaptedClass
void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs)
Definition: adaptive.cpp:409
tesseract::Classify::LearnBlob
void LearnBlob(const STRING &fontname, TBLOB *Blob, const DENORM &cn_denorm, const INT_FX_RESULT_STRUCT &fx_info, const char *blob_text)
Definition: blobclass.cpp:70
INT_FEATURE_STRUCT
Definition: intproto.h:131
count
int count(LIST var_list)
Definition: oldlist.cpp:79
tesseract::Classify::FreeNormProtos
void FreeNormProtos()
Definition: normmatch.cpp:167
tesseract::Classify::classify_misfit_junk_penalty
double classify_misfit_junk_penalty
Definition: classify.h:471
BLOB_CHOICE
Definition: ratngs.h:49
TBLOB
Definition: blobs.h:282
PROTOTYPE::Mean
float * Mean
Definition: cluster.h:73
tesseract::Classify::tess_bn_matching
bool tess_bn_matching
Definition: classify.h:444
tesseract::TrainingSample
Definition: trainingsample.h:53
PROTO_SET_STRUCT
Definition: intproto.h:94
GenericVector::truncate
void truncate(int size)
Definition: genericvector.h:132
TBOX::left
int16_t left() const
Definition: rect.h:71
tesseract::FontSet::size
int size
Definition: fontinfo.h:138
tesseract::Classify::RemoveExtraPuncs
void RemoveExtraPuncs(ADAPT_RESULTS *Results)
Definition: adaptmatch.cpp:2093
SVEvent
Definition: scrollview.h:60
UNICHARSET::contains_unichar
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:670
tesseract::Classify::GetAdaptiveFeatures
int GetAdaptiveFeatures(TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures)
Definition: adaptmatch.cpp:786
Efree
void Efree(void *ptr)
Definition: emalloc.cpp:45
ScrollView::GREEN
Definition: scrollview.h:106
ADAPT_RESULTS::best_match_index
int best_match_index
Definition: adaptmatch.cpp:95
iterate
#define iterate(l)
Definition: oldlist.h:92
FreePrototype
void FreePrototype(void *arg)
Definition: cluster.cpp:549
MakeTempProtoPerm
int MakeTempProtoPerm(void *item1, void *item2)
Definition: adaptmatch.cpp:1980
MarginalMatch
bool MarginalMatch(float confidence, float matcher_great_threshold)
Definition: adaptmatch.cpp:131
FreeFeatureSet
void FreeFeatureSet(FEATURE_SET FeatureSet)
Definition: ocrfeatures.cpp:61
ADAPT_CLASS_STRUCT::PermProtos
BIT_VECTOR PermProtos
Definition: adaptive.h:58
GenericVector::clear
void clear()
Definition: genericvector.h:857
tesseract::CompareFontInfo
bool CompareFontInfo(const FontInfo &fi1, const FontInfo &fi2)
Definition: fontinfo.cpp:122
INT_FX_RESULT_STRUCT::Length
int32_t Length
Definition: intfx.h:35
tesseract::Classify::DebugAdaptiveClassifier
void DebugAdaptiveClassifier(TBLOB *Blob, ADAPT_RESULTS *Results)
Definition: adaptmatch.cpp:1497
UpdateMatchDisplay
void UpdateMatchDisplay()
Definition: intproto.cpp:446
ADAPT_RESULTS::best_unichar_id
UNICHAR_ID best_unichar_id
Definition: adaptmatch.cpp:94
tesseract::Classify::AddNewResult
void AddNewResult(const UnicharRating &new_result, ADAPT_RESULTS *results)
Definition: adaptmatch.cpp:994
TBLOB::GetEdgeCoords
void GetEdgeCoords(const TBOX &box, GenericVector< GenericVector< int > > *x_coords, GenericVector< GenericVector< int > > *y_coords) const
Definition: blobs.cpp:555
print_ratings_list
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
Definition: ratngs.cpp:835
SVEvent::command_id
int command_id
Definition: scrollview.h:70
GeoBottom
Definition: picofeat.h:36
TBOX::right
int16_t right() const
Definition: rect.h:78
PRINT_MATCH_SUMMARY
#define PRINT_MATCH_SUMMARY
Definition: intproto.h:187
NormalizeOutlineX
void NormalizeOutlineX(FEATURE_SET FeatureSet)
Definition: outfeat.cpp:144
WERD_RES::correct_text
GenericVector< STRING > correct_text
Definition: pageres.h:283
tesseract::Classify::ComputeCorrectedRating
double ComputeCorrectedRating(bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, int matcher_multiplier, const uint8_t *cn_factors)
Definition: adaptmatch.cpp:1202
IntFeatDesc
const FEATURE_DESC_STRUCT IntFeatDesc
tesseract::FontSet::configs
int * configs
Definition: fontinfo.h:139
BLOB_CHOICE::set_fonts
void set_fonts(const GenericVector< tesseract::ScoredFont > &fonts)
Definition: ratngs.h:94
ADAPT_RESULTS::HasNonfragment
bool HasNonfragment
Definition: adaptmatch.cpp:93
TEMP_PROTO_STRUCT::Proto
PROTO_STRUCT Proto
Definition: adaptive.h:42
X_SHIFT
#define X_SHIFT
Definition: intproto.h:40
INT_TEMPLATES_STRUCT::ClassPruners
CLASS_PRUNER_STRUCT * ClassPruners[MAX_NUM_CLASS_PRUNERS]
Definition: intproto.h:121
IntegerMatcher::FindBadFeatures
int FindBadFeatures(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray, int AdaptFeatureThreshold, int Debug)
Definition: intmatcher.cpp:656
EDGEPT
Definition: blobs.h:97
OutlineFeatX
Definition: outfeat.h:42
WERDS_PER_CP_VECTOR
#define WERDS_PER_CP_VECTOR
Definition: intproto.h:61
tesseract::Classify::TempConfigReliable
bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config)
Definition: adaptmatch.cpp:2236
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
tesseract::Classify::LargeSpeckle
bool LargeSpeckle(const TBLOB &blob)
Definition: classify.cpp:224
tesseract::Dict::getUnicharset
const UNICHARSET & getUnicharset() const
Definition: dict.h:101
UNICHARSET::get_fragment
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:724
CLASS_PRUNER_STRUCT
Definition: intproto.h:75
PROTO_KEY
Definition: adaptmatch.cpp:122
tesseract::Classify::ReadIntTemplates
INT_TEMPLATES ReadIntTemplates(TFile *fp)
Definition: intproto.cpp:717
tesseract::Classify::PreTrainedTemplates
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:514
CLASS_PRUNER_STRUCT::p
uint32_t p[NUM_CP_BUCKETS][NUM_CP_BUCKETS][NUM_CP_BUCKETS][WERDS_PER_CP_VECTOR]
Definition: intproto.h:77
free_int_templates
void free_int_templates(INT_TEMPLATES templates)
Definition: intproto.cpp:697
ScrollView::Update
static void Update()
Definition: scrollview.cpp:708
ValidCharDescription
bool ValidCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, CHAR_DESC CharDesc)
Definition: featdefs.cpp:194
NUM_BITS_PER_CLASS
#define NUM_BITS_PER_CLASS
Definition: intproto.h:54
tesseract::Classify::rating_scale
double rating_scale
Definition: classify.h:472
ADAPT_CLASS_STRUCT
Definition: adaptive.h:54
tesseract::Classify::matcher_debug_separate_windows
bool matcher_debug_separate_windows
Definition: classify.h:494
TWERD::plot
void plot(ScrollView *window)
Definition: blobs.cpp:895
ADAPT_RESULTS::match
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:97
AddProtoToClassPruner
void AddProtoToClassPruner(PROTO Proto, CLASS_ID ClassId, INT_TEMPLATES Templates)
Definition: intproto.cpp:327
TWERD::bounding_box
TBOX bounding_box() const
Definition: blobs.cpp:859
tesseract::Classify::GetAmbiguities
UNICHAR_ID * GetAmbiguities(TBLOB *Blob, CLASS_ID CorrectClass)
Definition: adaptmatch.cpp:1592
FEATURE_ID
uint8_t FEATURE_ID
Definition: matchdefs.h:45
NewCharDescription
CHAR_DESC NewCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs)
Definition: featdefs.cpp:147
tesseract::CST_WHOLE
Definition: classify.h:98
SEAM::JoinPieces
static void JoinPieces(const GenericVector< SEAM * > &seams, const GenericVector< TBLOB * > &blobs, int first, int last)
Definition: seam.cpp:208
NewAdaptedClass
ADAPT_CLASS NewAdaptedClass()
Definition: adaptive.cpp:102
tesseract::Classify::classify_integer_matcher_multiplier
int classify_integer_matcher_multiplier
Definition: classify.h:505
UNICHARSET::id_to_unichar
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:290
tesseract::Classify::classify_debug_character_fragments
bool classify_debug_character_fragments
Definition: classify.h:491
WERDS_PER_PP_VECTOR
#define WERDS_PER_PP_VECTOR
Definition: intproto.h:62
tesseract::Classify::matcher_rating_margin
double matcher_rating_margin
Definition: classify.h:460
PermConfigFor
#define PermConfigFor(Class, ConfigId)
Definition: adaptive.h:92
tesseract::Classify::ResetAdaptiveClassifierInternal
void ResetAdaptiveClassifierInternal()
Definition: adaptmatch.cpp:598
FEATURE_SET_STRUCT::NumFeatures
uint16_t NumFeatures
Definition: ocrfeatures.h:65
tesseract::Classify::WriteAdaptedTemplates
void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
Definition: adaptive.cpp:453
ProtoForProtoId
#define ProtoForProtoId(C, P)
Definition: intproto.h:167
INT_CLASS_STRUCT::NumConfigs
uint8_t NumConfigs
Definition: intproto.h:107
ADAPT_TEMPLATES_STRUCT::Templates
INT_TEMPLATES Templates
Definition: adaptive.h:66
tesseract::UnicharRating::adapted
bool adapted
Definition: shapetable.h:79
tesseract::Classify::classify_bln_numeric_mode
bool classify_bln_numeric_mode
Definition: classify.h:508
tesseract::Classify::matcher_bad_match_pad
double matcher_bad_match_pad
Definition: classify.h:459
BOOL_MEMBER
#define BOOL_MEMBER(name, val, comment, vec)
Definition: params.h:315
tesseract::Classify::fontset_table_
UnicityTable< FontSet > fontset_table_
Definition: classify.h:537
tesseract::Classify::matcher_debug_flags
int matcher_debug_flags
Definition: classify.h:454
INT_PROTO_STRUCT::C
int8_t C
Definition: intproto.h:83
GenericVector::sort
void sort()
Definition: genericvector.h:1102
INT_FEATURE_STRUCT::X
uint8_t X
Definition: intproto.h:139
OLD_MAX_NUM_CONFIGS
#define OLD_MAX_NUM_CONFIGS
Definition: intproto.cpp:107
UnusedClassIdIn
#define UnusedClassIdIn(T, c)
Definition: intproto.h:176
DENORM::pix
Pix * pix() const
Definition: normalis.h:245
ADAPT_RESULTS::BlobLength
int32_t BlobLength
Definition: adaptmatch.cpp:92
tesseract::Classify::allow_blob_division
bool allow_blob_division
Definition: classify.h:423
PRINT_PROTO_MATCHES
#define PRINT_PROTO_MATCHES
Definition: intproto.h:191
ActualOutlineLength
float ActualOutlineLength(FEATURE Feature)
Definition: normfeat.cpp:31
NORM_PROTOS::Protos
LIST * Protos
Definition: normmatch.cpp:51
tesseract::Classify::classify_character_fragments_garbage_certainty_threshold
double classify_character_fragments_garbage_certainty_threshold
Definition: classify.h:489
GenericVector::size
int size() const
Definition: genericvector.h:71
window_wait
char window_wait(ScrollView *win)
Definition: callcpp.cpp:103
tesseract::Serialize
bool Serialize(FILE *fp, const char *data, size_t n=1)
Definition: serialis.cpp:73
ClassForClassId
#define ClassForClassId(T, c)
Definition: intproto.h:177
tesseract::FontSet
Definition: fontinfo.h:137
CharNormY
Definition: normfeat.h:29
tesseract::Classify::AdaptToChar
void AdaptToChar(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, float Threshold, ADAPT_TEMPLATES adaptive_templates)
Definition: adaptmatch.cpp:853
IDA_ADAPTIVE
Definition: intproto.h:154
MAX_PICO_FEATURES
#define MAX_PICO_FEATURES
Definition: picofeat.h:45
push_last
LIST push_last(LIST list, void *item)
Definition: oldlist.cpp:185
ReadAdaptedClass
ADAPT_CLASS ReadAdaptedClass(TFile *fp)
Definition: adaptive.cpp:281
tesseract::Dict::SettupStopperPass2
void SettupStopperPass2()
Sets up stopper variables in preparation for the second pass.
Definition: stopper.cpp:382
tesseract::ClearFeatureSpaceWindow
void ClearFeatureSpaceWindow(NORM_METHOD norm_method, ScrollView *window)
Definition: intproto.cpp:987
tesseract::Classify::certainty_scale
double certainty_scale
Definition: classify.h:473
INT_MIN_Y
#define INT_MIN_Y
Definition: intproto.cpp:59
tesseract::CST_FRAGMENT
Definition: classify.h:97
tesseract::Classify::ClassAndConfigIDToFontOrShapeID
int ClassAndConfigIDToFontOrShapeID(int class_id, int int_result_config) const
Definition: adaptmatch.cpp:2207
INT_PROTO_STRUCT::A
int8_t A
Definition: intproto.h:81
ConvertToOutlineFeatures
void ConvertToOutlineFeatures(MFOUTLINE Outline, FEATURE_SET FeatureSet)
Definition: outfeat.cpp:106
tesseract::Classify::MakeNewTempProtos
PROTO_ID MakeNewTempProtos(FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS IClass, ADAPT_CLASS Class, BIT_VECTOR TempProtoMask)
Definition: adaptmatch.cpp:1834
OLD_WERDS_PER_CONFIG_VEC
#define OLD_WERDS_PER_CONFIG_VEC
Definition: intproto.cpp:108
tesseract::Classify::AllConfigsOn
BIT_VECTOR AllConfigsOn
Definition: classify.h:523
tesseract::Classify::classify_debug_level
int classify_debug_level
Definition: classify.h:430
NormalizeOutline
void NormalizeOutline(MFOUTLINE Outline, float XOrigin)
Definition: mfoutline.cpp:241
tesseract::ShapeTable::MaxNumUnichars
int MaxNumUnichars() const
Definition: shapetable.cpp:455
kBlnBaselineOffset
const int kBlnBaselineOffset
Definition: normalis.h:24
UNICHARSET::size
int size() const
Definition: unicharset.h:341
TWERD::NumBlobs
int NumBlobs() const
Definition: blobs.h:446
EDGEPT::next
EDGEPT * next
Definition: blobs.h:190
FreeCharDescription
void FreeCharDescription(CHAR_DESC CharDesc)
Definition: featdefs.cpp:128
CharNormRx
Definition: normfeat.h:29
tesseract::Classify::RefreshDebugWindow
void RefreshDebugWindow(ScrollView **win, const char *msg, int y_offset, const TBOX &wbox)
Definition: adaptmatch.cpp:226
IsEmptyAdaptedClass
#define IsEmptyAdaptedClass(Class)
Definition: adaptive.h:78
BCC_SPECKLE_CLASSIFIER
Definition: ratngs.h:44
MAX_ADAPTABLE_WERD_SIZE
#define MAX_ADAPTABLE_WERD_SIZE
Definition: adaptmatch.cpp:80
tesseract::Classify::NewAdaptedTemplates
ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset)
Definition: adaptive.cpp:151
tesseract::TESSDATA_NORMPROTO
Definition: tessdatamanager.h:62
BCC_STATIC_CLASSIFIER
Definition: ratngs.h:42
AddAdaptedClass
void AddAdaptedClass(ADAPT_TEMPLATES Templates, ADAPT_CLASS Class, CLASS_ID ClassId)
Definition: adaptive.cpp:45
tesseract::UnicharRating::feature_misses
uint16_t feature_misses
Definition: shapetable.h:83
tesseract::read_set
bool read_set(TFile *f, FontSet *fs)
Definition: fontinfo.cpp:229
TBOX
Definition: rect.h:33
DENORM
Definition: normalis.h:49
CharNormRy
Definition: normfeat.h:29
ADAPT_CLASS_STRUCT::TempProtos
LIST TempProtos
Definition: adaptive.h:60
GetPicoFeatureLength
#define GetPicoFeatureLength()
Definition: picofeat.h:56