| 
    tesseract
    5.0.0-alpha-619-ge9db
    
   | 
 
 
 
 
#include <classify.h>
 | 
|   | Classify () | 
|   | 
|   | ~Classify () override | 
|   | 
| virtual Dict &  | getDict () | 
|   | 
| const ShapeTable *  | shape_table () const | 
|   | 
| void  | SetStaticClassifier (ShapeClassifier *static_classifier) | 
|   | 
| void  | AddLargeSpeckleTo (int blob_length, BLOB_CHOICE_LIST *choices) | 
|   | 
| bool  | LargeSpeckle (const TBLOB &blob) | 
|   | 
| ADAPT_TEMPLATES  | NewAdaptedTemplates (bool InitFromUnicharset) | 
|   | 
| int  | GetFontinfoId (ADAPT_CLASS Class, uint8_t ConfigId) | 
|   | 
| int  | PruneClasses (const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uint8_t *normalization_factors, const uint16_t *expected_num_features, GenericVector< CP_RESULT_STRUCT > *results) | 
|   | 
| void  | ReadNewCutoffs (TFile *fp, uint16_t *Cutoffs) | 
|   | 
| void  | PrintAdaptedTemplates (FILE *File, ADAPT_TEMPLATES Templates) | 
|   | 
| void  | WriteAdaptedTemplates (FILE *File, ADAPT_TEMPLATES Templates) | 
|   | 
| ADAPT_TEMPLATES  | ReadAdaptedTemplates (TFile *File) | 
|   | 
| float  | ComputeNormMatch (CLASS_ID ClassId, const FEATURE_STRUCT &feature, bool DebugMatch) | 
|   | 
| void  | FreeNormProtos () | 
|   | 
| NORM_PROTOS *  | ReadNormProtos (TFile *fp) | 
|   | 
| void  | ConvertProto (PROTO Proto, int ProtoId, INT_CLASS Class) | 
|   | 
| INT_TEMPLATES  | CreateIntTemplates (CLASSES FloatProtos, const UNICHARSET &target_unicharset) | 
|   | 
| void  | LearnWord (const char *fontname, WERD_RES *word) | 
|   | 
| void  | LearnPieces (const char *fontname, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word) | 
|   | 
| void  | InitAdaptiveClassifier (TessdataManager *mgr) | 
|   | 
| void  | InitAdaptedClass (TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates) | 
|   | 
| void  | AmbigClassifier (const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, const TBLOB *blob, INT_TEMPLATES templates, ADAPT_CLASS *classes, UNICHAR_ID *ambiguities, ADAPT_RESULTS *results) | 
|   | 
| void  | MasterMatcher (INT_TEMPLATES templates, int16_t num_features, const INT_FEATURE_STRUCT *features, const uint8_t *norm_factors, ADAPT_CLASS *classes, int debug, int matcher_multiplier, const TBOX &blob_box, const GenericVector< CP_RESULT_STRUCT > &results, ADAPT_RESULTS *final_results) | 
|   | 
| void  | ExpandShapesAndApplyCorrections (ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, const uint8_t *cn_factors, UnicharRating *int_result, ADAPT_RESULTS *final_results) | 
|   | 
| double  | ComputeCorrectedRating (bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, int matcher_multiplier, const uint8_t *cn_factors) | 
|   | 
| void  | ConvertMatchesToChoices (const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices) | 
|   | 
| void  | AddNewResult (const UnicharRating &new_result, ADAPT_RESULTS *results) | 
|   | 
| int  | GetAdaptiveFeatures (TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures) | 
|   | 
| void  | DebugAdaptiveClassifier (TBLOB *Blob, ADAPT_RESULTS *Results) | 
|   | 
| PROTO_ID  | MakeNewTempProtos (FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS IClass, ADAPT_CLASS Class, BIT_VECTOR TempProtoMask) | 
|   | 
| int  | MakeNewTemporaryConfig (ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures) | 
|   | 
| void  | MakePermanent (ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob) | 
|   | 
| void  | PrintAdaptiveMatchResults (const ADAPT_RESULTS &results) | 
|   | 
| void  | RemoveExtraPuncs (ADAPT_RESULTS *Results) | 
|   | 
| void  | RemoveBadMatches (ADAPT_RESULTS *Results) | 
|   | 
| void  | SetAdaptiveThreshold (float Threshold) | 
|   | 
| void  | ShowBestMatchFor (int shape_id, const INT_FEATURE_STRUCT *features, int num_features) | 
|   | 
| STRING  | ClassIDToDebugStr (const INT_TEMPLATES_STRUCT *templates, int class_id, int config_id) const | 
|   | 
| int  | ClassAndConfigIDToFontOrShapeID (int class_id, int int_result_config) const | 
|   | 
| int  | ShapeIDToClassID (int shape_id) const | 
|   | 
| UNICHAR_ID *  | BaselineClassifier (TBLOB *Blob, const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results) | 
|   | 
| int  | CharNormClassifier (TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results) | 
|   | 
| int  | CharNormTrainingSample (bool pruner_only, int keep_this, const TrainingSample &sample, GenericVector< UnicharRating > *results) | 
|   | 
| UNICHAR_ID *  | GetAmbiguities (TBLOB *Blob, CLASS_ID CorrectClass) | 
|   | 
| void  | DoAdaptiveMatch (TBLOB *Blob, ADAPT_RESULTS *Results) | 
|   | 
| void  | AdaptToChar (TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, float Threshold, ADAPT_TEMPLATES adaptive_templates) | 
|   | 
| void  | DisplayAdaptedChar (TBLOB *blob, INT_CLASS_STRUCT *int_class) | 
|   | 
| bool  | AdaptableWord (WERD_RES *word) | 
|   | 
| void  | EndAdaptiveClassifier () | 
|   | 
| void  | SettupPass1 () | 
|   | 
| void  | SettupPass2 () | 
|   | 
| void  | AdaptiveClassifier (TBLOB *Blob, BLOB_CHOICE_LIST *Choices) | 
|   | 
| void  | ClassifyAsNoise (ADAPT_RESULTS *Results) | 
|   | 
| void  | ResetAdaptiveClassifierInternal () | 
|   | 
| void  | SwitchAdaptiveClassifier () | 
|   | 
| void  | StartBackupAdaptiveClassifier () | 
|   | 
| int  | GetCharNormFeature (const INT_FX_RESULT_STRUCT &fx_info, INT_TEMPLATES templates, uint8_t *pruner_norm_array, uint8_t *char_norm_array) | 
|   | 
| void  | ComputeCharNormArrays (FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uint8_t *char_norm_array, uint8_t *pruner_array) | 
|   | 
| bool  | TempConfigReliable (CLASS_ID class_id, const TEMP_CONFIG &config) | 
|   | 
| void  | UpdateAmbigsGroup (CLASS_ID class_id, TBLOB *Blob) | 
|   | 
| bool  | AdaptiveClassifierIsFull () const | 
|   | 
| bool  | AdaptiveClassifierIsEmpty () const | 
|   | 
| bool  | LooksLikeGarbage (TBLOB *blob) | 
|   | 
| void  | RefreshDebugWindow (ScrollView **win, const char *msg, int y_offset, const TBOX &wbox) | 
|   | 
| void  | ClearCharNormArray (uint8_t *char_norm_array) | 
|   | 
| void  | ComputeIntCharNormArray (const FEATURE_STRUCT &norm_feature, uint8_t *char_norm_array) | 
|   | 
| void  | ComputeIntFeatures (FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures) | 
|   | 
| INT_TEMPLATES  | ReadIntTemplates (TFile *fp) | 
|   | 
| void  | WriteIntTemplates (FILE *File, INT_TEMPLATES Templates, const UNICHARSET &target_unicharset) | 
|   | 
| CLASS_ID  | GetClassToDebug (const char *Prompt, bool *adaptive_on, bool *pretrained_on, int *shape_id) | 
|   | 
| void  | ShowMatchDisplay () | 
|   | 
| UnicityTable< FontInfo > &  | get_fontinfo_table () | 
|   | 
| const UnicityTable< FontInfo > &  | get_fontinfo_table () const | 
|   | 
| UnicityTable< FontSet > &  | get_fontset_table () | 
|   | 
| void  | NormalizeOutlines (LIST Outlines, float *XScale, float *YScale) | 
|   | 
| FEATURE_SET  | ExtractOutlineFeatures (TBLOB *Blob) | 
|   | 
| FEATURE_SET  | ExtractPicoFeatures (TBLOB *Blob) | 
|   | 
| FEATURE_SET  | ExtractIntCNFeatures (const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info) | 
|   | 
| FEATURE_SET  | ExtractIntGeoFeatures (const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info) | 
|   | 
| void  | LearnBlob (const STRING &fontname, TBLOB *Blob, const DENORM &cn_denorm, const INT_FX_RESULT_STRUCT &fx_info, const char *blob_text) | 
|   | 
| bool  | WriteTRFile (const STRING &filename) | 
|   | 
|   | CCStruct ()=default | 
|   | 
|   | ~CCStruct () override | 
|   | 
|   | CCUtil () | 
|   | 
| virtual  | ~CCUtil () | 
|   | 
| void  | main_setup (const char *argv0, const char *basename) | 
|   | CCUtil::main_setup - set location of tessdata and name of image.  More...
  | 
|   | 
| ParamsVectors *  | params () | 
|   | 
Definition at line 103 of file classify.h.
 
◆ Classify()
      
        
          | tesseract::Classify::Classify  | 
          ( | 
           | ) | 
           | 
        
      
 
Definition at line 60 of file classify.cpp.
   64                   "Prioritize blob division over chopping", this->
params()),
 
   72                     "Character Normalization Range ...", this->
params()),
 
   74                     "Veto ratio between classifier ratings", this->
params()),
 
   76                     "Veto difference between classifier certainties",
 
   83                   "Enable adaptive classifier", this->
params()),
 
   85                   "Use pre-adapted classifier templates", this->
params()),
 
   87                   "Save adapted templates to a file", this->
params()),
 
   91                   "Non-linear stroke-density normalization", this->
params()),
 
  111                  "Reliable Config Threshold", this->
params()),
 
  113                  "Enable adaption even if the ambiguities have not been seen",
 
  116                     "Maximum angle delta for prototype clustering",
 
  119                     "Penalty to apply when a non-alnum is vertically out of " 
  120                     "its expected textline position",
 
  126                     "Scale factor for features not used", this->
params()),
 
  129           "Prune poor adapted results this much worse than best result",
 
  132                     "Threshold at which classify_adapted_pruning_factor starts",
 
  135                  "Threshold for good protos during adaptive 0-255",
 
  138                  "Threshold for good features during adaptive 0-255",
 
  141                   "Do not include character fragments in the" 
  142                   " results of the classifier",
 
  146                     "Exclude fragments that do not look like whole" 
  147                     " characters from training and adaption",
 
  150                   "Bring up graphical debugging windows for fragments training",
 
  153                   "Use two different windows for debugging the matching: " 
  154                   "One for the protos and one for the features.",
 
  159                  "Class Pruner Threshold 0-255", this->
params()),
 
  161                  "Class Pruner Multiplier 0-255:       ", this->
params()),
 
  163                  "Class Pruner CutoffStrength:         ", this->
params()),
 
  165                  "Integer Matcher Multiplier  0-255:   ", this->
params()),
 
  167                   "Assume the input is numbers [0-9].", this->
params()),
 
  171                     "Penalty to add to worst rating for noise", this->
params()),
 
  174   using namespace std::placeholders; 
 
 
 
 
◆ ~Classify()
  
  
      
        
          | tesseract::Classify::~Classify  | 
          ( | 
           | ) | 
           | 
         
       
   | 
  
override   | 
  
 
Definition at line 183 of file classify.cpp.
  185   delete learn_debug_win_;
 
  186   delete learn_fragmented_word_debug_win_;
 
  187   delete learn_fragments_debug_win_;
 
 
 
 
◆ AdaptableWord()
      
        
          | bool tesseract::Classify::AdaptableWord  | 
          ( | 
          WERD_RES *  | 
          word | ) | 
           | 
        
      
 
Return true if the specified word is acceptable for adaptation.
Globals: none
- Parameters
 - 
  
    | word | word to be checked for suitability for adaptation  | 
  
 
- Returns
 - true or false 
 
Definition at line 821 of file adaptmatch.cpp.
  824   float adaptable_score =
 
  827       BestChoiceLength > 0 &&
 
 
 
 
◆ AdaptiveClassifier()
      
        
          | void tesseract::Classify::AdaptiveClassifier  | 
          ( | 
          TBLOB *  | 
          Blob,  | 
        
        
           | 
           | 
          BLOB_CHOICE_LIST *  | 
          Choices  | 
        
        
           | 
          ) | 
           |  | 
        
      
 
This routine calls the adaptive matcher which returns (in an array) the class id of each class matched.
It also returns the number of classes matched. For each class matched it places the best rating found for that class into the Ratings array.
Bad matches are then removed so that they don't need to be sorted. The remaining good matches are then sorted and converted to choices.
This routine also performs some simple speckle filtering.
- Parameters
 - 
  
     | Blob | blob to be classified  | 
    | [out] | Choices | List of choices found by adaptive matcher. Filled on return with the choices found by the class pruner and the ratings therefrom. Also contains the detailed results of the integer matcher.  | 
  
   
Definition at line 191 of file adaptmatch.cpp.
  192   assert(Choices != 
nullptr);
 
  203   Results->ComputeBest();
 
  216 #ifndef GRAPHICS_DISABLED 
 
 
 
◆ AdaptiveClassifierIsEmpty()
  
  
      
        
          | bool tesseract::Classify::AdaptiveClassifierIsEmpty  | 
          ( | 
           | ) | 
           const | 
         
       
   | 
  
inline   | 
  
 
 
◆ AdaptiveClassifierIsFull()
  
  
      
        
          | bool tesseract::Classify::AdaptiveClassifierIsFull  | 
          ( | 
           | ) | 
           const | 
         
       
   | 
  
inline   | 
  
 
Definition at line 325 of file classify.h.
  325 { 
return NumAdaptationsFailed > 0; }
 
 
 
 
◆ AdaptToChar()
      
        
          | void tesseract::Classify::AdaptToChar  | 
          ( | 
          TBLOB *  | 
          Blob,  | 
        
        
           | 
           | 
          CLASS_ID  | 
          ClassId,  | 
        
        
           | 
           | 
          int  | 
          FontinfoId,  | 
        
        
           | 
           | 
          float  | 
          Threshold,  | 
        
        
           | 
           | 
          ADAPT_TEMPLATES  | 
          adaptive_templates  | 
        
        
           | 
          ) | 
           |  | 
        
      
 
- Parameters
 - 
  
    | Blob | blob to add to templates for ClassId  | 
    | ClassId | class to add blob to  | 
    | FontinfoId | font information from pre-trained templates  | 
    | Threshold | minimum match rating to existing template  | 
    | adaptive_templates | current set of adapted templates | 
  
   
Globals:
- AllProtosOn dummy mask to match against all protos
 
- AllConfigsOn dummy mask to match against all configs 
 
Definition at line 853 of file adaptmatch.cpp.
  869   Class = adaptive_templates->
Class[ClassId];
 
  870   assert(Class != 
nullptr);
 
  877     if (NumFeatures <= 0) {
 
  883     for (
int cfg = 0; cfg < IClass->
NumConfigs; ++cfg) {
 
  885         SET_BIT(MatchingFontConfigs, cfg);
 
  891               NumFeatures, IntFeatures,
 
  894     FreeBitVector(MatchingFontConfigs);
 
  898     if (1.0f - int_result.
rating <= Threshold) {
 
  901           tprintf(
"Found good match to perm config %d = %4.1f%%.\n",
 
  913         tprintf(
"Increasing reliability of temp config %d to %d.\n",
 
  922         tprintf(
"Found poor match to temp config %d = %4.1f%%.\n",
 
  929                                  NumFeatures, IntFeatures, FloatFeatures);
 
  930       if (NewTempConfigId >= 0 &&
 
  932         MakePermanent(adaptive_templates, ClassId, NewTempConfigId, Blob);
 
  936 #ifndef GRAPHICS_DISABLED 
 
 
 
◆ AddLargeSpeckleTo()
      
        
          | void tesseract::Classify::AddLargeSpeckleTo  | 
          ( | 
          int  | 
          blob_length,  | 
        
        
           | 
           | 
          BLOB_CHOICE_LIST *  | 
          choices  | 
        
        
           | 
          ) | 
           |  | 
        
      
 
Definition at line 201 of file classify.cpp.
  202     BLOB_CHOICE_IT bc_it(choices);
 
  207   if (!choices->empty() && blob_length > 0) {
 
  208     bc_it.move_to_last();
 
  218                                              -1, 0.0f, FLT_MAX, 0,
 
  220   bc_it.add_to_end(blob_choice);
 
 
 
 
◆ AddNewResult()
This routine adds the result of a classification into Results. If the new rating is much worse than the current best rating, it is not entered into results because it would end up being stripped later anyway. If the new rating is better than the old rating for the class, it replaces the old rating. If this is the first rating for the class, the class is added to the list of matched classes in Results. If the new rating is better than the best so far, it becomes the best so far.
Globals:
- Parameters
 - 
  
     | new_result | new result to add  | 
    | [out] | results | results to add new result to  | 
  
   
Definition at line 994 of file adaptmatch.cpp.
  996   int old_match = FindScoredUnichar(new_result.
unichar_id, *results);
 
  998   if (new_result.
rating + matcher_bad_match_pad < results->best_rating ||
 
  999       (old_match < results->match.size() &&
 
 1000        new_result.
rating <= results->
match[old_match].rating))
 
 1006   if (old_match < results->match.size()) {
 
 1007     results->
match[old_match].rating = new_result.
rating;
 
 
 
 
◆ AmbigClassifier()
This routine is identical to CharNormClassifier() except that it does no class pruning. It simply matches the unknown blob against the classes listed in Ambiguities.
Globals:
- Parameters
 - 
  
     | blob | blob to be classified  | 
     | templates | built-in templates to classify against  | 
     | classes | adapted class templates  | 
     | ambiguities | array of unichar id's to match against  | 
    | [out] | results | place to put match results  | 
     | int_features |  | 
     | fx_info |  | 
  
   
Definition at line 1045 of file adaptmatch.cpp.
 1053   if (int_features.
empty()) 
return;
 
 1065   while (*ambiguities >= 0) {
 
 1071               int_features.
size(), &int_features[0],
 
 1079                                     CharNormArray, &int_result, results);
 
 1082   delete [] CharNormArray;
 
 
 
 
◆ BaselineClassifier()
This routine extracts baseline normalized features from the unknown character and matches them against the specified set of templates. The classes which match are added to Results.
Globals:
- BaselineCutoffs expected num features for each class
 
- Parameters
 - 
  
    | Blob | blob to be classified  | 
    | Templates | current set of adapted templates  | 
    | Results | place to put match results  | 
    | int_features |  | 
    | fx_info |  | 
  
   
- Returns
 - Array of possible ambiguous chars that should be checked. 
 
Definition at line 1265 of file adaptmatch.cpp.
 1269   if (int_features.
empty()) 
return nullptr;
 
 1275                CharNormArray, BaselineCutoffs, &Results->
CPResults);
 
 1285   delete [] CharNormArray;
 
 1290   return Templates->
Class[ClassId]->
 
 
 
 
◆ CharNormClassifier()
This routine extracts character normalized features from the unknown character and matches them against the specified set of templates. The classes which match are added to Results.
- Parameters
 - 
  
    | blob | blob to be classified  | 
    | sample | templates to classify unknown against  | 
    | adapt_results | place to put match results | 
  
   
Globals:
- CharNormCutoffs expected num features for each class
 
- AllProtosOn mask that enables all protos
 
- AllConfigsOn mask that enables all configs 
 
Definition at line 1311 of file adaptmatch.cpp.
 1319                                             -1, &unichar_results);
 
 1321   for (
int r = 0; r < unichar_results.size(); ++r) {
 
 1324   return sample.num_features();
 
 
 
 
◆ CharNormTrainingSample()
Definition at line 1329 of file adaptmatch.cpp.
 1335   adapt_results->Initialize();
 
 1337   uint32_t num_features = 
sample.num_features();
 
 1347   auto* pruner_norm_array = 
new uint8_t[num_pruner_classes];
 
 1348   adapt_results->BlobLength =
 
 1355                shape_table_ != 
nullptr ? &shapetable_cutoffs_[0] : CharNormCutoffs,
 
 1356                &adapt_results->CPResults);
 
 1357   delete [] pruner_norm_array;
 
 1358   if (keep_this >= 0) {
 
 1359     adapt_results->CPResults[0].Class = keep_this;
 
 1360     adapt_results->CPResults.truncate(1);
 
 1364     for (
int i = 0; i < adapt_results->CPResults.size(); ++i) {
 
 1365       int class_id = adapt_results->CPResults[i].Class;
 
 1367           UnicharRating(class_id, 1.0f - adapt_results->CPResults[i].Rating));
 
 1374                   blob_box, adapt_results->CPResults, adapt_results);
 
 1376     for (
int i = 0; i < adapt_results->match.size(); i++) {
 
 1377       results->
push_back(adapt_results->match[i]);
 
 1381   delete [] char_norm_array;
 
 1382   delete adapt_results;
 
 1383   return num_features;
 
 
 
 
◆ ClassAndConfigIDToFontOrShapeID()
      
        
          | int tesseract::Classify::ClassAndConfigIDToFontOrShapeID  | 
          ( | 
          int  | 
          class_id,  | 
        
        
           | 
           | 
          int  | 
          int_result_config  | 
        
        
           | 
          ) | 
           |  const | 
        
      
 
Definition at line 2207 of file adaptmatch.cpp.
 2211   if (font_set_id < 0)
 
 2212     return kBlankFontinfoId;
 
 2214   ASSERT_HOST(int_result_config >= 0 && int_result_config < fs.size);
 
 2215   return fs.configs[int_result_config];
 
 
 
 
◆ ClassIDToDebugStr()
◆ ClassifyAsNoise()
      
        
          | void tesseract::Classify::ClassifyAsNoise  | 
          ( | 
          ADAPT_RESULTS *  | 
          results | ) | 
           | 
        
      
 
This routine computes a rating which reflects the likelihood that the blob being classified is a noise blob. NOTE: assumes that the blob length has already been computed and placed into Results.
- Parameters
 - 
  
    | results | results to add noise classification to | 
  
   
Globals:
- matcher_avg_noise_size avg. length of a noise blob 
 
Definition at line 1399 of file adaptmatch.cpp.
 1402   rating /= 1.0 + rating;
 
 
 
 
◆ ClearCharNormArray()
      
        
          | void tesseract::Classify::ClearCharNormArray  | 
          ( | 
          uint8_t *  | 
          char_norm_array | ) | 
           | 
        
      
 
For each class in the unicharset, clears the corresponding entry in char_norm_array. char_norm_array is indexed by unichar_id.
Globals:
- Parameters
 - 
  
    | char_norm_array | array to be cleared  | 
  
   
Definition at line 44 of file float2int.cpp.
   45   memset(char_norm_array, 0, 
sizeof(*char_norm_array) * 
unicharset.
size());
 
 
 
 
◆ ComputeCharNormArrays()
Definition at line 1698 of file adaptmatch.cpp.
 1703   if (pruner_array != 
nullptr) {
 
 1707       memset(pruner_array, UINT8_MAX,
 
 1708              templates->
NumClasses * 
sizeof(pruner_array[0]));
 
 1711       for (
int id = 0; 
id < templates->
NumClasses; ++id) {
 
 1714         for (
int config = 0; config < fs.size; ++config) {
 
 1716           for (
int c = 0; c < shape.size(); ++c) {
 
 1717             if (char_norm_array[shape[c].unichar_id] < pruner_array[
id])
 
 1718               pruner_array[id] = char_norm_array[shape[c].unichar_id];
 
 
 
 
◆ ComputeCorrectedRating()
      
        
          | double tesseract::Classify::ComputeCorrectedRating  | 
          ( | 
          bool  | 
          debug,  | 
        
        
           | 
           | 
          int  | 
          unichar_id,  | 
        
        
           | 
           | 
          double  | 
          cp_rating,  | 
        
        
           | 
           | 
          double  | 
          im_rating,  | 
        
        
           | 
           | 
          int  | 
          feature_misses,  | 
        
        
           | 
           | 
          int  | 
          bottom,  | 
        
        
           | 
           | 
          int  | 
          top,  | 
        
        
           | 
           | 
          int  | 
          blob_length,  | 
        
        
           | 
           | 
          int  | 
          matcher_multiplier,  | 
        
        
           | 
           | 
          const uint8_t *  | 
          cn_factors  | 
        
        
           | 
          ) | 
           |  | 
        
      
 
Definition at line 1202 of file adaptmatch.cpp.
 1210                                               cn_factors[unichar_id],
 
 1211                                               matcher_multiplier);
 
 1213   double vertical_penalty = 0.0;
 
 1218     int min_bottom, max_bottom, min_top, max_top;
 
 1220                               &min_top, &max_top);
 
 1222       tprintf(
"top=%d, vs [%d, %d], bottom=%d, vs [%d, %d]\n",
 
 1223               top, min_top, max_top, bottom, min_bottom, max_bottom);
 
 1225     if (top < min_top || top > max_top ||
 
 1226         bottom < min_bottom || bottom > max_bottom) {
 
 1230   double result = 1.0 - (cn_corrected + miss_penalty + vertical_penalty);
 
 1234     tprintf(
"%s: %2.1f%%(CP%2.1f, IM%2.1f + CN%.2f(%d) + MP%2.1f + VP%2.1f)\n",
 
 1238             (1.0 - im_rating) * 100.0,
 
 1239             (cn_corrected - (1.0 - im_rating)) * 100.0,
 
 1240             cn_factors[unichar_id],
 
 1241             miss_penalty * 100.0,
 
 1242             vertical_penalty * 100.0);
 
 
 
 
◆ ComputeIntCharNormArray()
      
        
          | void tesseract::Classify::ComputeIntCharNormArray  | 
          ( | 
          const FEATURE_STRUCT &  | 
          norm_feature,  | 
        
        
           | 
           | 
          uint8_t *  | 
          char_norm_array  | 
        
        
           | 
          ) | 
           |  | 
        
      
 
For each class in unicharset, computes the match between norm_feature and the normalization protos for that class. Converts this number to the range 0-255 and stores it into char_norm_array. char_norm_array is indexed by unichar_id.
Globals:
- PreTrainedTemplates current set of built-in templates
 
- Parameters
 - 
  
     | norm_feature | character normalization feature  | 
    | [out] | char_norm_array | place to put results of size unicharset.size()  | 
  
   
Definition at line 62 of file float2int.cpp.
   65     if (i < PreTrainedTemplates->NumClasses) {
 
 
 
 
◆ ComputeIntFeatures()
This routine converts each floating point pico-feature in Features into integer format and saves it into IntFeatures.
Globals:
- Parameters
 - 
  
     | Features | floating point pico-features to be converted  | 
    | [out] | IntFeatures | array to put converted features into  | 
  
   
Definition at line 90 of file float2int.cpp.
   99   for (
int Fid = 0; Fid < Features->
NumFeatures; Fid++) {
 
  108     IntFeatures[Fid].CP_misses = 0;
 
 
 
 
◆ ComputeNormMatch()
      
        
          | float tesseract::Classify::ComputeNormMatch  | 
          ( | 
          CLASS_ID  | 
          ClassId,  | 
        
        
           | 
           | 
          const FEATURE_STRUCT &  | 
          feature,  | 
        
        
           | 
           | 
          bool  | 
          DebugMatch  | 
        
        
           | 
          ) | 
           |  | 
        
      
 
This routine compares feature against each character normalization proto for ClassId and returns the match rating of the best match. 
- Parameters
 - 
  
    | ClassId | id of class to match against  | 
    | feature | character normalization feature  | 
    | DebugMatch | controls dump of debug info | 
  
   
Globals: NormProtos character normalization prototypes
- Returns
 - Best match rating for feature against protos of ClassId. 
 
Definition at line 93 of file normmatch.cpp.
  117     return (1.0 - NormEvidenceOf(Match));
 
  133       tprintf(
"YMiddle: Proto=%g, Delta=%g, Var=%g, Dist=%g\n",
 
  140       tprintf(
"Height: Proto=%g, Delta=%g, Var=%g, Dist=%g\n",
 
  147       tprintf(
"Width: Proto=%g, Delta=%g, Var=%g\n",
 
  155       tprintf(
"Total Dist=%g, scaled=%g, sigmoid=%g, penalty=%g\n",
 
  157               NormEvidenceOf(Match), 256 * (1 - NormEvidenceOf(Match)));
 
  160     if (Match < BestMatch)
 
  165   return 1.0 - NormEvidenceOf(BestMatch);
 
 
 
 
◆ ConvertMatchesToChoices()
      
        
          | void tesseract::Classify::ConvertMatchesToChoices  | 
          ( | 
          const DENORM &  | 
          denorm,  | 
        
        
           | 
           | 
          const TBOX &  | 
          box,  | 
        
        
           | 
           | 
          ADAPT_RESULTS *  | 
          Results,  | 
        
        
           | 
           | 
          BLOB_CHOICE_LIST *  | 
          Choices  | 
        
        
           | 
          ) | 
           |  | 
        
      
 
This routine converts the given match ratings to the list of blob choices with ratings and certainties (used by the context checkers). If character fragments are present in the results, this function also makes sure that there is at least one non-fragmented classification included. For each classification result, check the unicharset for "definite" ambiguities and modify the resulting Choices accordingly. 
Definition at line 1413 of file adaptmatch.cpp.
 1416   assert(Choices != 
nullptr);
 
 1419   BLOB_CHOICE_IT temp_it;
 
 1420   bool contains_nonfrag = 
false;
 
 1421   temp_it.set_to_list(Choices);
 
 1422   int choices_length = 0;
 
 1435   float best_certainty = -FLT_MAX;
 
 1436   for (
int i = 0; i < Results->
match.
size(); i++) {
 
 1438     bool adapted = result.
adapted;
 
 1440     if (temp_it.length()+1 == max_matches &&
 
 1441         !contains_nonfrag && current_is_frag) {
 
 1453       Rating = Certainty = (1.0f - result.
rating);
 
 1462     if (Certainty > best_certainty) {
 
 1464     } 
else if (adapted &&
 
 1469     float min_xheight, max_xheight, yshift;
 
 1471                         &min_xheight, &max_xheight, &yshift);
 
 1475                         min_xheight, max_xheight, yshift,
 
 1479     temp_it.add_to_end(choice);
 
 1480     contains_nonfrag |= !current_is_frag;  
 
 1482     if (choices_length >= max_matches) 
break;
 
 
 
 
◆ ConvertProto()
      
        
          | void tesseract::Classify::ConvertProto  | 
          ( | 
          PROTO  | 
          Proto,  | 
        
        
           | 
           | 
          int  | 
          ProtoId,  | 
        
        
           | 
           | 
          INT_CLASS  | 
          Class  | 
        
        
           | 
          ) | 
           |  | 
        
      
 
This routine converts Proto to integer format and installs it as ProtoId in Class. 
- Parameters
 - 
  
    | Proto | floating-pt proto to be converted to integer format  | 
    | ProtoId | id of proto  | 
    | Class | integer class to add converted proto to  | 
  
   
Definition at line 487 of file intproto.cpp.
  492   assert(ProtoId < Class->NumProtos);
 
  496   Param = Proto->
A * 128;
 
  499   Param = -Proto->
B * 256;
 
  502   Param = Proto->
C * 128;
 
  505   Param = Proto->
Angle * 256;
 
  506   if (Param < 0 || Param >= 256)
 
  509     P->
Angle = static_cast<uint8_t>(Param);
 
  515     cprintf(
"Converted ffeat to (A=%d,B=%d,C=%d,L=%d)",
 
 
 
 
◆ CreateIntTemplates()
This routine converts from the old floating point format to the new integer format. 
- Parameters
 - 
  
    | FloatProtos | prototypes in old floating pt format  | 
    | target_unicharset | the UNICHARSET to use  | 
  
   
- Returns
 - New set of training templates in integer format. 
 
- Note
 - Globals: none 
 
Definition at line 526 of file intproto.cpp.
  539   for (ClassId = 0; ClassId < target_unicharset.
size(); ClassId++) {
 
  540     FClass = &(FloatProtos[ClassId]);
 
  542         strcmp(target_unicharset.
id_to_unichar(ClassId), 
" ") != 0) {
 
  543       cprintf(
"Warning: no protos/configs for %s in CreateIntTemplates()\n",
 
  551     for (
int i = 0; i < fs.
size; ++i) {
 
  562     for (ProtoId = 0; ProtoId < FClass->
NumProtos; ProtoId++) {
 
  570     for (ConfigId = 0; ConfigId < FClass->
NumConfigs; ConfigId++) {
 
  575   return (IntTemplates);
 
 
 
 
◆ DebugAdaptiveClassifier()
      
        
          | void tesseract::Classify::DebugAdaptiveClassifier  | 
          ( | 
          TBLOB *  | 
          blob,  | 
        
        
           | 
           | 
          ADAPT_RESULTS *  | 
          Results  | 
        
        
           | 
          ) | 
           |  | 
        
      
 
- Parameters
 - 
  
    | blob | blob whose classification is being debugged  | 
    | Results | results of match being debugged | 
  
   
Globals: none 
Definition at line 1497 of file adaptmatch.cpp.
 1499   if (static_classifier_ == 
nullptr) 
return;
 
 1504   if (
sample == 
nullptr) 
return;
 
 
 
 
◆ DisplayAdaptedChar()
Definition at line 946 of file adaptmatch.cpp.
  947 #ifndef GRAPHICS_DISABLED 
  953   if (
sample == 
nullptr) 
return;
 
  957             bl_features.
size(), &bl_features[0],
 
  960   tprintf(
"Best match to temp config %d = %4.1f%%.\n",
 
  961           int_result.config, int_result.rating * 100.0);
 
  964     ConfigMask = 1 << int_result.config;
 
  967               bl_features.
size(), &bl_features[0],
 
 
 
 
◆ DoAdaptiveMatch()
This routine performs an adaptive classification. If we have not yet adapted to enough classes, a simple classification to the pre-trained templates is performed. Otherwise, we match the blob against the adapted templates. If the adapted templates do not match well, we try a match against the pre-trained templates. If an adapted template match is found, we do a match to any pre-trained templates which could be ambiguous. The results from all of these classifications are merged together into Results.
- Parameters
 - 
  
    | Blob | blob to be classified  | 
    | Results | place to put match results | 
  
   
Globals:
- PreTrainedTemplates built-in training templates
 
- AdaptedTemplates templates adapted for this page
 
- matcher_reliable_adaptive_result rating limit for a great match 
 
Definition at line 1530 of file adaptmatch.cpp.
 1538   if (
sample == 
nullptr) 
return;
 
 1542   if (static_classifier_ == 
nullptr) {
 
 
 
 
◆ EndAdaptiveClassifier()
      
        
          | void tesseract::Classify::EndAdaptiveClassifier  | 
          ( | 
           | ) | 
           | 
        
      
 
This routine performs cleanup operations on the adaptive classifier. It should be called before the program is terminated. Its main function is to save the adapted templates to a file.
Globals:
Definition at line 459 of file adaptmatch.cpp.
  466     File = fopen (Filename.
c_str(), 
"wb");
 
  468       cprintf (
"Unable to save adapted templates to %s!\n", Filename.
c_str());
 
  470       cprintf (
"\nSaving adapted templates to %s ...", Filename.
c_str());
 
  505   delete static_classifier_;
 
  506   static_classifier_ = 
nullptr;
 
 
 
 
◆ ExpandShapesAndApplyCorrections()
      
        
          | void tesseract::Classify::ExpandShapesAndApplyCorrections  | 
          ( | 
          ADAPT_CLASS *  | 
          classes,  | 
        
        
           | 
           | 
          bool  | 
          debug,  | 
        
        
           | 
           | 
          int  | 
          class_id,  | 
        
        
           | 
           | 
          int  | 
          bottom,  | 
        
        
           | 
           | 
          int  | 
          top,  | 
        
        
           | 
           | 
          float  | 
          cp_rating,  | 
        
        
           | 
           | 
          int  | 
          blob_length,  | 
        
        
           | 
           | 
          int  | 
          matcher_multiplier,  | 
        
        
           | 
           | 
          const uint8_t *  | 
          cn_factors,  | 
        
        
           | 
           | 
          UnicharRating *  | 
          int_result,  | 
        
        
           | 
           | 
          ADAPT_RESULTS *  | 
          final_results  | 
        
        
           | 
          ) | 
           |  | 
        
      
 
Definition at line 1128 of file adaptmatch.cpp.
 1133   if (classes != 
nullptr) {
 
 1136     for (
int f = 0; f < int_result->
fonts.size(); ++f) {
 
 1137       int_result->
fonts[f].fontinfo_id =
 
 1143     for (
int f = 0; f < int_result->
fonts.size(); ++f) {
 
 1144       int_result->
fonts[f].fontinfo_id =
 
 1146                                           int_result->
fonts[f].fontinfo_id);
 
 1157       for (
int f = 0; f < int_result->
fonts.size(); ++f) {
 
 1158         int shape_id = int_result->
fonts[f].fontinfo_id;
 
 1160         for (
int c = 0; c < shape.size(); ++c) {
 
 1161           int unichar_id = shape[c].unichar_id;
 
 1165           for (r = 0; r < mapped_results.
size() &&
 
 1166                mapped_results[r].unichar_id != unichar_id; ++r) {}
 
 1167           if (r == mapped_results.
size()) {
 
 1169             mapped_results[r].unichar_id = unichar_id;
 
 1170             mapped_results[r].fonts.
truncate(0);
 
 1172           for (
int i = 0; i < shape[c].font_ids.size(); ++i) {
 
 1178       for (
int m = 0; m < mapped_results.
size(); ++m) {
 
 1179         mapped_results[m].rating =
 
 1181                                    cp_rating, int_result->
rating,
 
 1183                                    blob_length, matcher_multiplier, cn_factors);
 
 1193                                                 bottom, top, blob_length,
 
 1194                                                 matcher_multiplier, cn_factors);
 
 
 
 
◆ ExtractFeatures()
Definition at line 440 of file intfx.cpp.
  447   DENORM bl_denorm, cn_denorm;
 
  449                                         &bl_denorm, &cn_denorm, results);
 
  450   if (outline_cn_counts != 
nullptr)
 
  455     EDGEPT* loop_pt = ol->FindBestStartPt();
 
  457     if (pt == 
nullptr) 
continue;
 
  463         last_pt = last_pt->
next;
 
  464       } 
while (last_pt != loop_pt && !last_pt->
IsHidden() &&
 
  466       last_pt = last_pt->
prev;
 
  474     } 
while ((pt = pt->
next) != loop_pt);
 
  475     if (outline_cn_counts != 
nullptr)
 
 
 
 
◆ ExtractIntCNFeatures()
- Parameters
 - 
  
    | blob | blob to extract features from  | 
    | fx_info |  | 
  
   
- Returns
 - Integer character-normalized features for blob. 
 
Definition at line 216 of file picofeat.cpp.
  222       blob, 
false, &local_fx_info, &bl_features);
 
  223   if (
sample == 
nullptr) 
return nullptr;
 
  225   uint32_t num_features = 
sample->num_features();
 
  228   for (uint32_t f = 0; f < num_features; ++f) {
 
 
 
 
◆ ExtractIntGeoFeatures()
- Parameters
 - 
  
    | blob | blob to extract features from  | 
    | fx_info |  | 
  
   
- Returns
 - Geometric (top/bottom/width) features for blob. 
 
Definition at line 246 of file picofeat.cpp.
  252       blob, 
false, &local_fx_info, &bl_features);
 
  253   if (
sample == 
nullptr) 
return nullptr;
 
 
 
 
◆ ExtractOutlineFeatures()
Convert each segment in the outline to a feature and return the features. 
- Parameters
 - 
  
    | Blob | blob to extract pico-features from  | 
  
   
- Returns
 - Outline-features for Blob. 
 
- Note
 - Globals: none 
 
Definition at line 54 of file outfeat.cpp.
 
 
◆ ExtractPicoFeatures()
Operation: Dummy for now.
Globals:
- classify_norm_method normalization method currently specified 
- Parameters
 - 
  
    | Blob | blob to extract pico-features from  | 
  
   
- Returns
 - Pico-features for Blob. 
 
 
Definition at line 62 of file picofeat.cpp.
   65   LIST RemainingOutlines;
 
   73   RemainingOutlines = Outlines;
 
 
 
 
◆ FreeNormProtos()
      
        
          | void tesseract::Classify::FreeNormProtos  | 
          ( | 
           | ) | 
           | 
        
      
 
 
◆ get_fontinfo_table() [1/2]
◆ get_fontinfo_table() [2/2]
◆ get_fontset_table()
◆ GetAdaptiveFeatures()
This routine sets up the feature extractor to extract baseline normalized pico-features.
The extracted pico-features are converted to integer form and placed in IntFeatures. The original floating-pt. features are returned in FloatFeatures.
Globals: none 
- Parameters
 - 
  
     | Blob | blob to extract features from  | 
    | [out] | IntFeatures | array to fill with integer features  | 
    | [out] | FloatFeatures | place to return actual floating-pt features | 
  
   
- Returns
 - Number of pico-features returned (0 if an error occurred) 
 
Definition at line 786 of file adaptmatch.cpp.
  802   *FloatFeatures = Features;
 
 
 
 
◆ GetAmbiguities()
This routine matches blob to the built-in templates to find out if there are any classes other than the correct class which are potential ambiguities.
- Parameters
 - 
  
    | Blob | blob to get classification ambiguities for  | 
    | CorrectClass | correct class for Blob | 
  
   
Globals:
- CurrentRatings used by qsort compare routine
 
- PreTrainedTemplates built-in templates
 
- Returns
 - String containing all possible ambiguous classes. 
 
Definition at line 1592 of file adaptmatch.cpp.
 1598   Results->Initialize();
 
 1616   Ambiguities = 
new UNICHAR_ID[Results->match.size() + 1];
 
 1617   if (Results->match.size() > 1 ||
 
 1618       (Results->match.size() == 1 &&
 
 1619           Results->match[0].unichar_id != CorrectClass)) {
 
 1620     for (i = 0; i < Results->match.size(); i++)
 
 1621       Ambiguities[i] = Results->match[i].unichar_id;
 
 1622     Ambiguities[i] = -1;
 
 1624     Ambiguities[0] = -1;
 
 
 
 
◆ GetCharNormFeature()
      
        
          | int tesseract::Classify::GetCharNormFeature  | 
          ( | 
          const INT_FX_RESULT_STRUCT &  | 
          fx_info,  | 
        
        
           | 
           | 
          INT_TEMPLATES  | 
          templates,  | 
        
        
           | 
           | 
          uint8_t *  | 
          pruner_norm_array,  | 
        
        
           | 
           | 
          uint8_t *  | 
          char_norm_array  | 
        
        
           | 
          ) | 
           |  | 
        
      
 
This routine calls the integer (Hardware) feature extractor if it has not been called before for this blob.
The results from the feature extractor are placed into globals so that they can be used in other routines without re-extracting the features.
It then copies the char norm features into the IntFeatures array provided by the caller.
- Parameters
 - 
  
    | templates | used to compute char norm adjustments  | 
    | pruner_norm_array | Array of factors from blob normalization process  | 
    | char_norm_array | array to fill with dummy char norm adjustments  | 
    | fx_info | Globals: | 
  
   
- Returns
 - Number of features extracted or 0 if an error occurred. 
 
Definition at line 1678 of file adaptmatch.cpp.
 
 
◆ GetClassToDebug()
      
        
          | CLASS_ID tesseract::Classify::GetClassToDebug  | 
          ( | 
          const char *  | 
          Prompt,  | 
        
        
           | 
           | 
          bool *  | 
          adaptive_on,  | 
        
        
           | 
           | 
          bool *  | 
          pretrained_on,  | 
        
        
           | 
           | 
          int *  | 
          shape_id  | 
        
        
           | 
          ) | 
           |  | 
        
      
 
This routine prompts the user with Prompt and waits for the user to enter something in the debug window. 
- Parameters
 - 
  
    | Prompt | prompt to print while waiting for input from window  | 
    | adaptive_on |  | 
    | pretrained_on |  | 
    | shape_id |  | 
  
   
- Returns
 - Character entered in the debug window. 
 
- Note
 - Globals: none 
 
Definition at line 1256 of file intproto.cpp.
 1262   int unichar_id = INVALID_UNICHAR_ID;
 
 1271           *adaptive_on = 
false;
 
 1272           *pretrained_on = 
true;
 
 1273           if (*shape_id >= 0 && *shape_id < shape_table_->NumShapes()) {
 
 1277             tprintf(
"Shape %d, first unichar=%d, font=%d\n",
 
 1278                     *shape_id, unichar_id, font_id);
 
 1283           tprintf(
"No shape table loaded!\n");
 
 1289             *adaptive_on = 
true;
 
 1290             *pretrained_on = 
false;
 
 1293             *adaptive_on = 
false;
 
 1294             *pretrained_on = 
true;
 
 1296             *adaptive_on = 
true;
 
 1297             *pretrained_on = 
true;
 
 1309           tprintf(
"Char class '%s' not found in unicharset",
 
 
 
 
◆ getDict()
  
  
      
        
          | virtual Dict& tesseract::Classify::getDict  | 
          ( | 
           | ) | 
           | 
         
       
   | 
  
inline virtual   | 
  
 
 
◆ GetFontinfoId()
      
        
          | int tesseract::Classify::GetFontinfoId  | 
          ( | 
          ADAPT_CLASS  | 
          Class,  | 
        
        
           | 
           | 
          uint8_t  | 
          ConfigId  | 
        
        
           | 
          ) | 
           |  | 
        
      
 
 
◆ InitAdaptedClass()
This routine creates a new adapted class and uses Blob as the model for the first config in that class.
- Parameters
 - 
  
    | Blob | blob to model new class after  | 
    | ClassId | id of the class to be initialized  | 
    | FontinfoId | font information inferred from pre-trained templates  | 
    | Class | adapted class to be initialized  | 
    | Templates | adapted templates to add new class to | 
  
   
Globals:
Definition at line 693 of file adaptmatch.cpp.
  720     BaselineCutoffs[ClassId] = CharNormCutoffs[ClassId];
 
  724   for (Fid = 0; Fid < Features->
NumFeatures; Fid++) {
 
  730     Proto = &(TempProto->
Proto);
 
  756     tprintf(
"Added new class '%s' with class id %d and %d protos.\n",
 
 
 
 
◆ InitAdaptiveClassifier()
This routine reads in the training information needed by the adaptive classifier and saves it into global variables. Parameters: load_pre_trained_templates indicates whether the pre-trained templates (inttemp, normproto and pffmtable components) should be loaded; it should only be set to true if the necessary classifier components are present in the [lang].traineddata file. Globals: BuiltInTemplatesFile (file to get built-in templates from); BuiltInCutoffsFile (file to get the average number of features per class from); classify_use_pre_adapted_templates (enables use of pre-adapted templates). 
Definition at line 527 of file adaptmatch.cpp.
  543         tprintf(
"Error loading shape table!\n");
 
  554     static_classifier_ = 
new TessClassifier(
false, 
this);
 
  567   for (uint16_t& BaselineCutoff : BaselineCutoffs) {
 
  577     if (!fp.Open(Filename.
c_str(), 
nullptr)) {
 
  580       cprintf(
"\nReading pre-adapted templates from %s ...\n",
 
  588         BaselineCutoffs[i] = CharNormCutoffs[i];
 
 
 
 
◆ LargeSpeckle()
      
        
          | bool tesseract::Classify::LargeSpeckle  | 
          ( | 
          const TBLOB &  | 
          blob | ) | 
           | 
        
      
 
 
◆ LearnBlob()
Definition at line 70 of file blobclass.cpp.
   82     tr_file_data_ += 
"\n";
 
   83     tr_file_data_ += fontname;
 
   85     tr_file_data_ += blob_text;
 
   86     tr_file_data_ += 
"\n";
 
   91     tprintf(
"Blob learned was invalid!\n");
 
 
 
 
◆ LearnPieces()
      
        
          | void tesseract::Classify::LearnPieces  | 
          ( | 
          const char *  | 
          fontname,  | 
        
        
           | 
           | 
          int  | 
          start,  | 
        
        
           | 
           | 
          int  | 
          length,  | 
        
        
           | 
           | 
          float  | 
          threshold,  | 
        
        
           | 
           | 
          CharSegmentationType  | 
          segmentation,  | 
        
        
           | 
           | 
          const char *  | 
          correct_text,  | 
        
        
           | 
           | 
          WERD_RES *  | 
          word  | 
        
        
           | 
          ) | 
           |  | 
        
      
 
Definition at line 374 of file adaptmatch.cpp.
  390   if (rotated_blob == 
nullptr)
 
  393   #ifndef GRAPHICS_DISABLED 
  399     learn_debug_win_->
Update();
 
  403     ASSERT_HOST(learn_fragments_debug_win_ != 
nullptr);  
 
  404     blob->
plot(learn_fragments_debug_win_,
 
  406     learn_fragments_debug_win_->
Update();
 
  408   #endif  // GRAPHICS_DISABLED 
  410   if (fontname != 
nullptr) {
 
  414     DENORM bl_denorm, cn_denorm;
 
  417                      &bl_denorm, &cn_denorm, &fx_info);
 
  418     LearnBlob(fontname, rotated_blob, cn_denorm, fx_info, correct_text);
 
  421     int font_id = word->
fontinfo != 
nullptr 
  425       tprintf(
"Adapting to char = %s, thr= %g font_id= %d\n",
 
  433       AdaptToChar(rotated_blob, class_id, font_id, threshold,
 
  437     tprintf(
"Can't adapt to %s not in unicharset\n", correct_text);
 
  439   if (rotated_blob != blob) {
 
 
 
 
◆ LearnWord()
      
        
          | void tesseract::Classify::LearnWord  | 
          ( | 
          const char *  | 
          fontname,  | 
        
        
           | 
           | 
          WERD_RES *  | 
          word  | 
        
        
           | 
          ) | 
           |  | 
        
      
 
Definition at line 250 of file adaptmatch.cpp.
  252   if (word_len == 0) 
return;
 
  254   float* thresholds = 
nullptr;
 
  255   if (fontname == 
nullptr) {
 
  261       tprintf(
"\n\nAdapting to word = %s\n",
 
  263     thresholds = 
new float[word_len];
 
  271   #ifndef GRAPHICS_DISABLED 
  273     if (learn_fragmented_word_debug_win_ != 
nullptr) {
 
  283   #endif  // GRAPHICS_DISABLED 
  285   for (
int ch = 0; ch < word_len; ++ch) {
 
  290       float threshold = thresholds != 
nullptr ? thresholds[ch] : 0.0f;
 
  299         bool garbage = 
false;
 
  301         for (frag = 0; frag < word->
best_state[ch]; ++frag) {
 
  312             for (frag = 0; frag < word->
best_state[ch]; ++frag) {
 
  317                   tokens[0].c_str(), frag, word->
best_state[ch],
 
  321               for (
int i = 0; i < tokens.
size(); i++) {
 
  322                 full_string += tokens[i];
 
  323                 if (i != tokens.
size() - 1)
 
  326               LearnPieces(fontname, start_blob + frag, 1, threshold,
 
  362   delete [] thresholds;
 
 
 
 
◆ LooksLikeGarbage()
      
        
          | bool tesseract::Classify::LooksLikeGarbage  | 
          ( | 
          TBLOB *  | 
          blob | ) | 
           | 
        
      
 
Definition at line 1633 of file adaptmatch.cpp.
 1634   auto *ratings = 
new BLOB_CHOICE_LIST();
 
 1636   BLOB_CHOICE_IT ratings_it(ratings);
 
 1642   for (ratings_it.mark_cycle_pt(); !ratings_it.cycled_list();
 
 1643        ratings_it.forward()) {
 
 1647     float certainty = ratings_it.data()->certainty();
 
 
 
 
◆ MakeNewTemporaryConfig()
- Parameters
 - 
  
    | Templates | adapted templates to add new config to  | 
    | ClassId | class id to associate with new config  | 
    | FontinfoId | font information inferred from pre-trained templates  | 
    | NumFeatures | number of features in IntFeatures  | 
    | Features | features describing model for new config  | 
    | FloatFeatures | floating-pt representation of features | 
  
   
- Returns
 - The id of the new config created, a negative integer in case of error. 
 
Definition at line 1740 of file adaptmatch.cpp.
 1752   int MaxProtoId, OldMaxProtoId;
 
 1764   Class = Templates->
Class[ClassId];
 
 1767     ++NumAdaptationsFailed;
 
 1769       cprintf(
"Cannot make new temporary config: maximum number exceeded.\n");
 
 1776                                     NumFeatures, Features,
 
 1782   for (i = 0; i < NumOldProtos; i++)
 
 1786                                        NumFeatures, Features,
 
 1794     ++NumAdaptationsFailed;
 
 1796       cprintf(
"Cannot make new temp protos: maximum number exceeded.\n");
 
 1807     cprintf(
"Making new temp config %d fontinfo id %d" 
 1808             " using %d old and %d new protos.\n",
 
 1809             ConfigId, 
Config->FontinfoId,
 
 1810             NumOldProtos, MaxProtoId - OldMaxProtoId);
 
 
 
 
◆ MakeNewTempProtos()
This routine finds sets of sequential bad features that all have the same angle and converts each set into a new temporary proto. The temp proto is added to the proto pruner for IClass, pushed onto the list of temp protos in Class, and added to TempProtoMask.
- Parameters
 - 
  
    | Features | floating-pt features describing new character  | 
    | NumBadFeat | number of bad features to turn into protos  | 
    | BadFeat | feature ids of bad features  | 
    | IClass | integer class templates to add new protos to  | 
    | Class | adapted class templates to add new protos to  | 
    | TempProtoMask | proto mask to add new protos to | 
  
   
Globals: none
- Returns
 - Max proto id in class after all protos have been added. 
 
Definition at line 1834 of file adaptmatch.cpp.
 1846   float X1, X2, Y1, Y2;
 
 1847   float A1, A2, AngleDelta;
 
 1848   float SegmentLength;
 
 1851   for (ProtoStart = BadFeat, LastBad = ProtoStart + NumBadFeat;
 
 1852        ProtoStart < LastBad; ProtoStart = ProtoEnd) {
 
 1853     F1 = Features->
Features[*ProtoStart];
 
 1858     for (ProtoEnd = ProtoStart + 1,
 
 1862       F2 = Features->
Features[*ProtoEnd];
 
 1867       AngleDelta = fabs(A1 - A2);
 
 1868       if (AngleDelta > 0.5)
 
 1869         AngleDelta = 1.0 - AngleDelta;
 
 1872           fabs(X1 - X2) > SegmentLength ||
 
 1873           fabs(Y1 - Y2) > SegmentLength)
 
 1877     F2 = Features->
Features[*(ProtoEnd - 1)];
 
 1887     Proto = &(TempProto->
Proto);
 
 1892     Proto->
Length = SegmentLength;
 
 1894     Proto->
X = (X1 + X2) / 2.0;
 
 
 
 
◆ MakePermanent()
- Parameters
 - 
  
    | Templates | current set of adaptive templates  | 
    | ClassId | class containing config to be made permanent  | 
    | ConfigId | config to be made permanent  | 
    | Blob | current blob being adapted to | 
  
   
Globals: none 
Definition at line 1920 of file adaptmatch.cpp.
 1929   Class = Templates->
Class[ClassId];
 
 1940   Perm->Ambigs = Ambigs;
 
 1941   Perm->FontinfoId = 
Config->FontinfoId;
 
 1955     tprintf(
"Making config %d for %s (ClassId %d) permanent:" 
 1956             " fontinfo id %d, ambiguities '",
 
 1957             ConfigId, 
getDict().getUnicharset().debug_str(ClassId).c_str(),
 
 1960         *AmbigsPointer >= 0; ++AmbigsPointer)
 
 
 
 
◆ MasterMatcher()
Factored-out calls to IntegerMatcher based on class pruner results. Returns integer matcher results inside CLASS_PRUNER_RESULTS structure. 
Definition at line 1088 of file adaptmatch.cpp.
 1098   int top = blob_box.
top();
 
 1099   int bottom = blob_box.
bottom();
 
 1101   for (
int c = 0; c < results.
size(); c++) {
 
 1102     CLASS_ID class_id = results[c].Class;
 
 1111               num_features, features,
 
 1118                                     matcher_multiplier, norm_factors,
 
 1119                                     &int_result, final_results);
 
 
 
 
◆ NewAdaptedTemplates()
      
        
          | ADAPT_TEMPLATES tesseract::Classify::NewAdaptedTemplates  | 
          ( | 
          bool  | 
          InitFromUnicharset | ) | 
           | 
        
      
 
Allocates memory for adapted templates.
- Parameters
 - 
  
    | InitFromUnicharset | if true, add an empty class for each char in unicharset to the newly created templates  | 
  
   
- Returns
 - Ptr to new adapted templates.
 
- Note
 - Globals: none 
 
Definition at line 151 of file adaptive.cpp.
  162     Templates->
Class[i] = 
nullptr;
 
 
 
 
◆ NormalizeOutlines()
      
        
          | void tesseract::Classify::NormalizeOutlines  | 
          ( | 
          LIST  | 
          Outlines,  | 
        
        
           | 
           | 
          float *  | 
          XScale,  | 
        
        
           | 
           | 
          float *  | 
          YScale  | 
        
        
           | 
          ) | 
           |  | 
        
      
 
This routine normalizes every outline in Outlines according to the currently selected normalization method. It also returns the scale factors that it used to do this scaling. The scale factors returned represent the x and y sizes in the normalized coordinate system that correspond to 1 pixel in the original coordinate system. Outlines are changed and XScale and YScale are updated.
Globals:
- classify_norm_method method being used for normalization
 
- classify_char_norm_range map radius of gyration to this value 
- Parameters
 - 
  
    | Outlines | list of outlines to be normalized  | 
    | XScale | x-direction scale factor used by routine  | 
    | YScale | y-direction scale factor used by routine  | 
  
   
 
Definition at line 275 of file mfoutline.cpp.
  283       ASSERT_HOST(!
"How did NormalizeOutlines get called in character mode?");
 
 
 
 
◆ PrintAdaptedTemplates()
      
        
          | void tesseract::Classify::PrintAdaptedTemplates  | 
          ( | 
          FILE *  | 
          File,  | 
        
        
           | 
           | 
          ADAPT_TEMPLATES  | 
          Templates  | 
        
        
           | 
          ) | 
           |  | 
        
      
 
This routine prints a summary of the adapted templates in Templates to File.
- Parameters
 - 
  
    | File | open text file to print Templates to  | 
    | Templates | adapted templates to print to File | 
  
   
- Note
 - Globals: none 
 
Definition at line 244 of file adaptive.cpp.
  248   fprintf (File, 
"\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n");
 
  249   fprintf (File, 
"Num classes = %d;  Num permanent classes = %d\n\n",
 
  251   fprintf (File, 
"   Id  NC NPC  NP NPP\n");
 
  252   fprintf (File, 
"------------------------\n");
 
  254   for (
int i = 0; i < (Templates->
Templates)->NumClasses; i++) {
 
  256     AClass = Templates->
Class[i];
 
  258       fprintf (File, 
"%5d  %s %3d %3d %3d %3d\n",
 
  265   fprintf (File, 
"\n");
 
 
 
 
◆ PrintAdaptiveMatchResults()
      
        
          | void tesseract::Classify::PrintAdaptiveMatchResults  | 
          ( | 
          const ADAPT_RESULTS &  | 
          results | ) | 
           | 
        
      
 
This routine writes the matches in Results to File.
- Parameters
 - 
  
    | results | match results to write to File | 
  
   
Globals: none 
Definition at line 2013 of file adaptmatch.cpp.
 2014   for (
int i = 0; i < results.
match.
size(); ++i) {
 
 2016     results.
match[i].Print();
 
 
 
 
◆ PruneClasses()
Runs the class pruner from int_templates on the given features, returning the number of classes output in results. 
- Parameters
 - 
  
    | int_templates | Class pruner tables  | 
    | num_features | Number of features in blob  | 
    | features | Array of features  | 
    | normalization_factors | Array of fudge factors from blob normalization process (by CLASS_INDEX)  | 
    | expected_num_features | Array of expected number of features for each class (by CLASS_INDEX)  | 
    | results | Sorted Array of pruned classes. Must be an array of size at least int_templates->NumClasses.  | 
    | keep_this |  | 
  
   
Definition at line 451 of file intmatcher.cpp.
  458   ClassPruner pruner(int_templates->
NumClasses);
 
  460   pruner.ComputeScores(int_templates, num_features, features);
 
  462   pruner.AdjustForExpectedNumFeatures(expected_num_features,
 
  472   if (normalization_factors != 
nullptr) {
 
  474                                normalization_factors);
 
  476     pruner.NoNormalization();
 
  483     pruner.DebugMatch(*
this, int_templates, features);
 
  486     pruner.SummarizeResult(*
this, int_templates, expected_num_features,
 
  488                            normalization_factors);
 
  491   return pruner.SetupResults(results);
 
 
 
 
◆ ReadAdaptedTemplates()
Read a set of adapted templates from file and return a ptr to the templates.
- Parameters
 - 
  
    | fp | open text file to read adapted templates from  | 
  
   
- Returns
 - Ptr to adapted templates read from file.
 
- Note
 - Globals: none 
 
Definition at line 332 of file adaptive.cpp.
  343   for (int i = 0; i < (Templates->Templates)->NumClasses; i++) {
 
 
 
 
◆ ReadIntTemplates()
This routine reads a set of integer templates from fp. The file must already be open and must be in the correct binary format. 
- Parameters
 - 
  
    | fp | open file to read templates from  | 
  
   
- Returns
 - Pointer to integer templates read from File. 
 
- Note
 - Globals: none 
 
Definition at line 717 of file intproto.cpp.
  719   int i, j, w, x, y, z;
 
  729   int b, bit_number, last_cp_bit_number, new_b, new_i, new_w;
 
  733   auto **TempClassPruner =
 
  735   uint32_t SetBitsForMask =           
 
  737   uint32_t Mask, NewMask, ClassBits;
 
  744   if (fp->FReadEndian(&unicharset_size, 
sizeof(unicharset_size), 1) != 1)
 
  745     tprintf(
"Bad read of inttemp!\n");
 
  750     tprintf(
"Bad read of inttemp!\n");
 
  756       tprintf(
"Bad read of inttemp!\n");
 
  759   if (version_id < 3) {
 
  764   if (version_id < 2) {
 
  765     if (fp->FReadEndian(IndexFor, 
sizeof(IndexFor[0]), unicharset_size) !=
 
  767       tprintf(
"Bad read of inttemp!\n");
 
  769     if (fp->FReadEndian(ClassIdFor, 
sizeof(ClassIdFor[0]),
 
  771       tprintf(
"Bad read of inttemp!\n");
 
  776   const int kNumBuckets =
 
  780     if (fp->FReadEndian(Pruner, 
sizeof(Pruner->
p[0][0][0][0]), kNumBuckets) !=
 
  782       tprintf(
"Bad read of inttemp!\n");
 
  784     if (version_id < 2) {
 
  785       TempClassPruner[i] = Pruner;
 
  792   if (version_id < 2) {
 
  796       if (ClassIdFor[i] > max_class_id)
 
  797         max_class_id = ClassIdFor[i];
 
  810               if (TempClassPruner[i]->p[x][y][z][w] == 0)
 
  814                 if (bit_number > last_cp_bit_number)
 
  818                 Mask = SetBitsForMask << b;
 
  819                 ClassBits = TempClassPruner[i]->p[x][y][z][w] & Mask;
 
  826                   ClassBits <<= (new_b - b);
 
  828                   ClassBits >>= (b - new_b);
 
  832                 NewMask = SetBitsForMask << new_b;
 
  833                 Templates->
ClassPruners[new_i]->
p[x][y][z][new_w] &= ~NewMask;
 
  834                 Templates->
ClassPruners[new_i]->
p[x][y][z][new_w] |= ClassBits;
 
  839       delete TempClassPruner[i];
 
  850       tprintf(
"Bad read of inttemp!\n");
 
  851     if (version_id == 0) {
 
  853       for (j = 0; j < 5; ++j) {
 
  855         if (fp->FRead(&junk, 
sizeof(junk), 1) != 1)
 
  856           tprintf(
"Bad read of inttemp!\n");
 
  859     int num_configs = version_id < 4 ? MaxNumConfigs : Class->
NumConfigs;
 
  861     if (fp->FReadEndian(Class->
ConfigLengths, 
sizeof(uint16_t), num_configs) !=
 
  863       tprintf(
"Bad read of inttemp!\n");
 
  865     if (version_id < 2) {
 
  877         tprintf(
"Bad read of inttemp!\n");
 
  887                           num_buckets) != num_buckets)
 
  888         tprintf(
"Bad read of inttemp!\n");
 
  890         if (fp->FRead(&ProtoSet->
Protos[x].
A, 
sizeof(ProtoSet->
Protos[x].
A),
 
  898           tprintf(
"Bad read of inttemp!\n");
 
  901                             WerdsPerConfigVec) != WerdsPerConfigVec)
 
  902           cprintf(
"Bad read of inttemp!\n");
 
  906     if (version_id < 4) {
 
  913   if (version_id < 2) {
 
  921       if (i < Templates->NumClasses) {
 
  923           fprintf(stderr, 
"Non-contiguous class ids in inttemp\n");
 
  928           fprintf(stderr, 
"Class id %d exceeds NumClassesIn (Templates) %d\n",
 
  935   if (version_id >= 4) {
 
  936     using namespace std::placeholders; 
 
  938     if (version_id >= 5) {
 
  948   delete[] TempClassPruner;
 
 
 
 
◆ ReadNewCutoffs()
      
        
          | void tesseract::Classify::ReadNewCutoffs  | 
          ( | 
          TFile *  | 
          fp,  | 
        
        
           | 
           | 
          uint16_t *  | 
          Cutoffs  | 
        
        
           | 
          ) | 
           |  | 
        
      
 
Read all of the class-id/cutoff pairs from fp and insert them into the Cutoffs array. Cutoffs are indexed in the array by class id. Unused entries in the array are set to an arbitrarily high cutoff value. 
- Parameters
 - 
  
    | fp | file containing cutoff definitions  | 
    | Cutoffs | array to put cutoffs into  | 
  
   
Definition at line 40 of file cutoffs.cpp.
   46       tprintf(
"Error during read of shapetable pffmtable!\n");
 
   52   const int kMaxLineSize = 100;
 
   53   char line[kMaxLineSize];
 
   54   while (fp->FGets(line, kMaxLineSize) != 
nullptr) {
 
   57     std::istringstream stream(line);
 
   58     stream >> Class >> Cutoff;
 
   62     if (Class.compare(
"NULL") == 0) {
 
   68     Cutoffs[ClassId] = Cutoff;
 
 
 
 
◆ ReadNormProtos()
This routine allocates a new data structure to hold a set of character normalization protos. It then fills in the data structure by reading from the specified File. 
- Parameters
 - 
  
    | fp | open text file to read normalization protos from  | 
  
   
Globals: none 
- Returns
 - Character normalization protos. 
 
Definition at line 189 of file normmatch.cpp.
  210   const int kMaxLineSize = 100;
 
  211   char line[kMaxLineSize];
 
  212   while (fp->FGets(line, kMaxLineSize) != 
nullptr) {
 
  213     std::istringstream stream(line);
 
  214     stream >> unichar >> NumProtos;
 
  221       for (i = 0; i < NumProtos; i++)
 
  225       tprintf(
"Error: unichar %s in normproto file is not in unichar set.\n",
 
  227       for (i = 0; i < NumProtos; i++)
 
 
 
 
◆ RefreshDebugWindow()
      
        
          | void tesseract::Classify::RefreshDebugWindow  | 
          ( | 
          ScrollView **  | 
          win,  | 
        
        
           | 
           | 
          const char *  | 
          msg,  | 
        
        
           | 
           | 
          int  | 
          y_offset,  | 
        
        
           | 
           | 
          const TBOX &  | 
          wbox  | 
        
        
           | 
          ) | 
           |  | 
        
      
 
Definition at line 226 of file adaptmatch.cpp.
  228   #ifndef GRAPHICS_DISABLED 
  229   const int kSampleSpaceWidth = 500;
 
  230   if (*win == 
nullptr) {
 
  231     *win = 
new ScrollView(msg, 100, y_offset, kSampleSpaceWidth * 2, 200,
 
  232                           kSampleSpaceWidth * 2, 200, 
true);
 
  235   (*win)->Pen(64, 64, 64);
 
  240   (*win)->ZoomToRectangle(wbox.
left(), wbox.
top(),
 
  242   #endif  // GRAPHICS_DISABLED 
 
 
 
◆ RemoveBadMatches()
      
        
          | void tesseract::Classify::RemoveBadMatches  | 
          ( | 
          ADAPT_RESULTS *  | 
          Results | ) | 
           | 
        
      
 
This routine steps through each matching class in Results and removes it from the match list if its rating is worse than the BestRating plus a pad. In other words, all good matches get moved to the front of the classes array.
- Parameters
 - 
  
    | Results | contains matches to be filtered | 
  
   
Globals:
- matcher_bad_match_pad defines a "bad match" 
 
Definition at line 2033 of file adaptmatch.cpp.
 2035   float BadMatchThreshold;
 
 2036   static const char* romans = 
"i v x I V X";
 
 2044     float scored_one = ScoredUnichar(unichar_id_one, *Results);
 
 2045     float scored_zero = ScoredUnichar(unichar_id_zero, *Results);
 
 2047     for (Next = NextGood = 0; Next < Results->
match.
size(); Next++) {
 
 2049       if (match.
rating >= BadMatchThreshold) {
 
 2054                    scored_one < BadMatchThreshold) {
 
 2055           Results->
match[Next].unichar_id = unichar_id_one;
 
 2057                    scored_zero < BadMatchThreshold) {
 
 2058           Results->
match[Next].unichar_id = unichar_id_zero;
 
 2060           Results->
match[Next].unichar_id = INVALID_UNICHAR_ID;  
 
 2062         if (Results->
match[Next].unichar_id != INVALID_UNICHAR_ID) {
 
 2063           if (NextGood == Next) {
 
 2066             Results->
match[NextGood++] = Results->
match[Next];
 
 2072     for (Next = NextGood = 0; Next < Results->
match.
size(); Next++) {
 
 2073       if (Results->
match[Next].rating >= BadMatchThreshold) {
 
 2074         if (NextGood == Next) {
 
 2077           Results->
match[NextGood++] = Results->
match[Next];
 
 
 
 
◆ RemoveExtraPuncs()
      
        
          | void tesseract::Classify::RemoveExtraPuncs  | 
          ( | 
          ADAPT_RESULTS *  | 
          Results | ) | 
           | 
        
      
 
This routine discards extra digits or punctuation from the results. We keep only the top 2 punctuation answers and the top 1 digit answer if present.
- Parameters
 - 
  
    | Results | contains matches to be filtered  | 
  
   
Definition at line 2093 of file adaptmatch.cpp.
 2098   static char punc_chars[] = ". , ; : / ` ~ ' - = \\ | \" ! _ ^";
 
 2099   static char digit_chars[] = "0 1 2 3 4 5 6 7 8 9";
 
 2103   for (Next = NextGood = 0; Next < Results->
match.
size(); Next++) {
 
 2106     if (strstr(punc_chars,
 
 2108       if (punc_count >= 2)
 
 2112       if (strstr(digit_chars,
 
 2114         if (digit_count >= 1)
 
 2120       if (NextGood == Next) {
 
 2123         Results->
match[NextGood++] = match;
 
 
 
 
◆ ResetAdaptiveClassifierInternal()
      
        
          | void tesseract::Classify::ResetAdaptiveClassifierInternal  | 
          ( | 
           | ) | 
           | 
        
      
 
Definition at line 598 of file adaptmatch.cpp.
  600     tprintf(
"Resetting adaptive classifier (NumAdaptationsFailed=%d)\n",
 
  601             NumAdaptationsFailed);
 
  608   NumAdaptationsFailed = 0;
 
 
 
 
◆ SetAdaptiveThreshold()
      
        
          | void tesseract::Classify::SetAdaptiveThreshold  | 
          ( | 
          float  | 
          Threshold | ) | 
           | 
        
      
 
This routine resets the internal thresholds inside the integer matcher to correspond to the specified threshold.
- Parameters
 - 
  
    | Threshold | threshold for creating new templates | 
  
   
Globals:
- matcher_good_threshold default good match rating 
 
Definition at line 2141 of file adaptmatch.cpp.
 2144       ClipToRange<int>(255 * Threshold, 0, 255));
 
 2146       ClipToRange<int>(255 * Threshold, 0, 255));
 
 
 
 
◆ SetStaticClassifier()
      
        
          | void tesseract::Classify::SetStaticClassifier  | 
          ( | 
          ShapeClassifier *  | 
          static_classifier | ) | 
           | 
        
      
 
Definition at line 193 of file classify.cpp.
  194   delete static_classifier_;
 
  195   static_classifier_ = static_classifier;
 
 
 
 
◆ SettupPass1()
      
        
          | void tesseract::Classify::SettupPass1  | 
          ( | 
           | ) | 
           | 
        
      
 
This routine prepares the adaptive matcher for the start of the first pass. Learning is enabled (unless it is disabled for the whole program).
- Note
 - This is somewhat redundant: it simply says that if learning is enabled, it will remain enabled on the first pass, and if it is disabled, it will remain disabled. It is included only to make it very clear that learning is controlled directly by the global setting of EnableLearning.
 
Globals:
Definition at line 652 of file adaptmatch.cpp.
 
 
◆ SettupPass2()
      
        
          | void tesseract::Classify::SettupPass2  | 
          ( | 
           | ) | 
           | 
        
      
 
This routine prepares the adaptive matcher for the start of the second pass. Further learning is disabled.
Globals:
Definition at line 669 of file adaptmatch.cpp.
 
 
◆ SetupBLCNDenorms()
Definition at line 127 of file intfx.cpp.
  132   FCOORD center, second_moments;
 
  134   if (fx_info != 
nullptr) {
 
  144                                 1.0f, 1.0f, 128.0f, 128.0f);
 
  146   if (nonlinear_norm) {
 
  154                               0.0f, 0.0f, x_coords, y_coords);
 
  157                                   center.
x(), center.
y(),
 
  158                                   51.2f / second_moments.
x(),
 
  159                                   51.2f / second_moments.
y(),
 
 
 
 
◆ shape_table()
  
  
      
        
          | const ShapeTable* tesseract::Classify::shape_table  | 
          ( | 
           | ) | 
           const | 
         
       
   | 
  
inline   | 
  
 
 
◆ ShapeIDToClassID()
      
        
          | int tesseract::Classify::ShapeIDToClassID  | 
          ( | 
          int  | 
          shape_id | ) | 
           const | 
        
      
 
Definition at line 2220 of file adaptmatch.cpp.
 2225     for (
int config = 0; config < fs.size; ++config) {
 
 2226       if (fs.configs[config] == shape_id)
 
 2230   tprintf(
"Shape %d not found\n", shape_id);
 
 
 
 
◆ ShowBestMatchFor()
      
        
          | void tesseract::Classify::ShowBestMatchFor  | 
          ( | 
          int  | 
          shape_id,  | 
        
        
           | 
           | 
          const INT_FEATURE_STRUCT *  | 
          features,  | 
        
        
           | 
           | 
          int  | 
          num_features  | 
        
        
           | 
          ) | 
           |  | 
        
      
 
This routine displays debug information for the best config of the given shape_id for the given set of features.
- Parameters
 - 
  
    | shape_id | classifier id to work with  | 
    | features | features of the unknown character  | 
    | num_features | Number of features in the features array.  | 
  
   
Definition at line 2159 of file adaptmatch.cpp.
 2162 #ifndef GRAPHICS_DISABLED 
 2163   uint32_t config_mask;
 
 2165     tprintf(
"No built-in templates for class/shape %d\n", shape_id);
 
 2168   if (num_features <= 0) {
 
 2169     tprintf(
"Illegal blob (char norm features)!\n");
 
 2176             num_features, features, &cn_result,
 
 2180   config_mask = 1 << cn_result.
config;
 
 2182   tprintf(
"Static Shape ID: %d\n", shape_id);
 
 2185             &config_mask, num_features, features, &cn_result,
 
 2189 #endif  // GRAPHICS_DISABLED 
 
 
 
◆ ShowMatchDisplay()
      
        
          | void tesseract::Classify::ShowMatchDisplay  | 
          ( | 
           | ) | 
           | 
        
      
 
This routine sends the shapes in the global display lists to the match debugger window.
Globals:
- FeatureShapes display list containing feature matches
 
- ProtoShapes display list containing proto matches 
 
Definition at line 962 of file intproto.cpp.
  965   if (ProtoDisplayWindow) {
 
  966     ProtoDisplayWindow->
Clear();
 
  968   if (FeatureDisplayWindow) {
 
  969     FeatureDisplayWindow->
Clear();
 
  976   if (ProtoDisplayWindow) {
 
  980   if (FeatureDisplayWindow) {
 
 
 
 
◆ StartBackupAdaptiveClassifier()
      
        
          | void tesseract::Classify::StartBackupAdaptiveClassifier  | 
          ( | 
           | ) | 
           | 
        
      
 
 
◆ SwitchAdaptiveClassifier()
      
        
          | void tesseract::Classify::SwitchAdaptiveClassifier  | 
          ( | 
           | ) | 
           | 
        
      
 
Definition at line 613 of file adaptmatch.cpp.
  619     tprintf(
"Switch to backup adaptive classifier (NumAdaptationsFailed=%d)\n",
 
  620             NumAdaptationsFailed);
 
  625   NumAdaptationsFailed = 0;
 
 
 
 
◆ TempConfigReliable()
Definition at line 2236 of file adaptmatch.cpp.
 2239     tprintf(
"NumTimesSeen for config of %s is %d\n",
 
 2240             getDict().getUnicharset().debug_str(class_id).c_str(),
 
 2252     int ambigs_size = (ambigs == 
nullptr) ? 0 : ambigs->
size();
 
 2253     for (
int ambig = 0; ambig < ambigs_size; ++ambig) {
 
 2255       assert(ambig_class != 
nullptr);
 
 2260           tprintf(
"Ambig %s has not been seen enough times," 
 2261                   " not making config for %s permanent\n",
 
 2262                   getDict().getUnicharset().debug_str(
 
 2263                       (*ambigs)[ambig]).c_str(),
 
 2264                   getDict().getUnicharset().debug_str(class_id).c_str());
 
 
 
 
◆ UpdateAmbigsGroup()
      
        
          | void tesseract::Classify::UpdateAmbigsGroup  | 
          ( | 
          CLASS_ID  | 
          class_id,  | 
        
        
           | 
           | 
          TBLOB *  | 
          Blob  | 
        
        
           | 
          ) | 
           |  | 
        
      
 
Definition at line 2273 of file adaptmatch.cpp.
 2276   int ambigs_size = (ambigs == 
nullptr) ? 0 : ambigs->
size();
 
 2278     tprintf(
"Running UpdateAmbigsGroup for %s class_id=%d\n",
 
 2279             getDict().getUnicharset().debug_str(class_id).c_str(), class_id);
 
 2281   for (
int ambig = 0; ambig < ambigs_size; ++ambig) {
 
 2282     CLASS_ID ambig_class_id = (*ambigs)[ambig];
 
 2290           tprintf(
"Making config %d of %s permanent\n", cfg,
 
 2291                   getDict().getUnicharset().debug_str(
 
 2292                       ambig_class_id).c_str());
 
 
 
 
◆ WriteAdaptedTemplates()
      
        
          | void tesseract::Classify::WriteAdaptedTemplates  | 
          ( | 
          FILE *  | 
          File,  | 
        
        
           | 
           | 
          ADAPT_TEMPLATES  | 
          Templates  | 
        
        
           | 
          ) | 
           |  | 
        
      
 
This routine saves Templates to File in a binary format.
- Parameters
 - 
  
    | File | open file to write Templates to (written in a binary format)  | 
    | Templates | set of adapted templates to write to File | 
  
   
- Note
 - Globals: none 
 
Definition at line 453 of file adaptive.cpp.
  463   for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
 
 
 
 
◆ WriteIntTemplates()
      
        
          | void tesseract::Classify::WriteIntTemplates  | 
          ( | 
          FILE *  | 
          File,  | 
        
        
           | 
           | 
          INT_TEMPLATES  | 
          Templates,  | 
        
        
           | 
           | 
          const UNICHARSET &  | 
          target_unicharset  | 
        
        
           | 
          ) | 
           |  | 
        
      
 
This routine writes Templates to File. The format is an efficient binary format. File must already be open for writing. 
- Parameters
 - 
  
    | File | open file to write templates to  | 
    | Templates | templates to save into File  | 
    | target_unicharset | the UNICHARSET to use  | 
  
   
Definition at line 1017 of file intproto.cpp.
 1022   int unicharset_size = target_unicharset.
size();
 
 1023   int version_id = -5;  
 
 1025   if (Templates->
NumClasses != unicharset_size) {
 
 1026     cprintf(
"Warning: executing WriteIntTemplates() with %d classes in" 
 1027             " Templates, while target_unicharset size is %d\n",
 
 1032   fwrite(&unicharset_size, 
sizeof(unicharset_size), 1, File);
 
 1033   fwrite(&version_id, 
sizeof(version_id), 1, File);
 
 1044   for (i = 0; i < Templates->
NumClasses; i++) {
 
 1045     Class = Templates->
Class[i];
 
 1053       fwrite(&Class->
ConfigLengths[j], 
sizeof(uint16_t), 1, File);
 
 1067     fwrite(&Class->
font_set_id, 
sizeof(
int), 1, File);
 
 1071   using namespace std::placeholders; 
 
 
 
 
◆ WriteTRFile()
      
        
          | bool tesseract::Classify::WriteTRFile  | 
          ( | 
          const STRING &  | 
          filename | ) | 
           | 
        
      
 
Definition at line 98 of file blobclass.cpp.
  100   STRING tr_filename = filename + ".tr";
 
  101   FILE* fp = fopen(tr_filename.c_str(), "wb");
 
 
 
 
◆ AdaptedTemplates
◆ AllConfigsOff
      
        
          | BIT_VECTOR tesseract::Classify::AllConfigsOff = nullptr | 
        
      
 
 
◆ AllConfigsOn
      
        
          | BIT_VECTOR tesseract::Classify::AllConfigsOn = nullptr | 
        
      
 
 
◆ allow_blob_division
      
        
          | bool tesseract::Classify::allow_blob_division = true | 
        
      
 
"Use divisible blobs chopping" 
 
Definition at line 423 of file classify.h.
 
 
◆ AllProtosOn
      
        
          | BIT_VECTOR tesseract::Classify::AllProtosOn = nullptr | 
        
      
 
 
◆ BackupAdaptedTemplates
◆ certainty_scale
      
        
          | double tesseract::Classify::certainty_scale = 20.0 | 
        
      
 
"Certainty scaling factor" 
 
Definition at line 473 of file classify.h.
 
 
◆ classify_adapt_feature_threshold
      
        
          | int tesseract::Classify::classify_adapt_feature_threshold = 230 | 
        
      
 
"Threshold for good features during adaptive 0-255" 
 
Definition at line 483 of file classify.h.
 
 
◆ classify_adapt_proto_threshold
      
        
          | int tesseract::Classify::classify_adapt_proto_threshold = 230 | 
        
      
 
"Threshold for good protos during adaptive 0-255" 
 
Definition at line 481 of file classify.h.
 
 
◆ classify_adapted_pruning_factor
      
        
          | double tesseract::Classify::classify_adapted_pruning_factor = 2.5 | 
        
      
 
"Prune poor adapted results this much worse than best result" 
 
Definition at line 477 of file classify.h.
 
 
◆ classify_adapted_pruning_threshold
      
        
          | double tesseract::Classify::classify_adapted_pruning_threshold = -1.0 | 
        
      
 
"Threshold at which classify_adapted_pruning_factor starts" 
 
Definition at line 479 of file classify.h.
 
 
◆ classify_bln_numeric_mode
      
        
          | bool tesseract::Classify::classify_bln_numeric_mode = 0 | 
        
      
 
"Assume the input is numbers [0-9]." 
 
Definition at line 508 of file classify.h.
 
 
◆ classify_char_norm_range
      
        
          | double tesseract::Classify::classify_char_norm_range = 0.2 | 
        
      
 
"Character Normalization Range ..." 
 
Definition at line 436 of file classify.h.
 
 
◆ classify_character_fragments_garbage_certainty_threshold
      
        
          | double tesseract::Classify::classify_character_fragments_garbage_certainty_threshold = -3.0 | 
        
      
 
"Exclude fragments that do not match any whole character" " with at least this certainty" 
 
Definition at line 489 of file classify.h.
 
 
◆ classify_class_pruner_multiplier
      
        
          | int tesseract::Classify::classify_class_pruner_multiplier = 15 | 
        
      
 
"Class Pruner Multiplier 0-255:       " 
 
Definition at line 501 of file classify.h.
 
 
◆ classify_class_pruner_threshold
      
        
          | int tesseract::Classify::classify_class_pruner_threshold = 229 | 
        
      
 
"Class Pruner Threshold 0-255" 
 
Definition at line 499 of file classify.h.
 
 
◆ classify_cp_cutoff_strength
      
        
          | int tesseract::Classify::classify_cp_cutoff_strength = 7 | 
        
      
 
"Class Pruner CutoffStrength:         " 
 
Definition at line 503 of file classify.h.
 
 
◆ classify_debug_character_fragments
      
        
          | bool tesseract::Classify::classify_debug_character_fragments = false | 
        
      
 
"Bring up graphical debugging windows for fragments training" 
 
Definition at line 491 of file classify.h.
 
 
◆ classify_debug_level
      
        
          | int tesseract::Classify::classify_debug_level = 0 | 
        
      
 
"Classify debug level" 
 
Definition at line 430 of file classify.h.
 
 
◆ classify_enable_adaptive_debugger
      
        
          | bool tesseract::Classify::classify_enable_adaptive_debugger = 0 | 
        
      
 
"Enable match debugger" 
 
Definition at line 450 of file classify.h.
 
 
◆ classify_enable_adaptive_matcher
      
        
          | bool tesseract::Classify::classify_enable_adaptive_matcher = 1 | 
        
      
 
"Enable adaptive classifier" 
 
Definition at line 445 of file classify.h.
 
 
◆ classify_enable_learning
      
        
          | bool tesseract::Classify::classify_enable_learning = true | 
        
      
 
"Enable adaptive classifier" 
 
Definition at line 429 of file classify.h.
 
 
◆ classify_integer_matcher_multiplier
      
        
          | int tesseract::Classify::classify_integer_matcher_multiplier = 10 | 
        
      
 
"Integer Matcher Multiplier  0-255:   " 
 
Definition at line 505 of file classify.h.
 
 
◆ classify_learn_debug_str
      
        
          | char* tesseract::Classify::classify_learn_debug_str = "" | 
        
      
 
"Class str to debug learning" 
 
Definition at line 495 of file classify.h.
 
 
◆ classify_learning_debug_level
      
        
          | int tesseract::Classify::classify_learning_debug_level = 0 | 
        
      
 
"Learning Debug Level: " 
 
Definition at line 455 of file classify.h.
 
 
◆ classify_max_certainty_margin
      
        
          | double tesseract::Classify::classify_max_certainty_margin = 5.5 | 
        
      
 
"Veto difference between classifier certainties" 
 
Definition at line 440 of file classify.h.
 
 
◆ classify_max_rating_ratio
      
        
          | double tesseract::Classify::classify_max_rating_ratio = 1.5 | 
        
      
 
"Veto ratio between classifier ratings" 
 
Definition at line 438 of file classify.h.
 
 
◆ classify_misfit_junk_penalty
      
        
          | double tesseract::Classify::classify_misfit_junk_penalty = 0.0 | 
        
      
 
"Penalty to apply when a non-alnum is vertically out of " "its expected textline position" 
 
Definition at line 471 of file classify.h.
 
 
◆ classify_nonlinear_norm
      
        
          | bool tesseract::Classify::classify_nonlinear_norm = 0 | 
        
      
 
"Non-linear stroke-density normalization" 
 
Definition at line 452 of file classify.h.
 
 
◆ classify_norm_method
      
        
          | int tesseract::Classify::classify_norm_method = character | 
        
      
 
"Normalization Method   ..." 
 
Definition at line 434 of file classify.h.
 
 
◆ classify_save_adapted_templates
      
        
          | bool tesseract::Classify::classify_save_adapted_templates = 0 | 
        
      
 
"Save adapted templates to a file" 
 
Definition at line 449 of file classify.h.
 
 
◆ classify_use_pre_adapted_templates
      
        
          | bool tesseract::Classify::classify_use_pre_adapted_templates = 0 | 
        
      
 
"Use pre-adapted classifier templates" 
 
Definition at line 447 of file classify.h.
 
 
◆ disable_character_fragments
      
        
          | bool tesseract::Classify::disable_character_fragments = true | 
        
      
 
"Do not include character fragments in the" " results of the classifier" 
 
Definition at line 486 of file classify.h.
 
 
◆ EnableLearning
      
        
          | bool tesseract::Classify::EnableLearning = true | 
        
      
 
 
◆ feature_defs_
◆ fontinfo_table_
◆ fontset_table_
◆ im_
◆ matcher_avg_noise_size
      
        
          | double tesseract::Classify::matcher_avg_noise_size = 12.0 | 
        
      
 
"Avg. noise blob length: " 
 
Definition at line 461 of file classify.h.
 
 
◆ matcher_bad_match_pad
      
        
          | double tesseract::Classify::matcher_bad_match_pad = 0.15 | 
        
      
 
 
◆ matcher_clustering_max_angle_delta
      
        
          | double tesseract::Classify::matcher_clustering_max_angle_delta = 0.015 | 
        
      
 
"Maximum angle delta for prototype clustering" 
 
Definition at line 468 of file classify.h.
 
 
◆ matcher_debug_flags
      
        
          | int tesseract::Classify::matcher_debug_flags = 0 | 
        
      
 
 
◆ matcher_debug_level
      
        
          | int tesseract::Classify::matcher_debug_level = 0 | 
        
      
 
 
◆ matcher_debug_separate_windows
      
        
          | bool tesseract::Classify::matcher_debug_separate_windows = false | 
        
      
 
"Use two different windows for debugging the matching: " "One for the protos and one for the features." 
 
Definition at line 494 of file classify.h.
 
 
◆ matcher_good_threshold
      
        
          | double tesseract::Classify::matcher_good_threshold = 0.125 | 
        
      
 
 
◆ matcher_min_examples_for_prototyping
      
        
          | int tesseract::Classify::matcher_min_examples_for_prototyping = 3 | 
        
      
 
"Reliable Config Threshold" 
 
Definition at line 464 of file classify.h.
 
 
◆ matcher_perfect_threshold
      
        
          | double tesseract::Classify::matcher_perfect_threshold = 0.02 | 
        
      
 
 
◆ matcher_permanent_classes_min
      
        
          | int tesseract::Classify::matcher_permanent_classes_min = 1 | 
        
      
 
"Min # of permanent classes" 
 
Definition at line 462 of file classify.h.
 
 
◆ matcher_rating_margin
      
        
          | double tesseract::Classify::matcher_rating_margin = 0.1 | 
        
      
 
"New template margin (0-1)" 
 
Definition at line 460 of file classify.h.
 
 
◆ matcher_reliable_adaptive_result
      
        
          | double tesseract::Classify::matcher_reliable_adaptive_result = 0.0 | 
        
      
 
 
◆ matcher_sufficient_examples_for_prototyping
      
        
          | int tesseract::Classify::matcher_sufficient_examples_for_prototyping = 5 | 
        
      
 
"Enable adaption even if the ambiguities have not been seen" 
 
Definition at line 466 of file classify.h.
 
 
◆ NormProtos
◆ PreTrainedTemplates
◆ prioritize_division
      
        
          | bool tesseract::Classify::prioritize_division = false | 
        
      
 
"Prioritize blob division over chopping" 
 
Definition at line 428 of file classify.h.
 
 
◆ rating_scale
      
        
          | double tesseract::Classify::rating_scale = 1.5 | 
        
      
 
"Rating scaling factor" 
 
Definition at line 472 of file classify.h.
 
 
◆ shape_table_
  
  
      
        
          | ShapeTable* tesseract::Classify::shape_table_ = nullptr | 
         
       
   | 
  
protected   | 
  
 
 
◆ speckle_large_max_size
      
        
          | double tesseract::Classify::speckle_large_max_size = 0.30 | 
        
      
 
"Max large speckle size" 
 
Definition at line 509 of file classify.h.
 
 
◆ speckle_rating_penalty
      
        
          | double tesseract::Classify::speckle_rating_penalty = 10.0 | 
        
      
 
"Penalty to add to worst rating for noise" 
 
Definition at line 511 of file classify.h.
 
 
◆ TempProtoMask
      
        
          | BIT_VECTOR tesseract::Classify::TempProtoMask = nullptr | 
        
      
 
 
◆ tess_bn_matching
      
        
          | bool tesseract::Classify::tess_bn_matching = 0 | 
        
      
 
"Baseline Normalized Matching" 
 
Definition at line 444 of file classify.h.
 
 
◆ tess_cn_matching
      
        
          | bool tesseract::Classify::tess_cn_matching = 0 | 
        
      
 
"Character Normalized Matching" 
 
Definition at line 443 of file classify.h.
 
 
◆ tessedit_class_miss_scale
      
        
          | double tesseract::Classify::tessedit_class_miss_scale = 0.00390625 | 
        
      
 
"Scale factor for features not used" 
 
Definition at line 475 of file classify.h.
 
 
The documentation for this class was generated from the following files:
 
 
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
 
double tessedit_class_miss_scale
 
void FontInfoDeleteCallback(FontInfo f)
 
bool ContainsUnichar(int unichar_id) const
 
UNICHAR_ID * BaselineClassifier(TBLOB *Blob, const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results)
 
TBLOB * ClassifyNormalizeIfNeeded() const
 
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
 
bool classify_enable_adaptive_matcher
 
virtual void DebugDisplay(const TrainingSample &sample, Pix *page_pix, UNICHAR_ID unichar_id)
 
uint16_t ConfigLengths[MAX_NUM_CONFIGS]
 
void ComputeAdaptionThresholds(float certainty_scale, float min_rating, float max_rating, float rating_margin, float *thresholds)
 
#define PROTOS_PER_PROTO_SET
 
bool AddFeature(FEATURE_SET FeatureSet, FEATURE Feature)
 
void ClearCharNormArray(uint8_t *char_norm_array)
 
#define MAX_INT_CHAR_NORM
 
#define BITS_PER_CP_VECTOR
 
bool use_ambigs_for_adaption
 
FEATURE_SET ExtractMicros(TBLOB *Blob, const DENORM &cn_denorm)
 
INT_TEMPLATES NewIntTemplates()
 
double matcher_clustering_max_angle_delta
 
void ExpandShapesAndApplyCorrections(ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, const uint8_t *cn_factors, UnicharRating *int_result, ADAPT_RESULTS *final_results)
 
void InitFeatureDefs(FEATURE_DEFS_STRUCT *featuredefs)
 
INT_PROTO_STRUCT Protos[PROTOS_PER_PROTO_SET]
 
void NormalizePicoX(FEATURE_SET FeatureSet)
 
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
 
bool PiecesAllNatural(int start, int count) const
 
#define TempConfigFor(Class, ConfigId)
 
int classify_class_pruner_threshold
 
uint8_t Bucket8For(float param, float offset, int num_buckets)
 
bool AlternativeChoiceAdjustmentsWorseThan(float threshold) const
 
bool get_isdigit(UNICHAR_ID unichar_id) const
 
UnicityTable< FontInfo > fontinfo_table_
 
bool get_isalpha(UNICHAR_ID unichar_id) const
 
int classify_adapt_proto_threshold
 
bool write_set(FILE *f, const FontSet &fs)
 
float ApplyCNCorrection(float rating, int blob_length, int normalization_factor, int matcher_multiplier)
 
void MasterMatcher(INT_TEMPLATES templates, int16_t num_features, const INT_FEATURE_STRUCT *features, const uint8_t *norm_factors, ADAPT_CLASS *classes, int debug, int matcher_multiplier, const TBOX &blob_box, const GenericVector< CP_RESULT_STRUCT > &results, ADAPT_RESULTS *final_results)
 
int MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures)
 
bool classify_enable_learning
 
#define INT_MEMBER(name, val, comment, vec)
 
GenericVector< ScoredFont > fonts
 
const DENORM & denorm() const
 
const UnicharAmbigs & getUnicharAmbigs() const
 
int CharNormClassifier(TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results)
 
bool classify_enable_adaptive_debugger
 
void EndAdaptiveClassifier()
 
INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs)
 
void plot(ScrollView *window, ScrollView::Color color, ScrollView::Color child_color)
 
void FreeProtoList(LIST *ProtoList)
 
float ComputeNormMatch(CLASS_ID ClassId, const FEATURE_STRUCT &feature, bool DebugMatch)
 
bool write_info(FILE *f, const FontInfo &fi)
 
const double kStandardFeatureLength
 
double matcher_good_threshold
 
FEATURE_SET FeatureSets[NUM_FEATURE_TYPES]
 
double classify_adapted_pruning_threshold
 
int matcher_permanent_classes_min
 
#define INT_CHAR_NORM_RANGE
 
uint32_t Configs[WERDS_PER_CONFIG_VEC]
 
void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
 
void truncate_at(int32_t index)
 
const UnicharIdVector * ReverseAmbigsForAdaption(UNICHAR_ID unichar_id) const
 
int classify_cp_cutoff_strength
 
const FontInfo * fontinfo
 
void UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob)
 
void NormalizeOutlines(LIST Outlines, float *XScale, float *YScale)
 
void cprintf(const char *format,...)
 
void EndDangerousAmbigs()
 
void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class)
 
void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet)
 
PARAM_DESC * ReadParamDesc(TFile *fp, uint16_t N)
 
void ConvertMatchesToChoices(const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices)
 
double classify_max_certainty_margin
 
int ComputeMoments(FCOORD *center, FCOORD *second_moments) const
 
int classify_learning_debug_level
 
int IntCastRounded(double x)
 
int classify_class_pruner_multiplier
 
uint8_t CircBucketFor(float param, float offset, int num_buckets)
 
bool read_spacing_info(TFile *f, FontInfo *fi)
 
const float MF_SCALE_FACTOR
 
void SettupStopperPass1()
Sets up stopper variables in preparation for the first pass.
 
static void SetupBLCNDenorms(const TBLOB &blob, bool nonlinear_norm, DENORM *bl_denorm, DENORM *cn_denorm, INT_FX_RESULT_STRUCT *fx_info)
 
GenericVector< int > best_state
 
bool LooksLikeGarbage(TBLOB *blob)
 
void RemoveBadMatches(ADAPT_RESULTS *Results)
 
#define CPrunerBitIndexFor(c)
 
bool eq(UNICHAR_ID unichar_id, const char *const unichar_repr) const
 
#define PRINT_FEATURE_MATCHES
 
STRING language_data_path_prefix
 
void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures)
 
void ZoomToRectangle(int x1, int y1, int x2, int y2)
 
void InitAdaptedClass(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates)
 
void PrintAdaptiveMatchResults(const ADAPT_RESULTS &results)
 
virtual int UnicharClassifySample(const TrainingSample &sample, Pix *page_pix, int debug, UNICHAR_ID keep_this, GenericVector< UnicharRating > *results)
 
bool write_spacing_info(FILE *f, const FontInfo &fi)
 
#define MAX_NUM_CLASS_PRUNERS
 
int FindGoodProtos(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray, int AdaptProtoThreshold, int Debug)
 
bool DeSerialize(TFile *fp)
 
FEATURE_SET ExtractIntGeoFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
 
#define CPrunerWordIndexFor(c)
 
void SetAdaptiveThreshold(float Threshold)
 
#define ProtoIn(Class, Pid)
 
double matcher_reliable_adaptive_result
 
int get_script(UNICHAR_ID unichar_id) const
 
const UnicharIdVector * AmbigsForAdaption(UNICHAR_ID unichar_id) const
 
int AddIntProto(INT_CLASS Class)
 
const double kWidthErrorWeighting
 
void DisplayAdaptedChar(TBLOB *blob, INT_CLASS_STRUCT *int_class)
 
void ClassifyAsNoise(ADAPT_RESULTS *Results)
 
int FRead(void *buffer, size_t size, int count)
 
void AddIntClass(INT_TEMPLATES Templates, CLASS_ID ClassId, INT_CLASS Class)
 
double matcher_avg_noise_size
 
void AddProtoToProtoPruner(PROTO Proto, int ProtoId, INT_CLASS Class, bool debug)
 
void WriteCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, CHAR_DESC CharDesc, STRING *str)
 
GenericVector< UNICHAR_ID > UnicharIdVector
 
#define reset_bit(array, bit)
 
GenericVector< CP_RESULT_STRUCT > CPResults
 
void free_adapted_templates(ADAPT_TEMPLATES templates)
 
void SetupNormalization(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift)
 
void GetFirstUnicharAndFont(int shape_id, int *unichar_id, int *font_id) const
 
#define ADAPTABLE_WERD_ADJUSTMENT
 
void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class)
 
int matcher_min_examples_for_prototyping
 
#define STRING_MEMBER(name, val, comment, vec)
 
void XHeightRange(int unichar_id, const UNICHARSET &unicharset, const TBOX &bbox, float *min_xht, float *max_xht, float *yshift) const
 
STRING debug_str(UNICHAR_ID id) const
 
WERD_CHOICE * best_choice
 
const char * c_str() const
 
void get_top_bottom(UNICHAR_ID unichar_id, int *min_bottom, int *max_bottom, int *min_top, int *max_top) const
 
bool get_enabled(UNICHAR_ID unichar_id) const
 
double matcher_perfect_threshold
 
#define ADAPT_TEMPLATE_SUFFIX
 
bool DeSerialize(bool swap, FILE *fp)
 
#define MakeConfigPermanent(Class, ConfigId)
 
ADAPT_TEMPLATES ReadAdaptedTemplates(TFile *File)
 
LIST delete_d(LIST list, void *key, int_compare is_equal)
 
ShapeTable * shape_table_
 
int PruneClasses(const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uint8_t *normalization_factors, const uint16_t *expected_num_features, GenericVector< CP_RESULT_STRUCT > *results)
 
double speckle_large_max_size
 
bool disable_character_fragments
 
void MakePermanent(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob)
 
void FreeFeature(FEATURE Feature)
 
void FreeTempConfig(TEMP_CONFIG Config)
 
#define MaxNumIntProtosIn(C)
 
#define LENGTH_COMPRESSION
 
void InitIntMatchWindowIfReqd()
 
ADAPT_TEMPLATES BackupAdaptedTemplates
 
bool classify_save_adapted_templates
 
bool classify_nonlinear_norm
 
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
 
void FontSetDeleteCallback(FontSet fs)
 
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
 
LIST ConvertBlob(TBLOB *blob)
 
double segment_penalty_dict_case_ok
 
GenericVector< TBLOB * > blobs
 
#define IncreaseConfidence(TempConfig)
 
#define MAX_NUM_INT_FEATURES
 
float adjust_factor() const
 
double classify_max_rating_ratio
 
STRING DebugStr(int shape_id) const
 
double classify_char_norm_range
 
void AmbigClassifier(const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, const TBLOB *blob, INT_TEMPLATES templates, ADAPT_CLASS *classes, UNICHAR_ID *ambiguities, ADAPT_RESULTS *results)
 
#define double_MEMBER(name, val, comment, vec)
 
NORM_PROTOS * ReadNormProtos(TFile *fp)
 
uint16_t ReadSampleSize(TFile *fp)
 
FEATURE_DEFS_STRUCT feature_defs_
 
UnicityTableEqEq< int > font_set
 
const Shape & GetShape(int shape_id) const
 
ADAPT_TEMPLATES AdaptedTemplates
 
int matcher_sufficient_examples_for_prototyping
 
char * classify_learn_debug_str
 
FEATURE_SET ExtractOutlineFeatures(TBLOB *Blob)
 
int AddIntConfig(INT_CLASS Class)
 
static void BreakPieces(const GenericVector< SEAM * > &seams, const GenericVector< TBLOB * > &blobs, int first, int last)
 
const T & get(int id) const
Return the object from an id.
 
void GetPreciseBoundingBox(TBOX *precise_box) const
 
void AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices)
 
PROTOTYPE * ReadPrototype(TFile *fp, uint16_t N)
 
const STRING debug_string() const
 
#define WERDS_PER_CONFIG_VEC
 
ADAPT_TEMPLATES Templates
 
void WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, const UNICHARSET &target_unicharset)
 
bool CompareFontSet(const FontSet &fs1, const FontSet &fs2)
 
#define ConfigIsPermanent(Class, ConfigId)
 
LIST push(LIST list, void *element)
 
void FillABC(PROTO Proto)
 
void SetupNonLinear(const DENORM *predecessor, const TBOX &box, float target_width, float target_height, float final_xshift, float final_yshift, const GenericVector< GenericVector< int > > &x_coords, const GenericVector< GenericVector< int > > &y_coords)
 
void LearnPieces(const char *fontname, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word)
 
int GetCharNormFeature(const INT_FX_RESULT_STRUCT &fx_info, INT_TEMPLATES templates, uint8_t *pruner_norm_array, uint8_t *char_norm_array)
 
void DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results)
 
FEATURE_SET ExtractIntCNFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
 
int GetFontinfoId(ADAPT_CLASS Class, uint8_t ConfigId)
 
FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc)
 
GenericVector< SEAM * > seam_array
 
TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId)
 
double speckle_rating_penalty
 
void ComputeCharNormArrays(FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uint8_t *char_norm_array, uint8_t *pruner_array)
 
PROTO_SET ProtoSets[MAX_NUM_PROTO_SETS]
 
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
 
TBOX bounding_box() const
 
void FreeOutlines(LIST Outlines)
 
int TruncateParam(float Param, int Min, int Max, char *Id)
 
bool classify_use_pre_adapted_templates
 
ADAPT_CLASS Class[MAX_NUM_CLASSES]
 
FEATURE_SET ExtractCharNormFeatures(const INT_FX_RESULT_STRUCT &fx_info)
 
FEATURE_SET ExtractPicoFeatures(TBLOB *Blob)
 
int classify_adapt_feature_threshold
 
int size() const
Return the size used.
 
void ReadNewCutoffs(TFile *fp, uint16_t *Cutoffs)
 
TEMP_PROTO NewTempProto()
 
SVEvent * AwaitEvent(SVEventType type)
 
#define UNLIKELY_NUM_FEAT
 
#define SET_BIT(array, bit)
 
double classify_adapted_pruning_factor
 
const FEATURE_DESC_STRUCT CharNormDesc
 
double classify_norm_adj_midpoint
 
static int SortDescendingRating(const void *t1, const void *t2)
 
bool read_info(TFile *f, FontInfo *fi)
 
#define WORST_POSSIBLE_RATING
 
INT_CLASS Class[MAX_NUM_CLASSES]
 
void ComputeIntCharNormArray(const FEATURE_STRUCT &norm_feature, uint8_t *char_norm_array)
 
void pad(int xpad, int ypad)
 
FEATURE_SET NewFeatureSet(int NumFeatures)
 
void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs)
 
void LearnBlob(const STRING &fontname, TBLOB *Blob, const DENORM &cn_denorm, const INT_FX_RESULT_STRUCT &fx_info, const char *blob_text)
 
double classify_misfit_junk_penalty
 
void RemoveExtraPuncs(ADAPT_RESULTS *Results)
 
bool contains_unichar(const char *const unichar_repr) const
 
int GetAdaptiveFeatures(TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures)
 
void FreePrototype(void *arg)
 
int MakeTempProtoPerm(void *item1, void *item2)
 
bool MarginalMatch(float confidence, float matcher_great_threshold)
 
void FreeFeatureSet(FEATURE_SET FeatureSet)
 
bool CompareFontInfo(const FontInfo &fi1, const FontInfo &fi2)
 
void DebugAdaptiveClassifier(TBLOB *Blob, ADAPT_RESULTS *Results)
 
void UpdateMatchDisplay()
 
UNICHAR_ID best_unichar_id
 
void AddNewResult(const UnicharRating &new_result, ADAPT_RESULTS *results)
 
void GetEdgeCoords(const TBOX &box, GenericVector< GenericVector< int > > *x_coords, GenericVector< GenericVector< int > > *y_coords) const
 
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
 
#define PRINT_MATCH_SUMMARY
 
void NormalizeOutlineX(FEATURE_SET FeatureSet)
 
GenericVector< STRING > correct_text
 
double ComputeCorrectedRating(bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, int matcher_multiplier, const uint8_t *cn_factors)
 
const FEATURE_DESC_STRUCT IntFeatDesc
 
void set_fonts(const GenericVector< tesseract::ScoredFont > &fonts)
 
CLASS_PRUNER_STRUCT * ClassPruners[MAX_NUM_CLASS_PRUNERS]
 
int FindBadFeatures(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray, int AdaptFeatureThreshold, int Debug)
 
#define WERDS_PER_CP_VECTOR
 
bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config)
 
DLLSYM void tprintf(const char *format,...)
 
bool LargeSpeckle(const TBLOB &blob)
 
const UNICHARSET & getUnicharset() const
 
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
 
INT_TEMPLATES ReadIntTemplates(TFile *fp)
 
INT_TEMPLATES PreTrainedTemplates
 
uint32_t p[NUM_CP_BUCKETS][NUM_CP_BUCKETS][NUM_CP_BUCKETS][WERDS_PER_CP_VECTOR]
 
void free_int_templates(INT_TEMPLATES templates)
 
bool ValidCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, CHAR_DESC CharDesc)
 
#define NUM_BITS_PER_CLASS
 
bool matcher_debug_separate_windows
 
void plot(ScrollView *window)
 
GenericVector< UnicharRating > match
 
void AddProtoToClassPruner(PROTO Proto, CLASS_ID ClassId, INT_TEMPLATES Templates)
 
TBOX bounding_box() const
 
UNICHAR_ID * GetAmbiguities(TBLOB *Blob, CLASS_ID CorrectClass)
 
CHAR_DESC NewCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs)
 
static void JoinPieces(const GenericVector< SEAM * > &seams, const GenericVector< TBLOB * > &blobs, int first, int last)
 
ADAPT_CLASS NewAdaptedClass()
 
int classify_integer_matcher_multiplier
 
const char * id_to_unichar(UNICHAR_ID id) const
 
bool classify_debug_character_fragments
 
#define WERDS_PER_PP_VECTOR
 
double matcher_rating_margin
 
#define PermConfigFor(Class, ConfigId)
 
void ResetAdaptiveClassifierInternal()
 
void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
 
#define ProtoForProtoId(C, P)
 
bool classify_bln_numeric_mode
 
double matcher_bad_match_pad
 
#define BOOL_MEMBER(name, val, comment, vec)
 
UnicityTable< FontSet > fontset_table_
 
#define OLD_MAX_NUM_CONFIGS
 
#define UnusedClassIdIn(T, c)
 
#define PRINT_PROTO_MATCHES
 
float ActualOutlineLength(FEATURE Feature)
 
double classify_character_fragments_garbage_certainty_threshold
 
char window_wait(ScrollView *win)
 
bool Serialize(FILE *fp, const char *data, size_t n=1)
 
#define ClassForClassId(T, c)
 
void AdaptToChar(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, float Threshold, ADAPT_TEMPLATES adaptive_templates)
 
#define MAX_PICO_FEATURES
 
LIST push_last(LIST list, void *item)
 
ADAPT_CLASS ReadAdaptedClass(TFile *fp)
 
void SettupStopperPass2()
Sets up stopper variables in preparation for the second pass.
 
void ClearFeatureSpaceWindow(NORM_METHOD norm_method, ScrollView *window)
 
int ClassAndConfigIDToFontOrShapeID(int class_id, int int_result_config) const
 
void ConvertToOutlineFeatures(MFOUTLINE Outline, FEATURE_SET FeatureSet)
 
PROTO_ID MakeNewTempProtos(FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS IClass, ADAPT_CLASS Class, BIT_VECTOR TempProtoMask)
 
#define OLD_WERDS_PER_CONFIG_VEC
 
void NormalizeOutline(MFOUTLINE Outline, float XOrigin)
 
int MaxNumUnichars() const
 
const int kBlnBaselineOffset
 
void FreeCharDescription(CHAR_DESC CharDesc)
 
void RefreshDebugWindow(ScrollView **win, const char *msg, int y_offset, const TBOX &wbox)
 
#define IsEmptyAdaptedClass(Class)
 
#define MAX_ADAPTABLE_WERD_SIZE
 
ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset)
 
void AddAdaptedClass(ADAPT_TEMPLATES Templates, ADAPT_CLASS Class, CLASS_ID ClassId)
 
bool read_set(TFile *f, FontSet *fs)
 
#define GetPicoFeatureLength()