tesseract
5.0.0-alpha-619-ge9db
|
#include <classify.h>
Public Member Functions | |
Classify () | |
~Classify () override | |
virtual Dict & | getDict () |
const ShapeTable * | shape_table () const |
void | SetStaticClassifier (ShapeClassifier *static_classifier) |
void | AddLargeSpeckleTo (int blob_length, BLOB_CHOICE_LIST *choices) |
bool | LargeSpeckle (const TBLOB &blob) |
ADAPT_TEMPLATES | NewAdaptedTemplates (bool InitFromUnicharset) |
int | GetFontinfoId (ADAPT_CLASS Class, uint8_t ConfigId) |
int | PruneClasses (const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uint8_t *normalization_factors, const uint16_t *expected_num_features, GenericVector< CP_RESULT_STRUCT > *results) |
void | ReadNewCutoffs (TFile *fp, uint16_t *Cutoffs) |
void | PrintAdaptedTemplates (FILE *File, ADAPT_TEMPLATES Templates) |
void | WriteAdaptedTemplates (FILE *File, ADAPT_TEMPLATES Templates) |
ADAPT_TEMPLATES | ReadAdaptedTemplates (TFile *File) |
float | ComputeNormMatch (CLASS_ID ClassId, const FEATURE_STRUCT &feature, bool DebugMatch) |
void | FreeNormProtos () |
NORM_PROTOS * | ReadNormProtos (TFile *fp) |
void | ConvertProto (PROTO Proto, int ProtoId, INT_CLASS Class) |
INT_TEMPLATES | CreateIntTemplates (CLASSES FloatProtos, const UNICHARSET &target_unicharset) |
void | LearnWord (const char *fontname, WERD_RES *word) |
void | LearnPieces (const char *fontname, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word) |
void | InitAdaptiveClassifier (TessdataManager *mgr) |
void | InitAdaptedClass (TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates) |
void | AmbigClassifier (const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, const TBLOB *blob, INT_TEMPLATES templates, ADAPT_CLASS *classes, UNICHAR_ID *ambiguities, ADAPT_RESULTS *results) |
void | MasterMatcher (INT_TEMPLATES templates, int16_t num_features, const INT_FEATURE_STRUCT *features, const uint8_t *norm_factors, ADAPT_CLASS *classes, int debug, int matcher_multiplier, const TBOX &blob_box, const GenericVector< CP_RESULT_STRUCT > &results, ADAPT_RESULTS *final_results) |
void | ExpandShapesAndApplyCorrections (ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, const uint8_t *cn_factors, UnicharRating *int_result, ADAPT_RESULTS *final_results) |
double | ComputeCorrectedRating (bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, int matcher_multiplier, const uint8_t *cn_factors) |
void | ConvertMatchesToChoices (const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices) |
void | AddNewResult (const UnicharRating &new_result, ADAPT_RESULTS *results) |
int | GetAdaptiveFeatures (TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures) |
void | DebugAdaptiveClassifier (TBLOB *Blob, ADAPT_RESULTS *Results) |
PROTO_ID | MakeNewTempProtos (FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS IClass, ADAPT_CLASS Class, BIT_VECTOR TempProtoMask) |
int | MakeNewTemporaryConfig (ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures) |
void | MakePermanent (ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob) |
void | PrintAdaptiveMatchResults (const ADAPT_RESULTS &results) |
void | RemoveExtraPuncs (ADAPT_RESULTS *Results) |
void | RemoveBadMatches (ADAPT_RESULTS *Results) |
void | SetAdaptiveThreshold (float Threshold) |
void | ShowBestMatchFor (int shape_id, const INT_FEATURE_STRUCT *features, int num_features) |
STRING | ClassIDToDebugStr (const INT_TEMPLATES_STRUCT *templates, int class_id, int config_id) const |
int | ClassAndConfigIDToFontOrShapeID (int class_id, int int_result_config) const |
int | ShapeIDToClassID (int shape_id) const |
UNICHAR_ID * | BaselineClassifier (TBLOB *Blob, const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results) |
int | CharNormClassifier (TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results) |
int | CharNormTrainingSample (bool pruner_only, int keep_this, const TrainingSample &sample, GenericVector< UnicharRating > *results) |
UNICHAR_ID * | GetAmbiguities (TBLOB *Blob, CLASS_ID CorrectClass) |
void | DoAdaptiveMatch (TBLOB *Blob, ADAPT_RESULTS *Results) |
void | AdaptToChar (TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, float Threshold, ADAPT_TEMPLATES adaptive_templates) |
void | DisplayAdaptedChar (TBLOB *blob, INT_CLASS_STRUCT *int_class) |
bool | AdaptableWord (WERD_RES *word) |
void | EndAdaptiveClassifier () |
void | SettupPass1 () |
void | SettupPass2 () |
void | AdaptiveClassifier (TBLOB *Blob, BLOB_CHOICE_LIST *Choices) |
void | ClassifyAsNoise (ADAPT_RESULTS *Results) |
void | ResetAdaptiveClassifierInternal () |
void | SwitchAdaptiveClassifier () |
void | StartBackupAdaptiveClassifier () |
int | GetCharNormFeature (const INT_FX_RESULT_STRUCT &fx_info, INT_TEMPLATES templates, uint8_t *pruner_norm_array, uint8_t *char_norm_array) |
void | ComputeCharNormArrays (FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uint8_t *char_norm_array, uint8_t *pruner_array) |
bool | TempConfigReliable (CLASS_ID class_id, const TEMP_CONFIG &config) |
void | UpdateAmbigsGroup (CLASS_ID class_id, TBLOB *Blob) |
bool | AdaptiveClassifierIsFull () const |
bool | AdaptiveClassifierIsEmpty () const |
bool | LooksLikeGarbage (TBLOB *blob) |
void | RefreshDebugWindow (ScrollView **win, const char *msg, int y_offset, const TBOX &wbox) |
void | ClearCharNormArray (uint8_t *char_norm_array) |
void | ComputeIntCharNormArray (const FEATURE_STRUCT &norm_feature, uint8_t *char_norm_array) |
void | ComputeIntFeatures (FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures) |
INT_TEMPLATES | ReadIntTemplates (TFile *fp) |
void | WriteIntTemplates (FILE *File, INT_TEMPLATES Templates, const UNICHARSET &target_unicharset) |
CLASS_ID | GetClassToDebug (const char *Prompt, bool *adaptive_on, bool *pretrained_on, int *shape_id) |
void | ShowMatchDisplay () |
UnicityTable< FontInfo > & | get_fontinfo_table () |
const UnicityTable< FontInfo > & | get_fontinfo_table () const |
UnicityTable< FontSet > & | get_fontset_table () |
void | NormalizeOutlines (LIST Outlines, float *XScale, float *YScale) |
FEATURE_SET | ExtractOutlineFeatures (TBLOB *Blob) |
FEATURE_SET | ExtractPicoFeatures (TBLOB *Blob) |
FEATURE_SET | ExtractIntCNFeatures (const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info) |
FEATURE_SET | ExtractIntGeoFeatures (const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info) |
void | LearnBlob (const STRING &fontname, TBLOB *Blob, const DENORM &cn_denorm, const INT_FX_RESULT_STRUCT &fx_info, const char *blob_text) |
bool | WriteTRFile (const STRING &filename) |
Public Member Functions inherited from tesseract::CCStruct | |
CCStruct ()=default | |
~CCStruct () override | |
Public Member Functions inherited from tesseract::CCUtil | |
CCUtil () | |
virtual | ~CCUtil () |
void | main_setup (const char *argv0, const char *basename) |
CCUtil::main_setup - set location of tessdata and name of image. More... | |
ParamsVectors * | params () |
Static Public Member Functions | |
static void | SetupBLCNDenorms (const TBLOB &blob, bool nonlinear_norm, DENORM *bl_denorm, DENORM *cn_denorm, INT_FX_RESULT_STRUCT *fx_info) |
static void | ExtractFeatures (const TBLOB &blob, bool nonlinear_norm, GenericVector< INT_FEATURE_STRUCT > *bl_features, GenericVector< INT_FEATURE_STRUCT > *cn_features, INT_FX_RESULT_STRUCT *results, GenericVector< int > *outline_cn_counts) |
Protected Attributes | |
IntegerMatcher | im_ |
FEATURE_DEFS_STRUCT | feature_defs_ |
ShapeTable * | shape_table_ = nullptr |
Additional Inherited Members | |
Static Public Attributes inherited from tesseract::CCStruct | |
static const double | kDescenderFraction = 0.25 |
static const double | kXHeightFraction = 0.5 |
static const double | kAscenderFraction = 0.25 |
static const double | kXHeightCapRatio |
Definition at line 103 of file classify.h.
tesseract::Classify::Classify | ( | ) |
Definition at line 60 of file classify.cpp.
|
override |
Definition at line 183 of file classify.cpp.
bool tesseract::Classify::AdaptableWord | ( | WERD_RES * | word | ) |
Return true if the specified word is acceptable for adaptation.
Globals: none
word | current word |
Definition at line 821 of file adaptmatch.cpp.
void tesseract::Classify::AdaptiveClassifier | ( | TBLOB * | Blob, |
BLOB_CHOICE_LIST * | Choices | ||
) |
This routine calls the adaptive matcher which returns (in an array) the class id of each class matched.
It also returns the number of classes matched. For each class matched it places the best rating found for that class into the Ratings array.
Bad matches are then removed so that they don't need to be sorted. The remaining good matches are then sorted and converted to choices.
This routine also performs some simple speckle filtering.
Blob | blob to be classified | |
[out] | Choices | List of choices found by adaptive matcher. Filled on return with the choices found by the class pruner and the ratings therefrom. Also contains the detailed results of the integer matcher. |
Definition at line 191 of file adaptmatch.cpp.
|
inline |
Definition at line 326 of file classify.h.
|
inline |
Definition at line 325 of file classify.h.
void tesseract::Classify::AdaptToChar | ( | TBLOB * | Blob, |
CLASS_ID | ClassId, | ||
int | FontinfoId, | ||
float | Threshold, | ||
ADAPT_TEMPLATES | adaptive_templates | ||
) |
Blob | blob to add to templates for ClassId |
ClassId | class to add blob to |
FontinfoId | font information from pre-trained templates |
Threshold | minimum match rating to existing template |
adaptive_templates | current set of adapted templates |
Globals:
Definition at line 853 of file adaptmatch.cpp.
void tesseract::Classify::AddLargeSpeckleTo | ( | int | blob_length, |
BLOB_CHOICE_LIST * | choices | ||
) |
Definition at line 201 of file classify.cpp.
void tesseract::Classify::AddNewResult | ( | const UnicharRating & | new_result, |
ADAPT_RESULTS * | results | ||
) |
This routine adds the result of a classification into Results. If the new rating is much worse than the current best rating, it is not entered into results because it would end up being stripped later anyway. If the new rating is better than the old rating for the class, it replaces the old rating. If this is the first rating for the class, the class is added to the list of matched classes in Results. If the new rating is better than the best so far, it becomes the best so far.
Globals:
new_result | new result to add | |
[out] | results | results to add new result to |
Definition at line 994 of file adaptmatch.cpp.
void tesseract::Classify::AmbigClassifier | ( | const GenericVector< INT_FEATURE_STRUCT > & | int_features, |
const INT_FX_RESULT_STRUCT & | fx_info, | ||
const TBLOB * | blob, | ||
INT_TEMPLATES | templates, | ||
ADAPT_CLASS * | classes, | ||
UNICHAR_ID * | ambiguities, | ||
ADAPT_RESULTS * | results | ||
) |
This routine is identical to CharNormClassifier() except that it does no class pruning. It simply matches the unknown blob against the classes listed in Ambiguities.
Globals:
blob | blob to be classified | |
templates | built-in templates to classify against | |
classes | adapted class templates | |
ambiguities | array of unichar id's to match against | |
[out] | results | place to put match results |
int_features | ||
fx_info |
Definition at line 1045 of file adaptmatch.cpp.
UNICHAR_ID * tesseract::Classify::BaselineClassifier | ( | TBLOB * | Blob, |
const GenericVector< INT_FEATURE_STRUCT > & | int_features, | ||
const INT_FX_RESULT_STRUCT & | fx_info, | ||
ADAPT_TEMPLATES | Templates, | ||
ADAPT_RESULTS * | Results | ||
) |
This routine extracts baseline normalized features from the unknown character and matches them against the specified set of templates. The classes which match are added to Results.
Globals:
Blob | blob to be classified |
Templates | current set of adapted templates |
Results | place to put match results |
int_features | |
fx_info |
Definition at line 1265 of file adaptmatch.cpp.
int tesseract::Classify::CharNormClassifier | ( | TBLOB * | blob, |
const TrainingSample & | sample, | ||
ADAPT_RESULTS * | adapt_results | ||
) |
This routine extracts character normalized features from the unknown character and matches them against the specified set of templates. The classes which match are added to Results.
blob | blob to be classified |
sample | templates to classify unknown against |
adapt_results | place to put match results |
Globals:
Definition at line 1311 of file adaptmatch.cpp.
int tesseract::Classify::CharNormTrainingSample | ( | bool | pruner_only, |
int | keep_this, | ||
const TrainingSample & | sample, | ||
GenericVector< UnicharRating > * | results | ||
) |
Definition at line 1329 of file adaptmatch.cpp.
int tesseract::Classify::ClassAndConfigIDToFontOrShapeID | ( | int | class_id, |
int | int_result_config | ||
) | const |
Definition at line 2207 of file adaptmatch.cpp.
STRING tesseract::Classify::ClassIDToDebugStr | ( | const INT_TEMPLATES_STRUCT * | templates, |
int | class_id, | ||
int | config_id | ||
) | const |
Definition at line 2194 of file adaptmatch.cpp.
void tesseract::Classify::ClassifyAsNoise | ( | ADAPT_RESULTS * | results | ) |
This routine computes a rating which reflects the likelihood that the blob being classified is a noise blob. NOTE: assumes that the blob length has already been computed and placed into Results.
results | results to add noise classification to |
Globals:
Definition at line 1399 of file adaptmatch.cpp.
void tesseract::Classify::ClearCharNormArray | ( | uint8_t * | char_norm_array | ) |
For each class in the unicharset, clears the corresponding entry in char_norm_array. char_norm_array is indexed by unichar_id.
Globals:
char_norm_array | array to be cleared |
Definition at line 44 of file float2int.cpp.
void tesseract::Classify::ComputeCharNormArrays | ( | FEATURE_STRUCT * | norm_feature, |
INT_TEMPLATES_STRUCT * | templates, | ||
uint8_t * | char_norm_array, | ||
uint8_t * | pruner_array | ||
) |
Definition at line 1698 of file adaptmatch.cpp.
double tesseract::Classify::ComputeCorrectedRating | ( | bool | debug, |
int | unichar_id, | ||
double | cp_rating, | ||
double | im_rating, | ||
int | feature_misses, | ||
int | bottom, | ||
int | top, | ||
int | blob_length, | ||
int | matcher_multiplier, | ||
const uint8_t * | cn_factors | ||
) |
Definition at line 1202 of file adaptmatch.cpp.
void tesseract::Classify::ComputeIntCharNormArray | ( | const FEATURE_STRUCT & | norm_feature, |
uint8_t * | char_norm_array | ||
) |
For each class in unicharset, computes the match between norm_feature and the normalization protos for that class. Converts this number to the range 0-255 and stores it into char_norm_array. char_norm_array is indexed by unichar_id.
Globals:
norm_feature | character normalization feature | |
[out] | char_norm_array | place to put results of size unicharset.size() |
Definition at line 62 of file float2int.cpp.
void tesseract::Classify::ComputeIntFeatures | ( | FEATURE_SET | Features, |
INT_FEATURE_ARRAY | IntFeatures | ||
) |
This routine converts each floating point pico-feature in Features into integer format and saves it into IntFeatures.
Globals:
Features | floating point pico-features to be converted | |
[out] | IntFeatures | array to put converted features into |
Definition at line 90 of file float2int.cpp.
float tesseract::Classify::ComputeNormMatch | ( | CLASS_ID | ClassId, |
const FEATURE_STRUCT & | feature, | ||
bool | DebugMatch | ||
) |
This routine compares Features against each character normalization proto for ClassId and returns the match rating of the best match.
ClassId | id of class to match against |
feature | character normalization feature |
DebugMatch | controls dump of debug info |
Globals: NormProtos character normalization prototypes
Definition at line 93 of file normmatch.cpp.
void tesseract::Classify::ConvertMatchesToChoices | ( | const DENORM & | denorm, |
const TBOX & | box, | ||
ADAPT_RESULTS * | Results, | ||
BLOB_CHOICE_LIST * | Choices | ||
) |
The function converts the given match ratings to the list of blob choices with ratings and certainties (used by the context checkers). If character fragments are present in the results, this function also makes sure that there is at least one non-fragmented classification included. For each classification result check the unicharset for "definite" ambiguities and modify the resulting Choices accordingly.
Definition at line 1413 of file adaptmatch.cpp.
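void tesseract::Classify::ConvertProto | ( | PROTO | Proto, |
int | ProtoId, | ||
INT_CLASS | Class | ||
) |
This routine converts Proto to integer format and installs it as ProtoId in Class.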
Proto | floating-pt proto to be converted to integer format |
ProtoId | id of proto |
Class | integer class to add converted proto to |
Definition at line 487 of file intproto.cpp.
INT_TEMPLATES tesseract::Classify::CreateIntTemplates | ( | CLASSES | FloatProtos, |
const UNICHARSET & | target_unicharset | ||
) |
This routine converts from the old floating point format to the new integer format.
FloatProtos | prototypes in old floating pt format |
target_unicharset | the UNICHARSET to use |
Definition at line 526 of file intproto.cpp.
void tesseract::Classify::DebugAdaptiveClassifier | ( | TBLOB * | blob, |
ADAPT_RESULTS * | Results | ||
) |
blob | blob whose classification is being debugged |
Results | results of match being debugged |
Globals: none
Definition at line 1497 of file adaptmatch.cpp.
void tesseract::Classify::DisplayAdaptedChar | ( | TBLOB * | blob, |
INT_CLASS_STRUCT * | int_class | ||
) |
Definition at line 946 of file adaptmatch.cpp.
void tesseract::Classify::DoAdaptiveMatch | ( | TBLOB * | Blob, |
ADAPT_RESULTS * | Results | ||
) |
This routine performs an adaptive classification. If we have not yet adapted to enough classes, a simple classification to the pre-trained templates is performed. Otherwise, we match the blob against the adapted templates. If the adapted templates do not match well, we try a match against the pre-trained templates. If an adapted template match is found, we do a match to any pre-trained templates which could be ambiguous. The results from all of these classifications are merged together into Results.
Blob | blob to be classified |
Results | place to put match results |
Globals:
Definition at line 1530 of file adaptmatch.cpp.
void tesseract::Classify::EndAdaptiveClassifier | ( | ) |
This routine performs cleanup operations on the adaptive classifier. It should be called before the program is terminated. Its main function is to save the adapted templates to a file.
Globals:
Definition at line 459 of file adaptmatch.cpp.
void tesseract::Classify::ExpandShapesAndApplyCorrections | ( | ADAPT_CLASS * | classes, |
bool | debug, | ||
int | class_id, | ||
int | bottom, | ||
int | top, | ||
float | cp_rating, | ||
int | blob_length, | ||
int | matcher_multiplier, | ||
const uint8_t * | cn_factors, | ||
UnicharRating * | int_result, | ||
ADAPT_RESULTS * | final_results | ||
) |
Definition at line 1128 of file adaptmatch.cpp.
|
static |
FEATURE_SET tesseract::Classify::ExtractIntCNFeatures | ( | const TBLOB & | blob, |
const INT_FX_RESULT_STRUCT & | fx_info | ||
) |
blob | blob to extract features from |
fx_info |
Definition at line 216 of file picofeat.cpp.
FEATURE_SET tesseract::Classify::ExtractIntGeoFeatures | ( | const TBLOB & | blob, |
const INT_FX_RESULT_STRUCT & | fx_info | ||
) |
blob | blob to extract features from |
fx_info |
Definition at line 246 of file picofeat.cpp.
FEATURE_SET tesseract::Classify::ExtractOutlineFeatures | ( | TBLOB * | Blob | ) |
Convert each segment in the outline to a feature and return the features.
Blob | blob to extract pico-features from |
Definition at line 54 of file outfeat.cpp.
FEATURE_SET tesseract::Classify::ExtractPicoFeatures | ( | TBLOB * | Blob | ) |
Operation: Dummy for now.
Globals:
Blob | blob to extract pico-features from |
Definition at line 62 of file picofeat.cpp.
void tesseract::Classify::FreeNormProtos | ( | ) |
Definition at line 167 of file normmatch.cpp.
|
inline |
Definition at line 386 of file classify.h.
|
inline |
Definition at line 389 of file classify.h.
|
inline |
Definition at line 392 of file classify.h.
int tesseract::Classify::GetAdaptiveFeatures | ( | TBLOB * | Blob, |
INT_FEATURE_ARRAY | IntFeatures, | ||
FEATURE_SET * | FloatFeatures | ||
) |
This routine sets up the feature extractor to extract baseline normalized pico-features.
The extracted pico-features are converted to integer form and placed in IntFeatures. The original floating-pt. features are returned in FloatFeatures.
Globals: none
Blob | blob to extract features from | |
[out] | IntFeatures | array to fill with integer features |
[out] | FloatFeatures | place to return actual floating-pt features |
Definition at line 786 of file adaptmatch.cpp.
UNICHAR_ID * tesseract::Classify::GetAmbiguities | ( | TBLOB * | Blob, |
CLASS_ID | CorrectClass | ||
) |
This routine matches blob to the built-in templates to find out if there are any classes other than the correct class which are potential ambiguities.
Blob | blob to get classification ambiguities for |
CorrectClass | correct class for Blob |
Globals:
Definition at line 1592 of file adaptmatch.cpp.
int tesseract::Classify::GetCharNormFeature | ( | const INT_FX_RESULT_STRUCT & | fx_info, |
INT_TEMPLATES | templates, | ||
uint8_t * | pruner_norm_array, | ||
uint8_t * | char_norm_array | ||
) |
This routine calls the integer (Hardware) feature extractor if it has not been called before for this blob.
The results from the feature extractor are placed into globals so that they can be used in other routines without re-extracting the features.
It then copies the char norm features into the IntFeatures array provided by the caller.
templates | used to compute char norm adjustments |
pruner_norm_array | Array of factors from blob normalization process |
char_norm_array | array to fill with dummy char norm adjustments |
fx_info | |
Globals:
Definition at line 1678 of file adaptmatch.cpp.
CLASS_ID tesseract::Classify::GetClassToDebug | ( | const char * | Prompt, |
bool * | adaptive_on, | ||
bool * | pretrained_on, | ||
int * | shape_id | ||
) |
This routine prompts the user with Prompt and waits for the user to enter something in the debug window.
Prompt | prompt to print while waiting for input from window |
adaptive_on | |
pretrained_on | |
shape_id |
Definition at line 1256 of file intproto.cpp.
|
inlinevirtual |
Reimplemented in tesseract::Tesseract.
Definition at line 107 of file classify.h.
int tesseract::Classify::GetFontinfoId | ( | ADAPT_CLASS | Class, |
uint8_t | ConfigId | ||
) |
Definition at line 173 of file adaptive.cpp.
void tesseract::Classify::InitAdaptedClass | ( | TBLOB * | Blob, |
CLASS_ID | ClassId, | ||
int | FontinfoId, | ||
ADAPT_CLASS | Class, | ||
ADAPT_TEMPLATES | Templates | ||
) |
This routine creates a new adapted class and uses Blob as the model for the first config in that class.
Blob | blob to model new class after |
ClassId | id of the class to be initialized |
FontinfoId | font information inferred from pre-trained templates |
Class | adapted class to be initialized |
Templates | adapted templates to add new class to |
Globals:
Definition at line 693 of file adaptmatch.cpp.
void tesseract::Classify::InitAdaptiveClassifier | ( | TessdataManager * | mgr | ) |
This routine reads in the training information needed by the adaptive classifier and saves it into global variables.
Parameters: load_pre_trained_templates - indicates whether the pre-trained templates (inttemp, normproto and pffmtable components) should be loaded. Should only be set to true if the necessary classifier components are present in the [lang].traineddata file.
Globals: BuiltInTemplatesFile - file to get built-in temps from; BuiltInCutoffsFile - file to get avg. feat per class from; classify_use_pre_adapted_templates - enables use of pre-adapted templates.
Definition at line 527 of file adaptmatch.cpp.
bool tesseract::Classify::LargeSpeckle | ( | const TBLOB & | blob | ) |
Definition at line 224 of file classify.cpp.
void tesseract::Classify::LearnBlob | ( | const STRING & | fontname, |
TBLOB * | Blob, | ||
const DENORM & | cn_denorm, | ||
const INT_FX_RESULT_STRUCT & | fx_info, | ||
const char * | blob_text | ||
) |
Definition at line 70 of file blobclass.cpp.
void tesseract::Classify::LearnPieces | ( | const char * | fontname, |
int | start, | ||
int | length, | ||
float | threshold, | ||
CharSegmentationType | segmentation, | ||
const char * | correct_text, | ||
WERD_RES * | word | ||
) |
Definition at line 374 of file adaptmatch.cpp.
void tesseract::Classify::LearnWord | ( | const char * | fontname, |
WERD_RES * | word | ||
) |
Definition at line 250 of file adaptmatch.cpp.
bool tesseract::Classify::LooksLikeGarbage | ( | TBLOB * | blob | ) |
Definition at line 1633 of file adaptmatch.cpp.
int tesseract::Classify::MakeNewTemporaryConfig | ( | ADAPT_TEMPLATES | Templates, |
CLASS_ID | ClassId, | ||
int | FontinfoId, | ||
int | NumFeatures, | ||
INT_FEATURE_ARRAY | Features, | ||
FEATURE_SET | FloatFeatures | ||
) |
Templates | adapted templates to add new config to |
ClassId | class id to associate with new config |
FontinfoId | font information inferred from pre-trained templates |
NumFeatures | number of features in IntFeatures |
Features | features describing model for new config |
FloatFeatures | floating-pt representation of features |
Definition at line 1740 of file adaptmatch.cpp.
PROTO_ID tesseract::Classify::MakeNewTempProtos | ( | FEATURE_SET | Features, |
int | NumBadFeat, | ||
FEATURE_ID | BadFeat[], | ||
INT_CLASS | IClass, | ||
ADAPT_CLASS | Class, | ||
BIT_VECTOR | TempProtoMask | ||
) |
This routine finds sets of sequential bad features that all have the same angle and converts each set into a new temporary proto. The temp proto is added to the proto pruner for IClass, pushed onto the list of temp protos in Class, and added to TempProtoMask.
Features | floating-pt features describing new character |
NumBadFeat | number of bad features to turn into protos |
BadFeat | feature id's of bad features |
IClass | integer class templates to add new protos to |
Class | adapted class templates to add new protos to |
TempProtoMask | proto mask to add new protos to |
Globals: none
Definition at line 1834 of file adaptmatch.cpp.
void tesseract::Classify::MakePermanent | ( | ADAPT_TEMPLATES | Templates, |
CLASS_ID | ClassId, | ||
int | ConfigId, | ||
TBLOB * | Blob | ||
) |
Templates | current set of adaptive templates |
ClassId | class containing config to be made permanent |
ConfigId | config to be made permanent |
Blob | current blob being adapted to |
Globals: none
Definition at line 1920 of file adaptmatch.cpp.
void tesseract::Classify::MasterMatcher | ( | INT_TEMPLATES | templates, |
int16_t | num_features, | ||
const INT_FEATURE_STRUCT * | features, | ||
const uint8_t * | norm_factors, | ||
ADAPT_CLASS * | classes, | ||
int | debug, | ||
int | matcher_multiplier, | ||
const TBOX & | blob_box, | ||
const GenericVector< CP_RESULT_STRUCT > & | results, | ||
ADAPT_RESULTS * | final_results | ||
) |
Factored-out calls to IntegerMatcher based on class pruner results. Returns integer matcher results inside CLASS_PRUNER_RESULTS structure.
Definition at line 1088 of file adaptmatch.cpp.
ADAPT_TEMPLATES tesseract::Classify::NewAdaptedTemplates | ( | bool | InitFromUnicharset | ) |
Allocates memory for adapted templates.
InitFromUnicharset | if true, add an empty class for each char in unicharset to the newly created templates |
Definition at line 151 of file adaptive.cpp.
void tesseract::Classify::NormalizeOutlines | ( | LIST | Outlines, |
float * | XScale, | ||
float * | YScale | ||
) |
This routine normalizes every outline in Outlines according to the currently selected normalization method. It also returns the scale factors that it used to do this scaling. The scale factors returned represent the x and y sizes in the normalized coordinate system that correspond to 1 pixel in the original coordinate system. Outlines are changed and XScale and YScale are updated.
Globals:
Outlines | list of outlines to be normalized |
XScale | x-direction scale factor used by routine |
YScale | y-direction scale factor used by routine |
Definition at line 275 of file mfoutline.cpp.
void tesseract::Classify::PrintAdaptedTemplates | ( | FILE * | File, |
ADAPT_TEMPLATES | Templates | ||
) |
This routine prints a summary of the adapted templates in Templates to File.
Definition at line 244 of file adaptive.cpp.
void tesseract::Classify::PrintAdaptiveMatchResults | ( | const ADAPT_RESULTS & | results | ) |
This routine writes the matches in Results to File.
results | match results to write to File |
Globals: none
Definition at line 2013 of file adaptmatch.cpp.
int tesseract::Classify::PruneClasses | ( | const INT_TEMPLATES_STRUCT * | int_templates, |
int | num_features, | ||
int | keep_this, | ||
const INT_FEATURE_STRUCT * | features, | ||
const uint8_t * | normalization_factors, | ||
const uint16_t * | expected_num_features, | ||
GenericVector< CP_RESULT_STRUCT > * | results | ||
) |
Runs the class pruner from int_templates on the given features, returning the number of classes output in results.
int_templates | Class pruner tables |
num_features | Number of features in blob |
features | Array of features |
normalization_factors | Array of fudge factors from blob normalization process (by CLASS_INDEX) |
expected_num_features | Array of expected number of features for each class (by CLASS_INDEX) |
results | Sorted Array of pruned classes. Must be an array of size at least int_templates->NumClasses. |
keep_this |
Definition at line 451 of file intmatcher.cpp.
ADAPT_TEMPLATES tesseract::Classify::ReadAdaptedTemplates | ( | TFile * | fp | ) |
Read a set of adapted templates from file and return a ptr to the templates.
fp | open text file to read adapted templates from |
Definition at line 332 of file adaptive.cpp.
INT_TEMPLATES tesseract::Classify::ReadIntTemplates | ( | TFile * | fp | ) |
This routine reads a set of integer templates from File. File must already be open and must be in the correct binary format.
fp | open file to read templates from |
Definition at line 717 of file intproto.cpp.
void tesseract::Classify::ReadNewCutoffs | ( | TFile * | fp, |
uint16_t * | Cutoffs | ||
) |
Open file, read in all of the class-id/cutoff pairs and insert them into the Cutoffs array. Cutoffs are indexed in the array by class id. Unused entries in the array are set to an arbitrarily high cutoff value.
fp | file containing cutoff definitions |
Cutoffs | array to put cutoffs into |
Definition at line 40 of file cutoffs.cpp.
NORM_PROTOS * tesseract::Classify::ReadNormProtos | ( | TFile * | fp | ) |
This routine allocates a new data structure to hold a set of character normalization protos. It then fills in the data structure by reading from the specified File.
fp | open text file to read normalization protos from |
Globals: none
Definition at line 189 of file normmatch.cpp.
void tesseract::Classify::RefreshDebugWindow | ( | ScrollView ** | win, |
const char * | msg, | ||
int | y_offset, | ||
const TBOX & | wbox | ||
) |
Definition at line 226 of file adaptmatch.cpp.
void tesseract::Classify::RemoveBadMatches | ( | ADAPT_RESULTS * | Results | ) |
This routine steps through each matching class in Results and removes it from the match list if its rating is worse than the BestRating plus a pad. In other words, all good matches get moved to the front of the classes array.
Results | contains matches to be filtered |
Globals:
Definition at line 2033 of file adaptmatch.cpp.
void tesseract::Classify::RemoveExtraPuncs | ( | ADAPT_RESULTS * | Results | ) |
This routine discards extra digits or punctuation from the results. We keep only the top 2 punctuation answers and the top 1 digit answer if present.
Results | contains matches to be filtered |
Definition at line 2093 of file adaptmatch.cpp.
void tesseract::Classify::ResetAdaptiveClassifierInternal | ( | ) |
Definition at line 598 of file adaptmatch.cpp.
void tesseract::Classify::SetAdaptiveThreshold | ( | float | Threshold | ) |
This routine resets the internal thresholds inside the integer matcher to correspond to the specified threshold.
Threshold | threshold for creating new templates |
Globals:
Definition at line 2141 of file adaptmatch.cpp.
void tesseract::Classify::SetStaticClassifier | ( | ShapeClassifier * | static_classifier | ) |
Definition at line 193 of file classify.cpp.
void tesseract::Classify::SettupPass1 | ( | ) |
This routine prepares the adaptive matcher for the start of the first pass. Learning is enabled (unless it is disabled for the whole program).
Globals:
Definition at line 652 of file adaptmatch.cpp.
void tesseract::Classify::SettupPass2 | ( | ) |
This routine prepares the adaptive matcher for the start of the second pass. Further learning is disabled.
Globals:
Definition at line 669 of file adaptmatch.cpp.
|
static |
|
inline |
Definition at line 111 of file classify.h.
int tesseract::Classify::ShapeIDToClassID | ( | int | shape_id | ) | const |
Definition at line 2220 of file adaptmatch.cpp.
void tesseract::Classify::ShowBestMatchFor | ( | int | shape_id, |
const INT_FEATURE_STRUCT * | features, | ||
int | num_features | ||
) |
This routine displays debug information for the best config of the given shape_id for the given set of features.
shape_id | classifier id to work with |
features | features of the unknown character |
num_features | Number of features in the features array. |
Definition at line 2159 of file adaptmatch.cpp.
void tesseract::Classify::ShowMatchDisplay | ( | ) |
This routine sends the shapes in the global display lists to the match debugger window.
Globals:
Definition at line 962 of file intproto.cpp.
void tesseract::Classify::StartBackupAdaptiveClassifier | ( | ) |
Definition at line 629 of file adaptmatch.cpp.
void tesseract::Classify::SwitchAdaptiveClassifier | ( | ) |
Definition at line 613 of file adaptmatch.cpp.
bool tesseract::Classify::TempConfigReliable | ( | CLASS_ID | class_id, |
const TEMP_CONFIG & | config | ||
) |
Definition at line 2236 of file adaptmatch.cpp.
Definition at line 2273 of file adaptmatch.cpp.
void tesseract::Classify::WriteAdaptedTemplates | ( | FILE * | File, |
ADAPT_TEMPLATES | Templates | ||
) |
This routine saves Templates to File in a binary format.
File | open file to write Templates to |
Templates | set of adapted templates to write to File |
Definition at line 453 of file adaptive.cpp.
void tesseract::Classify::WriteIntTemplates | ( | FILE * | File, |
INT_TEMPLATES | Templates, | ||
const UNICHARSET & | target_unicharset | ||
) |
This routine writes Templates to File. The format is an efficient binary format. File must already be open for writing.
File | open file to write templates to |
Templates | templates to save into File |
target_unicharset | the UNICHARSET to use |
Definition at line 1017 of file intproto.cpp.
bool tesseract::Classify::WriteTRFile | ( | const STRING & | filename | ) |
Definition at line 98 of file blobclass.cpp.
ADAPT_TEMPLATES tesseract::Classify::AdaptedTemplates = nullptr |
Definition at line 515 of file classify.h.
BIT_VECTOR tesseract::Classify::AllConfigsOff = nullptr |
Definition at line 524 of file classify.h.
BIT_VECTOR tesseract::Classify::AllConfigsOn = nullptr |
Definition at line 523 of file classify.h.
bool tesseract::Classify::allow_blob_division = true |
"Use divisible blobs chopping"
Definition at line 423 of file classify.h.
BIT_VECTOR tesseract::Classify::AllProtosOn = nullptr |
Definition at line 522 of file classify.h.
ADAPT_TEMPLATES tesseract::Classify::BackupAdaptedTemplates = nullptr |
Definition at line 519 of file classify.h.
double tesseract::Classify::certainty_scale = 20.0 |
"Certainty scaling factor"
Definition at line 473 of file classify.h.
int tesseract::Classify::classify_adapt_feature_threshold = 230 |
"Threshold for good features during adaptive 0-255"
Definition at line 483 of file classify.h.
int tesseract::Classify::classify_adapt_proto_threshold = 230 |
"Threshold for good protos during adaptive 0-255"
Definition at line 481 of file classify.h.
double tesseract::Classify::classify_adapted_pruning_factor = 2.5 |
"Prune poor adapted results this much worse than best result"
Definition at line 477 of file classify.h.
double tesseract::Classify::classify_adapted_pruning_threshold = -1.0 |
"Threshold at which classify_adapted_pruning_factor starts"
Definition at line 479 of file classify.h.
bool tesseract::Classify::classify_bln_numeric_mode = 0 |
"Assume the input is numbers [0-9]."
Definition at line 508 of file classify.h.
double tesseract::Classify::classify_char_norm_range = 0.2 |
"Character Normalization Range ..."
Definition at line 436 of file classify.h.
double tesseract::Classify::classify_character_fragments_garbage_certainty_threshold = -3.0 |
"Exclude fragments that do not match any whole character with at least this certainty"
Definition at line 489 of file classify.h.
int tesseract::Classify::classify_class_pruner_multiplier = 15 |
"Class Pruner Multiplier 0-255: "
Definition at line 501 of file classify.h.
int tesseract::Classify::classify_class_pruner_threshold = 229 |
"Class Pruner Threshold 0-255"
Definition at line 499 of file classify.h.
int tesseract::Classify::classify_cp_cutoff_strength = 7 |
"Class Pruner CutoffStrength: "
Definition at line 503 of file classify.h.
bool tesseract::Classify::classify_debug_character_fragments = false |
"Bring up graphical debugging windows for fragments training"
Definition at line 491 of file classify.h.
int tesseract::Classify::classify_debug_level = 0 |
"Classify debug level"
Definition at line 430 of file classify.h.
bool tesseract::Classify::classify_enable_adaptive_debugger = 0 |
"Enable match debugger"
Definition at line 450 of file classify.h.
bool tesseract::Classify::classify_enable_adaptive_matcher = 1 |
"Enable adaptive classifier"
Definition at line 445 of file classify.h.
bool tesseract::Classify::classify_enable_learning = true |
"Enable adaptive classifier"
Definition at line 429 of file classify.h.
int tesseract::Classify::classify_integer_matcher_multiplier = 10 |
"Integer Matcher Multiplier 0-255: "
Definition at line 505 of file classify.h.
char* tesseract::Classify::classify_learn_debug_str = "" |
"Class str to debug learning"
Definition at line 495 of file classify.h.
int tesseract::Classify::classify_learning_debug_level = 0 |
"Learning Debug Level: "
Definition at line 455 of file classify.h.
double tesseract::Classify::classify_max_certainty_margin = 5.5 |
"Veto difference between classifier certainties"
Definition at line 440 of file classify.h.
double tesseract::Classify::classify_max_rating_ratio = 1.5 |
"Veto ratio between classifier ratings"
Definition at line 438 of file classify.h.
double tesseract::Classify::classify_misfit_junk_penalty = 0.0 |
"Penalty to apply when a non-alnum is vertically out of its expected textline position"
Definition at line 471 of file classify.h.
bool tesseract::Classify::classify_nonlinear_norm = 0 |
"Non-linear stroke-density normalization"
Definition at line 452 of file classify.h.
int tesseract::Classify::classify_norm_method = character |
"Normalization Method ..."
Definition at line 434 of file classify.h.
bool tesseract::Classify::classify_save_adapted_templates = 0 |
"Save adapted templates to a file"
Definition at line 449 of file classify.h.
bool tesseract::Classify::classify_use_pre_adapted_templates = 0 |
"Use pre-adapted classifier templates"
Definition at line 447 of file classify.h.
bool tesseract::Classify::disable_character_fragments = true |
"Do not include character fragments in the results of the classifier"
Definition at line 486 of file classify.h.
bool tesseract::Classify::EnableLearning = true |
Definition at line 577 of file classify.h.
|
protected |
Definition at line 541 of file classify.h.
UnicityTable<FontInfo> tesseract::Classify::fontinfo_table_ |
Definition at line 529 of file classify.h.
UnicityTable<FontSet> tesseract::Classify::fontset_table_ |
Definition at line 537 of file classify.h.
|
protected |
Definition at line 540 of file classify.h.
double tesseract::Classify::matcher_avg_noise_size = 12.0 |
"Avg. noise blob length: "
Definition at line 461 of file classify.h.
double tesseract::Classify::matcher_bad_match_pad = 0.15 |
"Bad Match Pad (0-1)"
Definition at line 459 of file classify.h.
double tesseract::Classify::matcher_clustering_max_angle_delta = 0.015 |
"Maximum angle delta for prototype clustering"
Definition at line 468 of file classify.h.
int tesseract::Classify::matcher_debug_flags = 0 |
"Matcher Debug Flags"
Definition at line 454 of file classify.h.
int tesseract::Classify::matcher_debug_level = 0 |
"Matcher Debug Level"
Definition at line 453 of file classify.h.
bool tesseract::Classify::matcher_debug_separate_windows = false |
"Use two different windows for debugging the matching: One for the protos and one for the features."
Definition at line 494 of file classify.h.
double tesseract::Classify::matcher_good_threshold = 0.125 |
"Good Match (0-1)"
Definition at line 456 of file classify.h.
int tesseract::Classify::matcher_min_examples_for_prototyping = 3 |
"Reliable Config Threshold"
Definition at line 464 of file classify.h.
double tesseract::Classify::matcher_perfect_threshold = 0.02 |
"Perfect Match (0-1)"
Definition at line 458 of file classify.h.
int tesseract::Classify::matcher_permanent_classes_min = 1 |
"Min # of permanent classes"
Definition at line 462 of file classify.h.
double tesseract::Classify::matcher_rating_margin = 0.1 |
"New template margin (0-1)"
Definition at line 460 of file classify.h.
double tesseract::Classify::matcher_reliable_adaptive_result = 0.0 |
"Great Match (0-1)"
Definition at line 457 of file classify.h.
int tesseract::Classify::matcher_sufficient_examples_for_prototyping = 5 |
"Enable adaption even if the ambiguities have not been seen"
Definition at line 466 of file classify.h.
NORM_PROTOS* tesseract::Classify::NormProtos = nullptr |
Definition at line 527 of file classify.h.
INT_TEMPLATES tesseract::Classify::PreTrainedTemplates = nullptr |
Definition at line 514 of file classify.h.
bool tesseract::Classify::prioritize_division = false |
"Prioritize blob division over chopping"
Definition at line 428 of file classify.h.
double tesseract::Classify::rating_scale = 1.5 |
"Rating scaling factor"
Definition at line 472 of file classify.h.
|
protected |
Definition at line 546 of file classify.h.
double tesseract::Classify::speckle_large_max_size = 0.30 |
"Max large speckle size"
Definition at line 509 of file classify.h.
double tesseract::Classify::speckle_rating_penalty = 10.0 |
"Penalty to add to worst rating for noise"
Definition at line 511 of file classify.h.
BIT_VECTOR tesseract::Classify::TempProtoMask = nullptr |
Definition at line 525 of file classify.h.
bool tesseract::Classify::tess_bn_matching = 0 |
"Baseline Normalized Matching"
Definition at line 444 of file classify.h.
bool tesseract::Classify::tess_cn_matching = 0 |
"Character Normalized Matching"
Definition at line 443 of file classify.h.
double tesseract::Classify::tessedit_class_miss_scale = 0.00390625 |
"Scale factor for features not used"
Definition at line 475 of file classify.h.