|
tesseract
5.0.0-alpha-619-ge9db
|
#include <classify.h>
|
| | Classify () |
| |
| | ~Classify () override |
| |
| virtual Dict & | getDict () |
| |
| const ShapeTable * | shape_table () const |
| |
| void | SetStaticClassifier (ShapeClassifier *static_classifier) |
| |
| void | AddLargeSpeckleTo (int blob_length, BLOB_CHOICE_LIST *choices) |
| |
| bool | LargeSpeckle (const TBLOB &blob) |
| |
| ADAPT_TEMPLATES | NewAdaptedTemplates (bool InitFromUnicharset) |
| |
| int | GetFontinfoId (ADAPT_CLASS Class, uint8_t ConfigId) |
| |
| int | PruneClasses (const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uint8_t *normalization_factors, const uint16_t *expected_num_features, GenericVector< CP_RESULT_STRUCT > *results) |
| |
| void | ReadNewCutoffs (TFile *fp, uint16_t *Cutoffs) |
| |
| void | PrintAdaptedTemplates (FILE *File, ADAPT_TEMPLATES Templates) |
| |
| void | WriteAdaptedTemplates (FILE *File, ADAPT_TEMPLATES Templates) |
| |
| ADAPT_TEMPLATES | ReadAdaptedTemplates (TFile *File) |
| |
| float | ComputeNormMatch (CLASS_ID ClassId, const FEATURE_STRUCT &feature, bool DebugMatch) |
| |
| void | FreeNormProtos () |
| |
| NORM_PROTOS * | ReadNormProtos (TFile *fp) |
| |
| void | ConvertProto (PROTO Proto, int ProtoId, INT_CLASS Class) |
| |
| INT_TEMPLATES | CreateIntTemplates (CLASSES FloatProtos, const UNICHARSET &target_unicharset) |
| |
| void | LearnWord (const char *fontname, WERD_RES *word) |
| |
| void | LearnPieces (const char *fontname, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word) |
| |
| void | InitAdaptiveClassifier (TessdataManager *mgr) |
| |
| void | InitAdaptedClass (TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates) |
| |
| void | AmbigClassifier (const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, const TBLOB *blob, INT_TEMPLATES templates, ADAPT_CLASS *classes, UNICHAR_ID *ambiguities, ADAPT_RESULTS *results) |
| |
| void | MasterMatcher (INT_TEMPLATES templates, int16_t num_features, const INT_FEATURE_STRUCT *features, const uint8_t *norm_factors, ADAPT_CLASS *classes, int debug, int matcher_multiplier, const TBOX &blob_box, const GenericVector< CP_RESULT_STRUCT > &results, ADAPT_RESULTS *final_results) |
| |
| void | ExpandShapesAndApplyCorrections (ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, const uint8_t *cn_factors, UnicharRating *int_result, ADAPT_RESULTS *final_results) |
| |
| double | ComputeCorrectedRating (bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, int matcher_multiplier, const uint8_t *cn_factors) |
| |
| void | ConvertMatchesToChoices (const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices) |
| |
| void | AddNewResult (const UnicharRating &new_result, ADAPT_RESULTS *results) |
| |
| int | GetAdaptiveFeatures (TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures) |
| |
| void | DebugAdaptiveClassifier (TBLOB *Blob, ADAPT_RESULTS *Results) |
| |
| PROTO_ID | MakeNewTempProtos (FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS IClass, ADAPT_CLASS Class, BIT_VECTOR TempProtoMask) |
| |
| int | MakeNewTemporaryConfig (ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures) |
| |
| void | MakePermanent (ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob) |
| |
| void | PrintAdaptiveMatchResults (const ADAPT_RESULTS &results) |
| |
| void | RemoveExtraPuncs (ADAPT_RESULTS *Results) |
| |
| void | RemoveBadMatches (ADAPT_RESULTS *Results) |
| |
| void | SetAdaptiveThreshold (float Threshold) |
| |
| void | ShowBestMatchFor (int shape_id, const INT_FEATURE_STRUCT *features, int num_features) |
| |
| STRING | ClassIDToDebugStr (const INT_TEMPLATES_STRUCT *templates, int class_id, int config_id) const |
| |
| int | ClassAndConfigIDToFontOrShapeID (int class_id, int int_result_config) const |
| |
| int | ShapeIDToClassID (int shape_id) const |
| |
| UNICHAR_ID * | BaselineClassifier (TBLOB *Blob, const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results) |
| |
| int | CharNormClassifier (TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results) |
| |
| int | CharNormTrainingSample (bool pruner_only, int keep_this, const TrainingSample &sample, GenericVector< UnicharRating > *results) |
| |
| UNICHAR_ID * | GetAmbiguities (TBLOB *Blob, CLASS_ID CorrectClass) |
| |
| void | DoAdaptiveMatch (TBLOB *Blob, ADAPT_RESULTS *Results) |
| |
| void | AdaptToChar (TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, float Threshold, ADAPT_TEMPLATES adaptive_templates) |
| |
| void | DisplayAdaptedChar (TBLOB *blob, INT_CLASS_STRUCT *int_class) |
| |
| bool | AdaptableWord (WERD_RES *word) |
| |
| void | EndAdaptiveClassifier () |
| |
| void | SettupPass1 () |
| |
| void | SettupPass2 () |
| |
| void | AdaptiveClassifier (TBLOB *Blob, BLOB_CHOICE_LIST *Choices) |
| |
| void | ClassifyAsNoise (ADAPT_RESULTS *Results) |
| |
| void | ResetAdaptiveClassifierInternal () |
| |
| void | SwitchAdaptiveClassifier () |
| |
| void | StartBackupAdaptiveClassifier () |
| |
| int | GetCharNormFeature (const INT_FX_RESULT_STRUCT &fx_info, INT_TEMPLATES templates, uint8_t *pruner_norm_array, uint8_t *char_norm_array) |
| |
| void | ComputeCharNormArrays (FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uint8_t *char_norm_array, uint8_t *pruner_array) |
| |
| bool | TempConfigReliable (CLASS_ID class_id, const TEMP_CONFIG &config) |
| |
| void | UpdateAmbigsGroup (CLASS_ID class_id, TBLOB *Blob) |
| |
| bool | AdaptiveClassifierIsFull () const |
| |
| bool | AdaptiveClassifierIsEmpty () const |
| |
| bool | LooksLikeGarbage (TBLOB *blob) |
| |
| void | RefreshDebugWindow (ScrollView **win, const char *msg, int y_offset, const TBOX &wbox) |
| |
| void | ClearCharNormArray (uint8_t *char_norm_array) |
| |
| void | ComputeIntCharNormArray (const FEATURE_STRUCT &norm_feature, uint8_t *char_norm_array) |
| |
| void | ComputeIntFeatures (FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures) |
| |
| INT_TEMPLATES | ReadIntTemplates (TFile *fp) |
| |
| void | WriteIntTemplates (FILE *File, INT_TEMPLATES Templates, const UNICHARSET &target_unicharset) |
| |
| CLASS_ID | GetClassToDebug (const char *Prompt, bool *adaptive_on, bool *pretrained_on, int *shape_id) |
| |
| void | ShowMatchDisplay () |
| |
| UnicityTable< FontInfo > & | get_fontinfo_table () |
| |
| const UnicityTable< FontInfo > & | get_fontinfo_table () const |
| |
| UnicityTable< FontSet > & | get_fontset_table () |
| |
| void | NormalizeOutlines (LIST Outlines, float *XScale, float *YScale) |
| |
| FEATURE_SET | ExtractOutlineFeatures (TBLOB *Blob) |
| |
| FEATURE_SET | ExtractPicoFeatures (TBLOB *Blob) |
| |
| FEATURE_SET | ExtractIntCNFeatures (const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info) |
| |
| FEATURE_SET | ExtractIntGeoFeatures (const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info) |
| |
| void | LearnBlob (const STRING &fontname, TBLOB *Blob, const DENORM &cn_denorm, const INT_FX_RESULT_STRUCT &fx_info, const char *blob_text) |
| |
| bool | WriteTRFile (const STRING &filename) |
| |
| | CCStruct ()=default |
| |
| | ~CCStruct () override |
| |
| | CCUtil () |
| |
| virtual | ~CCUtil () |
| |
| void | main_setup (const char *argv0, const char *basename) |
| | CCUtil::main_setup - set location of tessdata and name of image. More...
|
| |
| ParamsVectors * | params () |
| |
Definition at line 103 of file classify.h.
◆ Classify()
| tesseract::Classify::Classify |
( |
| ) |
|
Definition at line 60 of file classify.cpp.
64 "Prioritize blob division over chopping", this->
params()),
72 "Character Normalization Range ...", this->
params()),
74 "Veto ratio between classifier ratings", this->
params()),
76 "Veto difference between classifier certainties",
83 "Enable adaptive classifier", this->
params()),
85 "Use pre-adapted classifier templates", this->
params()),
87 "Save adapted templates to a file", this->
params()),
91 "Non-linear stroke-density normalization", this->
params()),
111 "Reliable Config Threshold", this->
params()),
113 "Enable adaption even if the ambiguities have not been seen",
116 "Maximum angle delta for prototype clustering",
119 "Penalty to apply when a non-alnum is vertically out of "
120 "its expected textline position",
126 "Scale factor for features not used", this->
params()),
129 "Prune poor adapted results this much worse than best result",
132 "Threshold at which classify_adapted_pruning_factor starts",
135 "Threshold for good protos during adaptive 0-255",
138 "Threshold for good features during adaptive 0-255",
141 "Do not include character fragments in the"
142 " results of the classifier",
146 "Exclude fragments that do not look like whole"
147 " characters from training and adaption",
150 "Bring up graphical debugging windows for fragments training",
153 "Use two different windows for debugging the matching: "
154 "One for the protos and one for the features.",
159 "Class Pruner Threshold 0-255", this->
params()),
161 "Class Pruner Multiplier 0-255: ", this->
params()),
163 "Class Pruner CutoffStrength: ", this->
params()),
165 "Integer Matcher Multiplier 0-255: ", this->
params()),
167 "Assume the input is numbers [0-9].", this->
params()),
171 "Penalty to add to worst rating for noise", this->
params()),
174 using namespace std::placeholders;
◆ ~Classify()
| tesseract::Classify::~Classify |
( |
| ) |
|
|
override |
Definition at line 183 of file classify.cpp.
185 delete learn_debug_win_;
186 delete learn_fragmented_word_debug_win_;
187 delete learn_fragments_debug_win_;
◆ AdaptableWord()
| bool tesseract::Classify::AdaptableWord |
( |
WERD_RES * |
word | ) |
|
Return true if the specified word is acceptable for adaptation.
Globals: none
- Parameters
-
- Returns
- true or false
Definition at line 821 of file adaptmatch.cpp.
824 float adaptable_score =
827 BestChoiceLength > 0 &&
◆ AdaptiveClassifier()
| void tesseract::Classify::AdaptiveClassifier |
( |
TBLOB * |
Blob, |
|
|
BLOB_CHOICE_LIST * |
Choices |
|
) |
| |
This routine calls the adaptive matcher which returns (in an array) the class id of each class matched.
It also returns the number of classes matched. For each class matched it places the best rating found for that class into the Ratings array.
Bad matches are then removed so that they don't need to be sorted. The remaining good matches are then sorted and converted to choices.
This routine also performs some simple speckle filtering.
- Parameters
-
| Blob | blob to be classified |
| [out] | Choices | List of choices found by adaptive matcher. Filled on return with the choices found by the class pruner and the ratings therefrom. Also contains the detailed results of the integer matcher. |
Definition at line 191 of file adaptmatch.cpp.
192 assert(Choices !=
nullptr);
203 Results->ComputeBest();
216 #ifndef GRAPHICS_DISABLED
◆ AdaptiveClassifierIsEmpty()
| bool tesseract::Classify::AdaptiveClassifierIsEmpty |
( |
| ) |
const |
|
inline |
◆ AdaptiveClassifierIsFull()
| bool tesseract::Classify::AdaptiveClassifierIsFull |
( |
| ) |
const |
|
inline |
Definition at line 325 of file classify.h.
325 {
return NumAdaptationsFailed > 0; }
◆ AdaptToChar()
| void tesseract::Classify::AdaptToChar |
( |
TBLOB * |
Blob, |
|
|
CLASS_ID |
ClassId, |
|
|
int |
FontinfoId, |
|
|
float |
Threshold, |
|
|
ADAPT_TEMPLATES |
adaptive_templates |
|
) |
| |
- Parameters
-
| Blob | blob to add to templates for ClassId |
| ClassId | class to add blob to |
| FontinfoId | font information from pre-trained templates |
| Threshold | minimum match rating to existing template |
| adaptive_templates | current set of adapted templates |
Globals:
- AllProtosOn dummy mask to match against all protos
- AllConfigsOn dummy mask to match against all configs
Definition at line 853 of file adaptmatch.cpp.
869 Class = adaptive_templates->
Class[ClassId];
870 assert(Class !=
nullptr);
877 if (NumFeatures <= 0) {
883 for (
int cfg = 0; cfg < IClass->
NumConfigs; ++cfg) {
885 SET_BIT(MatchingFontConfigs, cfg);
891 NumFeatures, IntFeatures,
894 FreeBitVector(MatchingFontConfigs);
898 if (1.0f - int_result.
rating <= Threshold) {
901 tprintf(
"Found good match to perm config %d = %4.1f%%.\n",
913 tprintf(
"Increasing reliability of temp config %d to %d.\n",
922 tprintf(
"Found poor match to temp config %d = %4.1f%%.\n",
929 NumFeatures, IntFeatures, FloatFeatures);
930 if (NewTempConfigId >= 0 &&
932 MakePermanent(adaptive_templates, ClassId, NewTempConfigId, Blob);
936 #ifndef GRAPHICS_DISABLED
◆ AddLargeSpeckleTo()
| void tesseract::Classify::AddLargeSpeckleTo |
( |
int |
blob_length, |
|
|
BLOB_CHOICE_LIST * |
choices |
|
) |
| |
Definition at line 201 of file classify.cpp.
202 BLOB_CHOICE_IT bc_it(choices);
207 if (!choices->empty() && blob_length > 0) {
208 bc_it.move_to_last();
218 -1, 0.0f, FLT_MAX, 0,
220 bc_it.add_to_end(blob_choice);
◆ AddNewResult()
This routine adds the result of a classification into Results. If the new rating is much worse than the current best rating, it is not entered into results because it would end up being stripped later anyway. If the new rating is better than the old rating for the class, it replaces the old rating. If this is the first rating for the class, the class is added to the list of matched classes in Results. If the new rating is better than the best so far, it becomes the best so far.
Globals:
- Parameters
-
| new_result | new result to add |
| [out] | results | results to add new result to |
Definition at line 994 of file adaptmatch.cpp.
996 int old_match = FindScoredUnichar(new_result.
unichar_id, *results);
998 if (new_result.
rating + matcher_bad_match_pad < results->best_rating ||
999 (old_match < results->match.size() &&
1000 new_result.
rating <= results->
match[old_match].rating))
1006 if (old_match < results->match.size()) {
1007 results->
match[old_match].rating = new_result.
rating;
◆ AmbigClassifier()
This routine is identical to CharNormClassifier() except that it does no class pruning. It simply matches the unknown blob against the classes listed in Ambiguities.
Globals:
- Parameters
-
| blob | blob to be classified |
| templates | built-in templates to classify against |
| classes | adapted class templates |
| ambiguities | array of unichar ids to match against, terminated by a negative value |
| [out] | results | place to put match results |
| int_features | |
| fx_info | |
Definition at line 1045 of file adaptmatch.cpp.
1053 if (int_features.
empty())
return;
1065 while (*ambiguities >= 0) {
1071 int_features.
size(), &int_features[0],
1079 CharNormArray, &int_result, results);
1082 delete [] CharNormArray;
◆ BaselineClassifier()
This routine extracts baseline normalized features from the unknown character and matches them against the specified set of templates. The classes which match are added to Results.
Globals:
- BaselineCutoffs expected num features for each class
- Parameters
-
| Blob | blob to be classified |
| Templates | current set of adapted templates |
| Results | place to put match results |
| int_features | |
| fx_info | |
- Returns
- Array of possible ambiguous chars that should be checked.
Definition at line 1265 of file adaptmatch.cpp.
1269 if (int_features.
empty())
return nullptr;
1275 CharNormArray, BaselineCutoffs, &Results->
CPResults);
1285 delete [] CharNormArray;
1290 return Templates->
Class[ClassId]->
◆ CharNormClassifier()
This routine extracts character normalized features from the unknown character and matches them against the specified set of templates. The classes which match are added to Results.
- Parameters
-
| blob | blob to be classified |
| sample | templates to classify unknown against |
| adapt_results | place to put match results |
Globals:
- CharNormCutoffs expected num features for each class
- AllProtosOn mask that enables all protos
- AllConfigsOn mask that enables all configs
Definition at line 1311 of file adaptmatch.cpp.
1319 -1, &unichar_results);
1321 for (
int r = 0; r < unichar_results.size(); ++r) {
1324 return sample.num_features();
◆ CharNormTrainingSample()
Definition at line 1329 of file adaptmatch.cpp.
1335 adapt_results->Initialize();
1337 uint32_t num_features =
sample.num_features();
1347 auto* pruner_norm_array =
new uint8_t[num_pruner_classes];
1348 adapt_results->BlobLength =
1355 shape_table_ !=
nullptr ? &shapetable_cutoffs_[0] : CharNormCutoffs,
1356 &adapt_results->CPResults);
1357 delete [] pruner_norm_array;
1358 if (keep_this >= 0) {
1359 adapt_results->CPResults[0].Class = keep_this;
1360 adapt_results->CPResults.truncate(1);
1364 for (
int i = 0; i < adapt_results->CPResults.size(); ++i) {
1365 int class_id = adapt_results->CPResults[i].Class;
1367 UnicharRating(class_id, 1.0f - adapt_results->CPResults[i].Rating));
1374 blob_box, adapt_results->CPResults, adapt_results);
1376 for (
int i = 0; i < adapt_results->match.size(); i++) {
1377 results->
push_back(adapt_results->match[i]);
1381 delete [] char_norm_array;
1382 delete adapt_results;
1383 return num_features;
◆ ClassAndConfigIDToFontOrShapeID()
| int tesseract::Classify::ClassAndConfigIDToFontOrShapeID |
( |
int |
class_id, |
|
|
int |
int_result_config |
|
) |
| const |
Definition at line 2207 of file adaptmatch.cpp.
2211 if (font_set_id < 0)
2212 return kBlankFontinfoId;
2214 ASSERT_HOST(int_result_config >= 0 && int_result_config < fs.size);
2215 return fs.configs[int_result_config];
◆ ClassIDToDebugStr()
◆ ClassifyAsNoise()
| void tesseract::Classify::ClassifyAsNoise |
( |
ADAPT_RESULTS * |
results | ) |
|
This routine computes a rating which reflects the likelihood that the blob being classified is a noise blob. NOTE: assumes that the blob length has already been computed and placed into Results.
- Parameters
-
| results | results to add noise classification to |
Globals:
- matcher_avg_noise_size avg. length of a noise blob
Definition at line 1399 of file adaptmatch.cpp.
1402 rating /= 1.0 + rating;
◆ ClearCharNormArray()
| void tesseract::Classify::ClearCharNormArray |
( |
uint8_t * |
char_norm_array | ) |
|
For each class in the unicharset, clears the corresponding entry in char_norm_array. char_norm_array is indexed by unichar_id.
Globals:
- Parameters
-
| char_norm_array | array to be cleared |
Definition at line 44 of file float2int.cpp.
45 memset(char_norm_array, 0,
sizeof(*char_norm_array) *
unicharset.
size());
◆ ComputeCharNormArrays()
Definition at line 1698 of file adaptmatch.cpp.
1703 if (pruner_array !=
nullptr) {
1707 memset(pruner_array, UINT8_MAX,
1708 templates->
NumClasses *
sizeof(pruner_array[0]));
1711 for (
int id = 0;
id < templates->
NumClasses; ++id) {
1714 for (
int config = 0; config < fs.size; ++config) {
1716 for (
int c = 0; c < shape.size(); ++c) {
1717 if (char_norm_array[shape[c].unichar_id] < pruner_array[
id])
1718 pruner_array[id] = char_norm_array[shape[c].unichar_id];
◆ ComputeCorrectedRating()
| double tesseract::Classify::ComputeCorrectedRating |
( |
bool |
debug, |
|
|
int |
unichar_id, |
|
|
double |
cp_rating, |
|
|
double |
im_rating, |
|
|
int |
feature_misses, |
|
|
int |
bottom, |
|
|
int |
top, |
|
|
int |
blob_length, |
|
|
int |
matcher_multiplier, |
|
|
const uint8_t * |
cn_factors |
|
) |
| |
Definition at line 1202 of file adaptmatch.cpp.
1210 cn_factors[unichar_id],
1211 matcher_multiplier);
1213 double vertical_penalty = 0.0;
1218 int min_bottom, max_bottom, min_top, max_top;
1220 &min_top, &max_top);
1222 tprintf(
"top=%d, vs [%d, %d], bottom=%d, vs [%d, %d]\n",
1223 top, min_top, max_top, bottom, min_bottom, max_bottom);
1225 if (top < min_top || top > max_top ||
1226 bottom < min_bottom || bottom > max_bottom) {
1230 double result = 1.0 - (cn_corrected + miss_penalty + vertical_penalty);
1234 tprintf(
"%s: %2.1f%%(CP%2.1f, IM%2.1f + CN%.2f(%d) + MP%2.1f + VP%2.1f)\n",
1238 (1.0 - im_rating) * 100.0,
1239 (cn_corrected - (1.0 - im_rating)) * 100.0,
1240 cn_factors[unichar_id],
1241 miss_penalty * 100.0,
1242 vertical_penalty * 100.0);
◆ ComputeIntCharNormArray()
| void tesseract::Classify::ComputeIntCharNormArray |
( |
const FEATURE_STRUCT & |
norm_feature, |
|
|
uint8_t * |
char_norm_array |
|
) |
| |
For each class in unicharset, computes the match between norm_feature and the normalization protos for that class. Converts this number to the range from 0 - 255 and stores it into char_norm_array. CharNormArray is indexed by unichar_id.
Globals:
- PreTrainedTemplates current set of built-in templates
- Parameters
-
| norm_feature | character normalization feature |
| [out] | char_norm_array | place to put results of size unicharset.size() |
Definition at line 62 of file float2int.cpp.
65 if (i < PreTrainedTemplates->NumClasses) {
◆ ComputeIntFeatures()
This routine converts each floating point pico-feature in Features into integer format and saves it into IntFeatures.
Globals:
- Parameters
-
| Features | floating point pico-features to be converted |
| [out] | IntFeatures | array to put converted features into |
Definition at line 90 of file float2int.cpp.
99 for (
int Fid = 0; Fid < Features->
NumFeatures; Fid++) {
108 IntFeatures[Fid].CP_misses = 0;
◆ ComputeNormMatch()
| float tesseract::Classify::ComputeNormMatch |
( |
CLASS_ID |
ClassId, |
|
|
const FEATURE_STRUCT & |
feature, |
|
|
bool |
DebugMatch |
|
) |
| |
This routine compares feature against each character normalization proto for ClassId and returns the match rating of the best match.
- Parameters
-
| ClassId | id of class to match against |
| feature | character normalization feature |
| DebugMatch | controls dump of debug info |
Globals: NormProtos character normalization prototypes
- Returns
- Best match rating for Feature against protos of ClassId.
Definition at line 93 of file normmatch.cpp.
117 return (1.0 - NormEvidenceOf(Match));
133 tprintf(
"YMiddle: Proto=%g, Delta=%g, Var=%g, Dist=%g\n",
140 tprintf(
"Height: Proto=%g, Delta=%g, Var=%g, Dist=%g\n",
147 tprintf(
"Width: Proto=%g, Delta=%g, Var=%g\n",
155 tprintf(
"Total Dist=%g, scaled=%g, sigmoid=%g, penalty=%g\n",
157 NormEvidenceOf(Match), 256 * (1 - NormEvidenceOf(Match)));
160 if (Match < BestMatch)
165 return 1.0 - NormEvidenceOf(BestMatch);
◆ ConvertMatchesToChoices()
| void tesseract::Classify::ConvertMatchesToChoices |
( |
const DENORM & |
denorm, |
|
|
const TBOX & |
box, |
|
|
ADAPT_RESULTS * |
Results, |
|
|
BLOB_CHOICE_LIST * |
Choices |
|
) |
| |
The function converts the given match ratings to the list of blob choices with ratings and certainties (used by the context checkers). If character fragments are present in the results, this function also makes sure that there is at least one non-fragmented classification included. For each classification result check the unicharset for "definite" ambiguities and modify the resulting Choices accordingly.
Definition at line 1413 of file adaptmatch.cpp.
1416 assert(Choices !=
nullptr);
1419 BLOB_CHOICE_IT temp_it;
1420 bool contains_nonfrag =
false;
1421 temp_it.set_to_list(Choices);
1422 int choices_length = 0;
1435 float best_certainty = -FLT_MAX;
1436 for (
int i = 0; i < Results->
match.
size(); i++) {
1438 bool adapted = result.
adapted;
1440 if (temp_it.length()+1 == max_matches &&
1441 !contains_nonfrag && current_is_frag) {
1453 Rating = Certainty = (1.0f - result.
rating);
1462 if (Certainty > best_certainty) {
1464 }
else if (adapted &&
1469 float min_xheight, max_xheight, yshift;
1471 &min_xheight, &max_xheight, &yshift);
1475 min_xheight, max_xheight, yshift,
1479 temp_it.add_to_end(choice);
1480 contains_nonfrag |= !current_is_frag;
1482 if (choices_length >= max_matches)
break;
◆ ConvertProto()
| void tesseract::Classify::ConvertProto |
( |
PROTO |
Proto, |
|
|
int |
ProtoId, |
|
|
INT_CLASS |
Class |
|
) |
| |
This routine converts Proto to integer format and installs it as ProtoId in Class.
- Parameters
-
| Proto | floating-pt proto to be converted to integer format |
| ProtoId | id of proto |
| Class | integer class to add converted proto to |
Definition at line 487 of file intproto.cpp.
492 assert(ProtoId < Class->NumProtos);
496 Param = Proto->
A * 128;
499 Param = -Proto->
B * 256;
502 Param = Proto->
C * 128;
505 Param = Proto->
Angle * 256;
506 if (Param < 0 || Param >= 256)
509 P->
Angle = static_cast<uint8_t>(Param);
515 cprintf(
"Converted ffeat to (A=%d,B=%d,C=%d,L=%d)",
◆ CreateIntTemplates()
This routine converts from the old floating point format to the new integer format.
- Parameters
-
| FloatProtos | prototypes in old floating pt format |
| target_unicharset | the UNICHARSET to use |
- Returns
- New set of training templates in integer format.
- Note
- Globals: none
Definition at line 526 of file intproto.cpp.
539 for (ClassId = 0; ClassId < target_unicharset.
size(); ClassId++) {
540 FClass = &(FloatProtos[ClassId]);
542 strcmp(target_unicharset.
id_to_unichar(ClassId),
" ") != 0) {
543 cprintf(
"Warning: no protos/configs for %s in CreateIntTemplates()\n",
551 for (
int i = 0; i < fs.
size; ++i) {
562 for (ProtoId = 0; ProtoId < FClass->
NumProtos; ProtoId++) {
570 for (ConfigId = 0; ConfigId < FClass->
NumConfigs; ConfigId++) {
575 return (IntTemplates);
◆ DebugAdaptiveClassifier()
| void tesseract::Classify::DebugAdaptiveClassifier |
( |
TBLOB * |
blob, |
|
|
ADAPT_RESULTS * |
Results |
|
) |
| |
- Parameters
-
| blob | blob whose classification is being debugged |
| Results | results of match being debugged |
Globals: none
Definition at line 1497 of file adaptmatch.cpp.
1499 if (static_classifier_ ==
nullptr)
return;
1504 if (
sample ==
nullptr)
return;
◆ DisplayAdaptedChar()
Definition at line 946 of file adaptmatch.cpp.
947 #ifndef GRAPHICS_DISABLED
953 if (
sample ==
nullptr)
return;
957 bl_features.
size(), &bl_features[0],
960 tprintf(
"Best match to temp config %d = %4.1f%%.\n",
961 int_result.config, int_result.rating * 100.0);
964 ConfigMask = 1 << int_result.config;
967 bl_features.
size(), &bl_features[0],
◆ DoAdaptiveMatch()
This routine performs an adaptive classification. If we have not yet adapted to enough classes, a simple classification to the pre-trained templates is performed. Otherwise, we match the blob against the adapted templates. If the adapted templates do not match well, we try a match against the pre-trained templates. If an adapted template match is found, we do a match to any pre-trained templates which could be ambiguous. The results from all of these classifications are merged together into Results.
- Parameters
-
| Blob | blob to be classified |
| Results | place to put match results |
Globals:
- PreTrainedTemplates built-in training templates
- AdaptedTemplates templates adapted for this page
- matcher_reliable_adaptive_result rating limit for a great match
Definition at line 1530 of file adaptmatch.cpp.
1538 if (
sample ==
nullptr)
return;
1542 if (static_classifier_ ==
nullptr) {
◆ EndAdaptiveClassifier()
| void tesseract::Classify::EndAdaptiveClassifier |
( |
| ) |
|
This routine performs cleanup operations on the adaptive classifier. It should be called before the program is terminated. Its main function is to save the adapted templates to a file.
Globals:
Definition at line 459 of file adaptmatch.cpp.
466 File = fopen (Filename.
c_str(),
"wb");
468 cprintf (
"Unable to save adapted templates to %s!\n", Filename.
c_str());
470 cprintf (
"\nSaving adapted templates to %s ...", Filename.
c_str());
505 delete static_classifier_;
506 static_classifier_ =
nullptr;
◆ ExpandShapesAndApplyCorrections()
| void tesseract::Classify::ExpandShapesAndApplyCorrections |
( |
ADAPT_CLASS * |
classes, |
|
|
bool |
debug, |
|
|
int |
class_id, |
|
|
int |
bottom, |
|
|
int |
top, |
|
|
float |
cp_rating, |
|
|
int |
blob_length, |
|
|
int |
matcher_multiplier, |
|
|
const uint8_t * |
cn_factors, |
|
|
UnicharRating * |
int_result, |
|
|
ADAPT_RESULTS * |
final_results |
|
) |
| |
Definition at line 1128 of file adaptmatch.cpp.
1133 if (classes !=
nullptr) {
1136 for (
int f = 0; f < int_result->
fonts.size(); ++f) {
1137 int_result->
fonts[f].fontinfo_id =
1143 for (
int f = 0; f < int_result->
fonts.size(); ++f) {
1144 int_result->
fonts[f].fontinfo_id =
1146 int_result->
fonts[f].fontinfo_id);
1157 for (
int f = 0; f < int_result->
fonts.size(); ++f) {
1158 int shape_id = int_result->
fonts[f].fontinfo_id;
1160 for (
int c = 0; c < shape.size(); ++c) {
1161 int unichar_id = shape[c].unichar_id;
1165 for (r = 0; r < mapped_results.
size() &&
1166 mapped_results[r].unichar_id != unichar_id; ++r) {}
1167 if (r == mapped_results.
size()) {
1169 mapped_results[r].unichar_id = unichar_id;
1170 mapped_results[r].fonts.
truncate(0);
1172 for (
int i = 0; i < shape[c].font_ids.size(); ++i) {
1178 for (
int m = 0; m < mapped_results.
size(); ++m) {
1179 mapped_results[m].rating =
1181 cp_rating, int_result->
rating,
1183 blob_length, matcher_multiplier, cn_factors);
1193 bottom, top, blob_length,
1194 matcher_multiplier, cn_factors);
◆ ExtractFeatures()
Definition at line 440 of file intfx.cpp.
447 DENORM bl_denorm, cn_denorm;
449 &bl_denorm, &cn_denorm, results);
450 if (outline_cn_counts !=
nullptr)
455 EDGEPT* loop_pt = ol->FindBestStartPt();
457 if (pt ==
nullptr)
continue;
463 last_pt = last_pt->
next;
464 }
while (last_pt != loop_pt && !last_pt->
IsHidden() &&
466 last_pt = last_pt->
prev;
474 }
while ((pt = pt->
next) != loop_pt);
475 if (outline_cn_counts !=
nullptr)
◆ ExtractIntCNFeatures()
- Parameters
-
| blob | blob to extract features from |
| fx_info | |
- Returns
- Integer character-normalized features for blob.
Definition at line 216 of file picofeat.cpp.
222 blob,
false, &local_fx_info, &bl_features);
223 if (
sample ==
nullptr)
return nullptr;
225 uint32_t num_features =
sample->num_features();
228 for (uint32_t f = 0; f < num_features; ++f) {
◆ ExtractIntGeoFeatures()
- Parameters
-
| blob | blob to extract features from |
| fx_info | |
- Returns
- Geometric (top/bottom/width) features for blob.
Definition at line 246 of file picofeat.cpp.
252 blob,
false, &local_fx_info, &bl_features);
253 if (
sample ==
nullptr)
return nullptr;
◆ ExtractOutlineFeatures()
Convert each segment in the outline to a feature and return the features.
- Parameters
-
| Blob | blob to extract outline-features from |
- Returns
- Outline-features for Blob.
- Note
- Globals: none
Definition at line 54 of file outfeat.cpp.
◆ ExtractPicoFeatures()
Operation: Dummy for now.
Globals:
- classify_norm_method normalization method currently specified
- Parameters
-
| Blob | blob to extract pico-features from |
- Returns
- Pico-features for Blob.
Definition at line 62 of file picofeat.cpp.
65 LIST RemainingOutlines;
73 RemainingOutlines = Outlines;
◆ FreeNormProtos()
| void tesseract::Classify::FreeNormProtos |
( |
| ) |
|
◆ get_fontinfo_table() [1/2]
◆ get_fontinfo_table() [2/2]
◆ get_fontset_table()
◆ GetAdaptiveFeatures()
This routine sets up the feature extractor to extract baseline normalized pico-features.
The extracted pico-features are converted to integer form and placed in IntFeatures. The original floating-pt. features are returned in FloatFeatures.
Globals: none
- Parameters
-
| Blob | blob to extract features from |
| [out] | IntFeatures | array to fill with integer features |
| [out] | FloatFeatures | place to return actual floating-pt features |
- Returns
- Number of pico-features returned (0 if an error occurred)
Definition at line 786 of file adaptmatch.cpp.
802 *FloatFeatures = Features;
◆ GetAmbiguities()
This routine matches blob to the built-in templates to find out if there are any classes other than the correct class which are potential ambiguities.
- Parameters
-
| Blob | blob to get classification ambiguities for |
| CorrectClass | correct class for Blob |
Globals:
- CurrentRatings used by qsort compare routine
- PreTrainedTemplates built-in templates
- Returns
Array of unichar ids of all possible ambiguous classes, terminated by -1.
Definition at line 1592 of file adaptmatch.cpp.
1598 Results->Initialize();
1616 Ambiguities =
new UNICHAR_ID[Results->match.size() + 1];
1617 if (Results->match.size() > 1 ||
1618 (Results->match.size() == 1 &&
1619 Results->match[0].unichar_id != CorrectClass)) {
1620 for (i = 0; i < Results->match.size(); i++)
1621 Ambiguities[i] = Results->match[i].unichar_id;
1622 Ambiguities[i] = -1;
1624 Ambiguities[0] = -1;
◆ GetCharNormFeature()
| int tesseract::Classify::GetCharNormFeature |
( |
const INT_FX_RESULT_STRUCT & |
fx_info, |
|
|
INT_TEMPLATES |
templates, |
|
|
uint8_t * |
pruner_norm_array, |
|
|
uint8_t * |
char_norm_array |
|
) |
| |
This routine calls the integer (Hardware) feature extractor if it has not been called before for this blob.
The results from the feature extractor are placed into globals so that they can be used in other routines without re-extracting the features.
It then copies the char norm features into the IntFeatures array provided by the caller.
- Parameters
-
| templates | used to compute char norm adjustments |
| pruner_norm_array | Array of factors from blob normalization process |
| char_norm_array | array to fill with dummy char norm adjustments |
| fx_info | |
Globals:
- Returns
- Number of features extracted or 0 if an error occurred.
Definition at line 1678 of file adaptmatch.cpp.
◆ GetClassToDebug()
| CLASS_ID tesseract::Classify::GetClassToDebug |
( |
const char * |
Prompt, |
|
|
bool * |
adaptive_on, |
|
|
bool * |
pretrained_on, |
|
|
int * |
shape_id |
|
) |
| |
This routine prompts the user with Prompt and waits for the user to enter something in the debug window.
- Parameters
-
| Prompt | prompt to print while waiting for input from window |
| adaptive_on | |
| pretrained_on | |
| shape_id | |
- Returns
- Character entered in the debug window.
- Note
- Globals: none
Definition at line 1256 of file intproto.cpp.
1262 int unichar_id = INVALID_UNICHAR_ID;
1271 *adaptive_on =
false;
1272 *pretrained_on =
true;
1273 if (*shape_id >= 0 && *shape_id < shape_table_->NumShapes()) {
1277 tprintf(
"Shape %d, first unichar=%d, font=%d\n",
1278 *shape_id, unichar_id, font_id);
1283 tprintf(
"No shape table loaded!\n");
1289 *adaptive_on =
true;
1290 *pretrained_on =
false;
1293 *adaptive_on =
false;
1294 *pretrained_on =
true;
1296 *adaptive_on =
true;
1297 *pretrained_on =
true;
1309 tprintf(
"Char class '%s' not found in unicharset",
◆ getDict()
| virtual Dict& tesseract::Classify::getDict |
( |
| ) |
|
|
inline virtual |
◆ GetFontinfoId()
| int tesseract::Classify::GetFontinfoId |
( |
ADAPT_CLASS |
Class, |
|
|
uint8_t |
ConfigId |
|
) |
| |
◆ InitAdaptedClass()
This routine creates a new adapted class and uses Blob as the model for the first config in that class.
- Parameters
-
| Blob | blob to model new class after |
| ClassId | id of the class to be initialized |
| FontinfoId | font information inferred from pre-trained templates |
| Class | adapted class to be initialized |
| Templates | adapted templates to add new class to |
Globals:
Definition at line 693 of file adaptmatch.cpp.
720 BaselineCutoffs[ClassId] = CharNormCutoffs[ClassId];
724 for (Fid = 0; Fid < Features->
NumFeatures; Fid++) {
730 Proto = &(TempProto->
Proto);
756 tprintf(
"Added new class '%s' with class id %d and %d protos.\n",
◆ InitAdaptiveClassifier()
This routine reads in the training information needed by the adaptive classifier and saves it into global variables.
Parameters:
- load_pre_trained_templates Indicates whether the pre-trained templates (inttemp, normproto and pffmtable components) should be loaded. Should only be set to true if the necessary classifier components are present in the [lang].traineddata file.
Globals:
- BuiltInTemplatesFile file to get built-in temps from
- BuiltInCutoffsFile file to get avg. feat per class from
- classify_use_pre_adapted_templates enables use of pre-adapted templates
Definition at line 527 of file adaptmatch.cpp.
543 tprintf(
"Error loading shape table!\n");
554 static_classifier_ =
new TessClassifier(
false,
this);
567 for (uint16_t& BaselineCutoff : BaselineCutoffs) {
577 if (!fp.Open(Filename.
c_str(),
nullptr)) {
580 cprintf(
"\nReading pre-adapted templates from %s ...\n",
588 BaselineCutoffs[i] = CharNormCutoffs[i];
◆ LargeSpeckle()
| bool tesseract::Classify::LargeSpeckle |
( |
const TBLOB & |
blob | ) |
|
◆ LearnBlob()
Definition at line 70 of file blobclass.cpp.
82 tr_file_data_ +=
"\n";
83 tr_file_data_ += fontname;
85 tr_file_data_ += blob_text;
86 tr_file_data_ +=
"\n";
91 tprintf(
"Blob learned was invalid!\n");
◆ LearnPieces()
| void tesseract::Classify::LearnPieces |
( |
const char * |
fontname, |
|
|
int |
start, |
|
|
int |
length, |
|
|
float |
threshold, |
|
|
CharSegmentationType |
segmentation, |
|
|
const char * |
correct_text, |
|
|
WERD_RES * |
word |
|
) |
| |
Definition at line 374 of file adaptmatch.cpp.
390 if (rotated_blob ==
nullptr)
393 #ifndef GRAPHICS_DISABLED
399 learn_debug_win_->
Update();
403 ASSERT_HOST(learn_fragments_debug_win_ !=
nullptr);
404 blob->
plot(learn_fragments_debug_win_,
406 learn_fragments_debug_win_->
Update();
408 #endif // GRAPHICS_DISABLED
410 if (fontname !=
nullptr) {
414 DENORM bl_denorm, cn_denorm;
417 &bl_denorm, &cn_denorm, &fx_info);
418 LearnBlob(fontname, rotated_blob, cn_denorm, fx_info, correct_text);
421 int font_id = word->
fontinfo !=
nullptr
425 tprintf(
"Adapting to char = %s, thr= %g font_id= %d\n",
433 AdaptToChar(rotated_blob, class_id, font_id, threshold,
437 tprintf(
"Can't adapt to %s not in unicharset\n", correct_text);
439 if (rotated_blob != blob) {
◆ LearnWord()
| void tesseract::Classify::LearnWord |
( |
const char * |
fontname, |
|
|
WERD_RES * |
word |
|
) |
| |
Definition at line 250 of file adaptmatch.cpp.
252 if (word_len == 0)
return;
254 float* thresholds =
nullptr;
255 if (fontname ==
nullptr) {
261 tprintf(
"\n\nAdapting to word = %s\n",
263 thresholds =
new float[word_len];
271 #ifndef GRAPHICS_DISABLED
273 if (learn_fragmented_word_debug_win_ !=
nullptr) {
283 #endif // GRAPHICS_DISABLED
285 for (
int ch = 0; ch < word_len; ++ch) {
290 float threshold = thresholds !=
nullptr ? thresholds[ch] : 0.0f;
299 bool garbage =
false;
301 for (frag = 0; frag < word->
best_state[ch]; ++frag) {
312 for (frag = 0; frag < word->
best_state[ch]; ++frag) {
317 tokens[0].c_str(), frag, word->
best_state[ch],
321 for (
int i = 0; i < tokens.
size(); i++) {
322 full_string += tokens[i];
323 if (i != tokens.
size() - 1)
326 LearnPieces(fontname, start_blob + frag, 1, threshold,
362 delete [] thresholds;
◆ LooksLikeGarbage()
| bool tesseract::Classify::LooksLikeGarbage |
( |
TBLOB * |
blob | ) |
|
Definition at line 1633 of file adaptmatch.cpp.
1634 auto *ratings =
new BLOB_CHOICE_LIST();
1636 BLOB_CHOICE_IT ratings_it(ratings);
1642 for (ratings_it.mark_cycle_pt(); !ratings_it.cycled_list();
1643 ratings_it.forward()) {
1647 float certainty = ratings_it.data()->certainty();
◆ MakeNewTemporaryConfig()
- Parameters
-
| Templates | adapted templates to add new config to |
| ClassId | class id to associate with new config |
| FontinfoId | font information inferred from pre-trained templates |
| NumFeatures | number of features in IntFeatures |
| Features | features describing model for new config |
| FloatFeatures | floating-pt representation of features |
- Returns
- The id of the new config created, a negative integer in case of error.
Definition at line 1740 of file adaptmatch.cpp.
1752 int MaxProtoId, OldMaxProtoId;
1764 Class = Templates->
Class[ClassId];
1767 ++NumAdaptationsFailed;
1769 cprintf(
"Cannot make new temporary config: maximum number exceeded.\n");
1776 NumFeatures, Features,
1782 for (i = 0; i < NumOldProtos; i++)
1786 NumFeatures, Features,
1794 ++NumAdaptationsFailed;
1796 cprintf(
"Cannot make new temp protos: maximum number exceeded.\n");
1807 cprintf(
"Making new temp config %d fontinfo id %d"
1808 " using %d old and %d new protos.\n",
1809 ConfigId,
Config->FontinfoId,
1810 NumOldProtos, MaxProtoId - OldMaxProtoId);
◆ MakeNewTempProtos()
This routine finds sets of sequential bad features that all have the same angle and converts each set into a new temporary proto. The temp proto is added to the proto pruner for IClass, pushed onto the list of temp protos in Class, and added to TempProtoMask.
- Parameters
-
| Features | floating-pt features describing new character |
| NumBadFeat | number of bad features to turn into protos |
| BadFeat | feature id's of bad features |
| IClass | integer class templates to add new protos to |
| Class | adapted class templates to add new protos to |
| TempProtoMask | proto mask to add new protos to |
Globals: none
- Returns
- Max proto id in class after all protos have been added.
Definition at line 1834 of file adaptmatch.cpp.
1846 float X1, X2, Y1, Y2;
1847 float A1, A2, AngleDelta;
1848 float SegmentLength;
1851 for (ProtoStart = BadFeat, LastBad = ProtoStart + NumBadFeat;
1852 ProtoStart < LastBad; ProtoStart = ProtoEnd) {
1853 F1 = Features->
Features[*ProtoStart];
1858 for (ProtoEnd = ProtoStart + 1,
1862 F2 = Features->
Features[*ProtoEnd];
1867 AngleDelta = fabs(A1 - A2);
1868 if (AngleDelta > 0.5)
1869 AngleDelta = 1.0 - AngleDelta;
1872 fabs(X1 - X2) > SegmentLength ||
1873 fabs(Y1 - Y2) > SegmentLength)
1877 F2 = Features->
Features[*(ProtoEnd - 1)];
1887 Proto = &(TempProto->
Proto);
1892 Proto->
Length = SegmentLength;
1894 Proto->
X = (X1 + X2) / 2.0;
◆ MakePermanent()
- Parameters
-
| Templates | current set of adaptive templates |
| ClassId | class containing config to be made permanent |
| ConfigId | config to be made permanent |
| Blob | current blob being adapted to |
Globals: none
Definition at line 1920 of file adaptmatch.cpp.
1929 Class = Templates->
Class[ClassId];
1940 Perm->Ambigs = Ambigs;
1941 Perm->FontinfoId =
Config->FontinfoId;
1955 tprintf(
"Making config %d for %s (ClassId %d) permanent:"
1956 " fontinfo id %d, ambiguities '",
1957 ConfigId,
getDict().getUnicharset().debug_str(ClassId).c_str(),
1960 *AmbigsPointer >= 0; ++AmbigsPointer)
◆ MasterMatcher()
Factored-out calls to IntegerMatcher based on class pruner results. Returns integer matcher results inside CLASS_PRUNER_RESULTS structure.
Definition at line 1088 of file adaptmatch.cpp.
1098 int top = blob_box.
top();
1099 int bottom = blob_box.
bottom();
1101 for (
int c = 0; c < results.
size(); c++) {
1102 CLASS_ID class_id = results[c].Class;
1111 num_features, features,
1118 matcher_multiplier, norm_factors,
1119 &int_result, final_results);
◆ NewAdaptedTemplates()
| ADAPT_TEMPLATES tesseract::Classify::NewAdaptedTemplates |
( |
bool |
InitFromUnicharset | ) |
|
Allocates memory for adapted templates.
- Parameters
-
| InitFromUnicharset | if true, add an empty class for each char in unicharset to the newly created templates |
- Returns
- Ptr to new adapted templates.
- Note
- Globals: none
Definition at line 151 of file adaptive.cpp.
162 Templates->
Class[i] =
nullptr;
◆ NormalizeOutlines()
| void tesseract::Classify::NormalizeOutlines |
( |
LIST |
Outlines, |
|
|
float * |
XScale, |
|
|
float * |
YScale |
|
) |
| |
This routine normalizes every outline in Outlines according to the currently selected normalization method. It also returns the scale factors that it used to do this scaling. The scale factors returned represent the x and y sizes in the normalized coordinate system that correspond to 1 pixel in the original coordinate system. Outlines are changed and XScale and YScale are updated.
Globals:
- classify_norm_method method being used for normalization
- classify_char_norm_range map radius of gyration to this value
- Parameters
-
| Outlines | list of outlines to be normalized |
| XScale | x-direction scale factor used by routine |
| YScale | y-direction scale factor used by routine |
Definition at line 275 of file mfoutline.cpp.
283 ASSERT_HOST(!
"How did NormalizeOutlines get called in character mode?");
◆ PrintAdaptedTemplates()
| void tesseract::Classify::PrintAdaptedTemplates |
( |
FILE * |
File, |
|
|
ADAPT_TEMPLATES |
Templates |
|
) |
| |
This routine prints a summary of the adapted templates in Templates to File.
- Parameters
-
| File | open text file to print Templates to |
| Templates | adapted templates to print to File |
- Note
- Globals: none
Definition at line 244 of file adaptive.cpp.
248 fprintf (File,
"\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n");
249 fprintf (File,
"Num classes = %d; Num permanent classes = %d\n\n",
251 fprintf (File,
" Id NC NPC NP NPP\n");
252 fprintf (File,
"------------------------\n");
254 for (
int i = 0; i < (Templates->
Templates)->NumClasses; i++) {
256 AClass = Templates->
Class[i];
258 fprintf (File,
"%5d %s %3d %3d %3d %3d\n",
265 fprintf (File,
"\n");
◆ PrintAdaptiveMatchResults()
| void tesseract::Classify::PrintAdaptiveMatchResults |
( |
const ADAPT_RESULTS & |
results | ) |
|
This routine writes the matches in Results to File.
- Parameters
-
| results | match results to write to File |
Globals: none
Definition at line 2013 of file adaptmatch.cpp.
2014 for (
int i = 0; i < results.
match.
size(); ++i) {
2016 results.
match[i].Print();
◆ PruneClasses()
Runs the class pruner from int_templates on the given features, returning the number of classes output in results.
- Parameters
-
| int_templates | Class pruner tables |
| num_features | Number of features in blob |
| features | Array of features |
| normalization_factors | Array of fudge factors from blob normalization process (by CLASS_INDEX) |
| expected_num_features | Array of expected number of features for each class (by CLASS_INDEX) |
| results | Sorted Array of pruned classes. Must be an array of size at least int_templates->NumClasses. |
| keep_this | |
Definition at line 451 of file intmatcher.cpp.
458 ClassPruner pruner(int_templates->
NumClasses);
460 pruner.ComputeScores(int_templates, num_features, features);
462 pruner.AdjustForExpectedNumFeatures(expected_num_features,
472 if (normalization_factors !=
nullptr) {
474 normalization_factors);
476 pruner.NoNormalization();
483 pruner.DebugMatch(*
this, int_templates, features);
486 pruner.SummarizeResult(*
this, int_templates, expected_num_features,
488 normalization_factors);
491 return pruner.SetupResults(results);
◆ ReadAdaptedTemplates()
Read a set of adapted templates from file and return a ptr to the templates.
- Parameters
-
| fp | open text file to read adapted templates from |
- Returns
- Ptr to adapted templates read from file.
- Note
- Globals: none
Definition at line 332 of file adaptive.cpp.
343 for (
int i = 0; i < (Templates->
Templates)->NumClasses; i++) {
◆ ReadIntTemplates()
This routine reads a set of integer templates from File. File must already be open and must be in the correct binary format.
- Parameters
-
| fp | open file to read templates from |
- Returns
- Pointer to integer templates read from File.
- Note
- Globals: none
Definition at line 717 of file intproto.cpp.
719 int i, j, w, x, y, z;
729 int b, bit_number, last_cp_bit_number, new_b, new_i, new_w;
733 auto **TempClassPruner =
735 uint32_t SetBitsForMask =
737 uint32_t Mask, NewMask, ClassBits;
744 if (fp->FReadEndian(&unicharset_size,
sizeof(unicharset_size), 1) != 1)
745 tprintf(
"Bad read of inttemp!\n");
750 tprintf(
"Bad read of inttemp!\n");
756 tprintf(
"Bad read of inttemp!\n");
759 if (version_id < 3) {
764 if (version_id < 2) {
765 if (fp->FReadEndian(IndexFor,
sizeof(IndexFor[0]), unicharset_size) !=
767 tprintf(
"Bad read of inttemp!\n");
769 if (fp->FReadEndian(ClassIdFor,
sizeof(ClassIdFor[0]),
771 tprintf(
"Bad read of inttemp!\n");
776 const int kNumBuckets =
780 if (fp->FReadEndian(Pruner,
sizeof(Pruner->
p[0][0][0][0]), kNumBuckets) !=
782 tprintf(
"Bad read of inttemp!\n");
784 if (version_id < 2) {
785 TempClassPruner[i] = Pruner;
792 if (version_id < 2) {
796 if (ClassIdFor[i] > max_class_id)
797 max_class_id = ClassIdFor[i];
810 if (TempClassPruner[i]->p[x][y][z][w] == 0)
814 if (bit_number > last_cp_bit_number)
818 Mask = SetBitsForMask << b;
819 ClassBits = TempClassPruner[i]->p[x][y][z][w] & Mask;
826 ClassBits <<= (new_b - b);
828 ClassBits >>= (b - new_b);
832 NewMask = SetBitsForMask << new_b;
833 Templates->
ClassPruners[new_i]->
p[x][y][z][new_w] &= ~NewMask;
834 Templates->
ClassPruners[new_i]->
p[x][y][z][new_w] |= ClassBits;
839 delete TempClassPruner[i];
850 tprintf(
"Bad read of inttemp!\n");
851 if (version_id == 0) {
853 for (j = 0; j < 5; ++j) {
855 if (fp->FRead(&junk,
sizeof(junk), 1) != 1)
856 tprintf(
"Bad read of inttemp!\n");
859 int num_configs = version_id < 4 ? MaxNumConfigs : Class->
NumConfigs;
861 if (fp->FReadEndian(Class->
ConfigLengths,
sizeof(uint16_t), num_configs) !=
863 tprintf(
"Bad read of inttemp!\n");
865 if (version_id < 2) {
877 tprintf(
"Bad read of inttemp!\n");
887 num_buckets) != num_buckets)
888 tprintf(
"Bad read of inttemp!\n");
890 if (fp->FRead(&ProtoSet->
Protos[x].
A,
sizeof(ProtoSet->
Protos[x].
A),
898 tprintf(
"Bad read of inttemp!\n");
901 WerdsPerConfigVec) != WerdsPerConfigVec)
902 cprintf(
"Bad read of inttemp!\n");
906 if (version_id < 4) {
913 if (version_id < 2) {
921 if (i < Templates->NumClasses) {
923 fprintf(stderr,
"Non-contiguous class ids in inttemp\n");
928 fprintf(stderr,
"Class id %d exceeds NumClassesIn (Templates) %d\n",
935 if (version_id >= 4) {
936 using namespace std::placeholders;
938 if (version_id >= 5) {
948 delete[] TempClassPruner;
◆ ReadNewCutoffs()
| void tesseract::Classify::ReadNewCutoffs |
( |
TFile * |
fp, |
|
|
uint16_t * |
Cutoffs |
|
) |
| |
Open file, read in all of the class-id/cutoff pairs and insert them into the Cutoffs array. Cutoffs are indexed in the array by class id. Unused entries in the array are set to an arbitrarily high cutoff value.
- Parameters
-
| fp | file containing cutoff definitions |
| Cutoffs | array to put cutoffs into |
Definition at line 40 of file cutoffs.cpp.
46 tprintf(
"Error during read of shapetable pffmtable!\n");
52 const int kMaxLineSize = 100;
53 char line[kMaxLineSize];
54 while (fp->FGets(line, kMaxLineSize) !=
nullptr) {
57 std::istringstream stream(line);
58 stream >> Class >> Cutoff;
62 if (Class.compare(
"NULL") == 0) {
68 Cutoffs[ClassId] = Cutoff;
◆ ReadNormProtos()
This routine allocates a new data structure to hold a set of character normalization protos. It then fills in the data structure by reading from the specified File.
- Parameters
-
| fp | open text file to read normalization protos from |
Globals: none
- Returns
- Character normalization protos.
Definition at line 189 of file normmatch.cpp.
210 const int kMaxLineSize = 100;
211 char line[kMaxLineSize];
212 while (fp->FGets(line, kMaxLineSize) !=
nullptr) {
213 std::istringstream stream(line);
214 stream >> unichar >> NumProtos;
221 for (i = 0; i < NumProtos; i++)
225 tprintf(
"Error: unichar %s in normproto file is not in unichar set.\n",
227 for (i = 0; i < NumProtos; i++)
◆ RefreshDebugWindow()
| void tesseract::Classify::RefreshDebugWindow |
( |
ScrollView ** |
win, |
|
|
const char * |
msg, |
|
|
int |
y_offset, |
|
|
const TBOX & |
wbox |
|
) |
| |
Definition at line 226 of file adaptmatch.cpp.
228 #ifndef GRAPHICS_DISABLED
229 const int kSampleSpaceWidth = 500;
230 if (*win ==
nullptr) {
231 *win =
new ScrollView(msg, 100, y_offset, kSampleSpaceWidth * 2, 200,
232 kSampleSpaceWidth * 2, 200,
true);
235 (*win)->Pen(64, 64, 64);
240 (*win)->ZoomToRectangle(wbox.
left(), wbox.
top(),
242 #endif // GRAPHICS_DISABLED
◆ RemoveBadMatches()
| void tesseract::Classify::RemoveBadMatches |
( |
ADAPT_RESULTS * |
Results | ) |
|
This routine steps through each matching class in Results and removes it from the match list if its rating is worse than the BestRating plus a pad. In other words, all good matches get moved to the front of the classes array.
- Parameters
-
| Results | contains matches to be filtered |
Globals:
- matcher_bad_match_pad defines a "bad match"
Definition at line 2033 of file adaptmatch.cpp.
2035 float BadMatchThreshold;
2036 static const char* romans =
"i v x I V X";
2044 float scored_one = ScoredUnichar(unichar_id_one, *Results);
2045 float scored_zero = ScoredUnichar(unichar_id_zero, *Results);
2047 for (Next = NextGood = 0; Next < Results->
match.
size(); Next++) {
2049 if (match.
rating >= BadMatchThreshold) {
2054 scored_one < BadMatchThreshold) {
2055 Results->
match[Next].unichar_id = unichar_id_one;
2057 scored_zero < BadMatchThreshold) {
2058 Results->
match[Next].unichar_id = unichar_id_zero;
2060 Results->
match[Next].unichar_id = INVALID_UNICHAR_ID;
2062 if (Results->
match[Next].unichar_id != INVALID_UNICHAR_ID) {
2063 if (NextGood == Next) {
2066 Results->
match[NextGood++] = Results->
match[Next];
2072 for (Next = NextGood = 0; Next < Results->
match.
size(); Next++) {
2073 if (Results->
match[Next].rating >= BadMatchThreshold) {
2074 if (NextGood == Next) {
2077 Results->
match[NextGood++] = Results->
match[Next];
◆ RemoveExtraPuncs()
| void tesseract::Classify::RemoveExtraPuncs |
( |
ADAPT_RESULTS * |
Results | ) |
|
This routine discards extra digits or punctuation from the results. We keep only the top 2 punctuation answers and the top 1 digit answer if present.
- Parameters
-
| Results | contains matches to be filtered |
Definition at line 2093 of file adaptmatch.cpp.
2098 static char punc_chars[] =
". , ; : / ` ~ ' - = \\ | \" ! _ ^";
2099 static char digit_chars[] =
"0 1 2 3 4 5 6 7 8 9";
2103 for (Next = NextGood = 0; Next < Results->
match.
size(); Next++) {
2106 if (strstr(punc_chars,
2108 if (punc_count >= 2)
2112 if (strstr(digit_chars,
2114 if (digit_count >= 1)
2120 if (NextGood == Next) {
2123 Results->
match[NextGood++] = match;
◆ ResetAdaptiveClassifierInternal()
| void tesseract::Classify::ResetAdaptiveClassifierInternal |
( |
| ) |
|
Definition at line 598 of file adaptmatch.cpp.
600 tprintf(
"Resetting adaptive classifier (NumAdaptationsFailed=%d)\n",
601 NumAdaptationsFailed);
608 NumAdaptationsFailed = 0;
◆ SetAdaptiveThreshold()
| void tesseract::Classify::SetAdaptiveThreshold |
( |
float |
Threshold | ) |
|
This routine resets the internal thresholds inside the integer matcher to correspond to the specified threshold.
- Parameters
-
| Threshold | threshold for creating new templates |
Globals:
- matcher_good_threshold default good match rating
Definition at line 2141 of file adaptmatch.cpp.
2144 ClipToRange<int>(255 * Threshold, 0, 255));
2146 ClipToRange<int>(255 * Threshold, 0, 255));
◆ SetStaticClassifier()
| void tesseract::Classify::SetStaticClassifier |
( |
ShapeClassifier * |
static_classifier | ) |
|
Definition at line 193 of file classify.cpp.
194 delete static_classifier_;
195 static_classifier_ = static_classifier;
◆ SettupPass1()
| void tesseract::Classify::SettupPass1 |
( |
| ) |
|
This routine prepares the adaptive matcher for the start of the first pass. Learning is enabled (unless it is disabled for the whole program).
- Note
- this is somewhat redundant, it simply says that if learning is enabled then it will remain enabled on the first pass. If it is disabled, then it will remain disabled. This is only put here to make it very clear that learning is controlled directly by the global setting of EnableLearning.
Globals:
Definition at line 652 of file adaptmatch.cpp.
◆ SettupPass2()
| void tesseract::Classify::SettupPass2 |
( |
| ) |
|
This routine prepares the adaptive matcher for the start of the second pass. Further learning is disabled.
Globals:
Definition at line 669 of file adaptmatch.cpp.
◆ SetupBLCNDenorms()
Definition at line 127 of file intfx.cpp.
132 FCOORD center, second_moments;
134 if (fx_info !=
nullptr) {
144 1.0f, 1.0f, 128.0f, 128.0f);
146 if (nonlinear_norm) {
154 0.0f, 0.0f, x_coords, y_coords);
157 center.
x(), center.
y(),
158 51.2f / second_moments.
x(),
159 51.2f / second_moments.
y(),
◆ shape_table()
| const ShapeTable* tesseract::Classify::shape_table |
( |
| ) |
const |
|
inline |
◆ ShapeIDToClassID()
| int tesseract::Classify::ShapeIDToClassID |
( |
int |
shape_id | ) |
const |
Definition at line 2220 of file adaptmatch.cpp.
2225 for (
int config = 0; config < fs.size; ++config) {
2226 if (fs.configs[config] == shape_id)
2230 tprintf(
"Shape %d not found\n", shape_id);
◆ ShowBestMatchFor()
| void tesseract::Classify::ShowBestMatchFor |
( |
int |
shape_id, |
|
|
const INT_FEATURE_STRUCT * |
features, |
|
|
int |
num_features |
|
) |
| |
This routine displays debug information for the best config of the given shape_id for the given set of features.
- Parameters
-
| shape_id | classifier id to work with |
| features | features of the unknown character |
| num_features | Number of features in the features array. |
Definition at line 2159 of file adaptmatch.cpp.
2162 #ifndef GRAPHICS_DISABLED
2163 uint32_t config_mask;
2165 tprintf(
"No built-in templates for class/shape %d\n", shape_id);
2168 if (num_features <= 0) {
2169 tprintf(
"Illegal blob (char norm features)!\n");
2176 num_features, features, &cn_result,
2180 config_mask = 1 << cn_result.
config;
2182 tprintf(
"Static Shape ID: %d\n", shape_id);
2185 &config_mask, num_features, features, &cn_result,
2189 #endif // GRAPHICS_DISABLED
◆ ShowMatchDisplay()
| void tesseract::Classify::ShowMatchDisplay |
( |
| ) |
|
This routine sends the shapes in the global display lists to the match debugger window.
Globals:
- FeatureShapes display list containing feature matches
- ProtoShapes display list containing proto matches
Definition at line 962 of file intproto.cpp.
965 if (ProtoDisplayWindow) {
966 ProtoDisplayWindow->
Clear();
968 if (FeatureDisplayWindow) {
969 FeatureDisplayWindow->
Clear();
976 if (ProtoDisplayWindow) {
980 if (FeatureDisplayWindow) {
◆ StartBackupAdaptiveClassifier()
| void tesseract::Classify::StartBackupAdaptiveClassifier |
( |
| ) |
|
◆ SwitchAdaptiveClassifier()
| void tesseract::Classify::SwitchAdaptiveClassifier |
( |
| ) |
|
Definition at line 613 of file adaptmatch.cpp.
619 tprintf(
"Switch to backup adaptive classifier (NumAdaptationsFailed=%d)\n",
620 NumAdaptationsFailed);
625 NumAdaptationsFailed = 0;
◆ TempConfigReliable()
Definition at line 2236 of file adaptmatch.cpp.
2239 tprintf(
"NumTimesSeen for config of %s is %d\n",
2240 getDict().getUnicharset().debug_str(class_id).c_str(),
2252 int ambigs_size = (ambigs ==
nullptr) ? 0 : ambigs->
size();
2253 for (
int ambig = 0; ambig < ambigs_size; ++ambig) {
2255 assert(ambig_class !=
nullptr);
2260 tprintf(
"Ambig %s has not been seen enough times,"
2261 " not making config for %s permanent\n",
2262 getDict().getUnicharset().debug_str(
2263 (*ambigs)[ambig]).c_str(),
2264 getDict().getUnicharset().debug_str(class_id).c_str());
◆ UpdateAmbigsGroup()
| void tesseract::Classify::UpdateAmbigsGroup |
( |
CLASS_ID |
class_id, |
|
|
TBLOB * |
Blob |
|
) |
| |
Definition at line 2273 of file adaptmatch.cpp.
2276 int ambigs_size = (ambigs ==
nullptr) ? 0 : ambigs->
size();
2278 tprintf(
"Running UpdateAmbigsGroup for %s class_id=%d\n",
2279 getDict().getUnicharset().debug_str(class_id).c_str(), class_id);
2281 for (
int ambig = 0; ambig < ambigs_size; ++ambig) {
2282 CLASS_ID ambig_class_id = (*ambigs)[ambig];
2290 tprintf(
"Making config %d of %s permanent\n", cfg,
2291 getDict().getUnicharset().debug_str(
2292 ambig_class_id).c_str());
◆ WriteAdaptedTemplates()
| void tesseract::Classify::WriteAdaptedTemplates |
( |
FILE * |
File, |
|
|
ADAPT_TEMPLATES |
Templates |
|
) |
| |
This routine saves Templates to File in a binary format.
- Parameters
-
| File | open text file to write Templates to |
| Templates | set of adapted templates to write to File |
- Note
- Globals: none
Definition at line 453 of file adaptive.cpp.
463 for (i = 0; i < (Templates->
Templates)->NumClasses; i++) {
◆ WriteIntTemplates()
| void tesseract::Classify::WriteIntTemplates |
( |
FILE * |
File, |
|
|
INT_TEMPLATES |
Templates, |
|
|
const UNICHARSET & |
target_unicharset |
|
) |
| |
This routine writes Templates to File. The format is an efficient binary format. File must already be open for writing.
- Parameters
-
| File | open file to write templates to |
| Templates | templates to save into File |
| target_unicharset | the UNICHARSET to use |
Definition at line 1017 of file intproto.cpp.
1022 int unicharset_size = target_unicharset.
size();
1023 int version_id = -5;
1025 if (Templates->
NumClasses != unicharset_size) {
1026 cprintf(
"Warning: executing WriteIntTemplates() with %d classes in"
1027 " Templates, while target_unicharset size is %d\n",
1032 fwrite(&unicharset_size,
sizeof(unicharset_size), 1, File);
1033 fwrite(&version_id,
sizeof(version_id), 1, File);
1044 for (i = 0; i < Templates->
NumClasses; i++) {
1045 Class = Templates->
Class[i];
1053 fwrite(&Class->
ConfigLengths[j],
sizeof(uint16_t), 1, File);
1067 fwrite(&Class->
font_set_id,
sizeof(
int), 1, File);
1071 using namespace std::placeholders;
◆ WriteTRFile()
| bool tesseract::Classify::WriteTRFile |
( |
const STRING & |
filename | ) |
|
Definition at line 98 of file blobclass.cpp.
100 STRING tr_filename = filename +
".tr";
101 FILE* fp = fopen(tr_filename.
c_str(),
"wb");
◆ AdaptedTemplates
◆ AllConfigsOff
| BIT_VECTOR tesseract::Classify::AllConfigsOff = nullptr |
◆ AllConfigsOn
| BIT_VECTOR tesseract::Classify::AllConfigsOn = nullptr |
◆ allow_blob_division
| bool tesseract::Classify::allow_blob_division = true |
"Use divisible blobs chopping"
Definition at line 423 of file classify.h.
◆ AllProtosOn
| BIT_VECTOR tesseract::Classify::AllProtosOn = nullptr |
◆ BackupAdaptedTemplates
◆ certainty_scale
| double tesseract::Classify::certainty_scale = 20.0 |
"Certainty scaling factor"
Definition at line 473 of file classify.h.
◆ classify_adapt_feature_threshold
| int tesseract::Classify::classify_adapt_feature_threshold = 230 |
"Threshold for good features during adaptive 0-255"
Definition at line 483 of file classify.h.
◆ classify_adapt_proto_threshold
| int tesseract::Classify::classify_adapt_proto_threshold = 230 |
"Threshold for good protos during adaptive 0-255"
Definition at line 481 of file classify.h.
◆ classify_adapted_pruning_factor
| double tesseract::Classify::classify_adapted_pruning_factor = 2.5 |
"Prune poor adapted results this much worse than best result"
Definition at line 477 of file classify.h.
◆ classify_adapted_pruning_threshold
| double tesseract::Classify::classify_adapted_pruning_threshold = -1.0 |
"Threshold at which classify_adapted_pruning_factor starts"
Definition at line 479 of file classify.h.
◆ classify_bln_numeric_mode
| bool tesseract::Classify::classify_bln_numeric_mode = 0 |
"Assume the input is numbers [0-9]."
Definition at line 508 of file classify.h.
◆ classify_char_norm_range
| double tesseract::Classify::classify_char_norm_range = 0.2 |
"Character Normalization Range ..."
Definition at line 436 of file classify.h.
◆ classify_character_fragments_garbage_certainty_threshold
| double tesseract::Classify::classify_character_fragments_garbage_certainty_threshold = -3.0 |
"Exclude fragments that do not match any whole character" " with at least this certainty"
Definition at line 489 of file classify.h.
◆ classify_class_pruner_multiplier
| int tesseract::Classify::classify_class_pruner_multiplier = 15 |
"Class Pruner Multiplier 0-255: "
Definition at line 501 of file classify.h.
◆ classify_class_pruner_threshold
| int tesseract::Classify::classify_class_pruner_threshold = 229 |
"Class Pruner Threshold 0-255"
Definition at line 499 of file classify.h.
◆ classify_cp_cutoff_strength
| int tesseract::Classify::classify_cp_cutoff_strength = 7 |
"Class Pruner CutoffStrength: "
Definition at line 503 of file classify.h.
◆ classify_debug_character_fragments
| bool tesseract::Classify::classify_debug_character_fragments = false |
"Bring up graphical debugging windows for fragments training"
Definition at line 491 of file classify.h.
◆ classify_debug_level
| int tesseract::Classify::classify_debug_level = 0 |
"Classify debug level"
Definition at line 430 of file classify.h.
◆ classify_enable_adaptive_debugger
| bool tesseract::Classify::classify_enable_adaptive_debugger = 0 |
"Enable match debugger"
Definition at line 450 of file classify.h.
◆ classify_enable_adaptive_matcher
| bool tesseract::Classify::classify_enable_adaptive_matcher = 1 |
"Enable adaptive classifier"
Definition at line 445 of file classify.h.
◆ classify_enable_learning
| bool tesseract::Classify::classify_enable_learning = true |
"Enable adaptive classifier"
Definition at line 429 of file classify.h.
◆ classify_integer_matcher_multiplier
| int tesseract::Classify::classify_integer_matcher_multiplier = 10 |
"Integer Matcher Multiplier 0-255: "
Definition at line 505 of file classify.h.
◆ classify_learn_debug_str
| char* tesseract::Classify::classify_learn_debug_str = "" |
"Class str to debug learning"
Definition at line 495 of file classify.h.
◆ classify_learning_debug_level
| int tesseract::Classify::classify_learning_debug_level = 0 |
"Learning Debug Level: "
Definition at line 455 of file classify.h.
◆ classify_max_certainty_margin
| double tesseract::Classify::classify_max_certainty_margin = 5.5 |
"Veto difference between classifier certainties"
Definition at line 440 of file classify.h.
◆ classify_max_rating_ratio
| double tesseract::Classify::classify_max_rating_ratio = 1.5 |
"Veto ratio between classifier ratings"
Definition at line 438 of file classify.h.
◆ classify_misfit_junk_penalty
| double tesseract::Classify::classify_misfit_junk_penalty = 0.0 |
"Penalty to apply when a non-alnum is vertically out of " "its expected textline position"
Definition at line 471 of file classify.h.
◆ classify_nonlinear_norm
| bool tesseract::Classify::classify_nonlinear_norm = 0 |
"Non-linear stroke-density normalization"
Definition at line 452 of file classify.h.
◆ classify_norm_method
| int tesseract::Classify::classify_norm_method = character |
"Normalization Method ..."
Definition at line 434 of file classify.h.
◆ classify_save_adapted_templates
| bool tesseract::Classify::classify_save_adapted_templates = 0 |
"Save adapted templates to a file"
Definition at line 449 of file classify.h.
◆ classify_use_pre_adapted_templates
| bool tesseract::Classify::classify_use_pre_adapted_templates = 0 |
"Use pre-adapted classifier templates"
Definition at line 447 of file classify.h.
◆ disable_character_fragments
| bool tesseract::Classify::disable_character_fragments = true |
"Do not include character fragments in the" " results of the classifier"
Definition at line 486 of file classify.h.
◆ EnableLearning
| bool tesseract::Classify::EnableLearning = true |
◆ feature_defs_
◆ fontinfo_table_
◆ fontset_table_
◆ im_
◆ matcher_avg_noise_size
| double tesseract::Classify::matcher_avg_noise_size = 12.0 |
"Avg. noise blob length: "
Definition at line 461 of file classify.h.
◆ matcher_bad_match_pad
| double tesseract::Classify::matcher_bad_match_pad = 0.15 |
◆ matcher_clustering_max_angle_delta
| double tesseract::Classify::matcher_clustering_max_angle_delta = 0.015 |
"Maximum angle delta for prototype clustering"
Definition at line 468 of file classify.h.
◆ matcher_debug_flags
| int tesseract::Classify::matcher_debug_flags = 0 |
◆ matcher_debug_level
| int tesseract::Classify::matcher_debug_level = 0 |
◆ matcher_debug_separate_windows
| bool tesseract::Classify::matcher_debug_separate_windows = false |
"Use two different windows for debugging the matching: " "One for the protos and one for the features."
Definition at line 494 of file classify.h.
◆ matcher_good_threshold
| double tesseract::Classify::matcher_good_threshold = 0.125 |
◆ matcher_min_examples_for_prototyping
| int tesseract::Classify::matcher_min_examples_for_prototyping = 3 |
"Reliable Config Threshold"
Definition at line 464 of file classify.h.
◆ matcher_perfect_threshold
| double tesseract::Classify::matcher_perfect_threshold = 0.02 |
◆ matcher_permanent_classes_min
| int tesseract::Classify::matcher_permanent_classes_min = 1 |
"Min # of permanent classes"
Definition at line 462 of file classify.h.
◆ matcher_rating_margin
| double tesseract::Classify::matcher_rating_margin = 0.1 |
"New template margin (0-1)"
Definition at line 460 of file classify.h.
◆ matcher_reliable_adaptive_result
| double tesseract::Classify::matcher_reliable_adaptive_result = 0.0 |
◆ matcher_sufficient_examples_for_prototyping
| int tesseract::Classify::matcher_sufficient_examples_for_prototyping = 5 |
"Enable adaption even if the ambiguities have not been seen"
Definition at line 466 of file classify.h.
◆ NormProtos
◆ PreTrainedTemplates
◆ prioritize_division
| bool tesseract::Classify::prioritize_division = false |
"Prioritize blob division over chopping"
Definition at line 428 of file classify.h.
◆ rating_scale
| double tesseract::Classify::rating_scale = 1.5 |
"Rating scaling factor"
Definition at line 472 of file classify.h.
◆ shape_table_
| ShapeTable* tesseract::Classify::shape_table_ = nullptr |
|
protected |
◆ speckle_large_max_size
| double tesseract::Classify::speckle_large_max_size = 0.30 |
"Max large speckle size"
Definition at line 509 of file classify.h.
◆ speckle_rating_penalty
| double tesseract::Classify::speckle_rating_penalty = 10.0 |
"Penalty to add to worst rating for noise"
Definition at line 511 of file classify.h.
◆ TempProtoMask
| BIT_VECTOR tesseract::Classify::TempProtoMask = nullptr |
◆ tess_bn_matching
| bool tesseract::Classify::tess_bn_matching = 0 |
"Baseline Normalized Matching"
Definition at line 444 of file classify.h.
◆ tess_cn_matching
| bool tesseract::Classify::tess_cn_matching = 0 |
"Character Normalized Matching"
Definition at line 443 of file classify.h.
◆ tessedit_class_miss_scale
| double tesseract::Classify::tessedit_class_miss_scale = 0.00390625 |
"Scale factor for features not used"
Definition at line 475 of file classify.h.
The documentation for this class was generated from the following files:
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
double tessedit_class_miss_scale
void FontInfoDeleteCallback(FontInfo f)
bool ContainsUnichar(int unichar_id) const
UNICHAR_ID * BaselineClassifier(TBLOB *Blob, const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results)
TBLOB * ClassifyNormalizeIfNeeded() const
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
bool classify_enable_adaptive_matcher
virtual void DebugDisplay(const TrainingSample &sample, Pix *page_pix, UNICHAR_ID unichar_id)
uint16_t ConfigLengths[MAX_NUM_CONFIGS]
void ComputeAdaptionThresholds(float certainty_scale, float min_rating, float max_rating, float rating_margin, float *thresholds)
#define PROTOS_PER_PROTO_SET
bool AddFeature(FEATURE_SET FeatureSet, FEATURE Feature)
void ClearCharNormArray(uint8_t *char_norm_array)
#define MAX_INT_CHAR_NORM
#define BITS_PER_CP_VECTOR
bool use_ambigs_for_adaption
FEATURE_SET ExtractMicros(TBLOB *Blob, const DENORM &cn_denorm)
INT_TEMPLATES NewIntTemplates()
double matcher_clustering_max_angle_delta
void ExpandShapesAndApplyCorrections(ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, const uint8_t *cn_factors, UnicharRating *int_result, ADAPT_RESULTS *final_results)
void InitFeatureDefs(FEATURE_DEFS_STRUCT *featuredefs)
INT_PROTO_STRUCT Protos[PROTOS_PER_PROTO_SET]
void NormalizePicoX(FEATURE_SET FeatureSet)
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
bool PiecesAllNatural(int start, int count) const
#define TempConfigFor(Class, ConfigId)
int classify_class_pruner_threshold
uint8_t Bucket8For(float param, float offset, int num_buckets)
bool AlternativeChoiceAdjustmentsWorseThan(float threshold) const
bool get_isdigit(UNICHAR_ID unichar_id) const
UnicityTable< FontInfo > fontinfo_table_
bool get_isalpha(UNICHAR_ID unichar_id) const
int classify_adapt_proto_threshold
bool write_set(FILE *f, const FontSet &fs)
float ApplyCNCorrection(float rating, int blob_length, int normalization_factor, int matcher_multiplier)
void MasterMatcher(INT_TEMPLATES templates, int16_t num_features, const INT_FEATURE_STRUCT *features, const uint8_t *norm_factors, ADAPT_CLASS *classes, int debug, int matcher_multiplier, const TBOX &blob_box, const GenericVector< CP_RESULT_STRUCT > &results, ADAPT_RESULTS *final_results)
int MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures)
bool classify_enable_learning
#define INT_MEMBER(name, val, comment, vec)
GenericVector< ScoredFont > fonts
const DENORM & denorm() const
const UnicharAmbigs & getUnicharAmbigs() const
int CharNormClassifier(TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results)
bool classify_enable_adaptive_debugger
void EndAdaptiveClassifier()
INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs)
void plot(ScrollView *window, ScrollView::Color color, ScrollView::Color child_color)
void FreeProtoList(LIST *ProtoList)
float ComputeNormMatch(CLASS_ID ClassId, const FEATURE_STRUCT &feature, bool DebugMatch)
bool write_info(FILE *f, const FontInfo &fi)
const double kStandardFeatureLength
double matcher_good_threshold
FEATURE_SET FeatureSets[NUM_FEATURE_TYPES]
double classify_adapted_pruning_threshold
int matcher_permanent_classes_min
#define INT_CHAR_NORM_RANGE
uint32_t Configs[WERDS_PER_CONFIG_VEC]
void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
void truncate_at(int32_t index)
const UnicharIdVector * ReverseAmbigsForAdaption(UNICHAR_ID unichar_id) const
int classify_cp_cutoff_strength
const FontInfo * fontinfo
void UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob)
void NormalizeOutlines(LIST Outlines, float *XScale, float *YScale)
void cprintf(const char *format,...)
void EndDangerousAmbigs()
void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class)
void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet)
PARAM_DESC * ReadParamDesc(TFile *fp, uint16_t N)
void ConvertMatchesToChoices(const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices)
double classify_max_certainty_margin
int ComputeMoments(FCOORD *center, FCOORD *second_moments) const
int classify_learning_debug_level
int IntCastRounded(double x)
int classify_class_pruner_multiplier
uint8_t CircBucketFor(float param, float offset, int num_buckets)
bool read_spacing_info(TFile *f, FontInfo *fi)
const float MF_SCALE_FACTOR
void SettupStopperPass1()
Sets up stopper variables in preparation for the first pass.
static void SetupBLCNDenorms(const TBLOB &blob, bool nonlinear_norm, DENORM *bl_denorm, DENORM *cn_denorm, INT_FX_RESULT_STRUCT *fx_info)
GenericVector< int > best_state
bool LooksLikeGarbage(TBLOB *blob)
void RemoveBadMatches(ADAPT_RESULTS *Results)
#define CPrunerBitIndexFor(c)
bool eq(UNICHAR_ID unichar_id, const char *const unichar_repr) const
#define PRINT_FEATURE_MATCHES
STRING language_data_path_prefix
void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures)
void ZoomToRectangle(int x1, int y1, int x2, int y2)
void InitAdaptedClass(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates)
void PrintAdaptiveMatchResults(const ADAPT_RESULTS &results)
virtual int UnicharClassifySample(const TrainingSample &sample, Pix *page_pix, int debug, UNICHAR_ID keep_this, GenericVector< UnicharRating > *results)
bool write_spacing_info(FILE *f, const FontInfo &fi)
#define MAX_NUM_CLASS_PRUNERS
int FindGoodProtos(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray, int AdaptProtoThreshold, int Debug)
bool DeSerialize(TFile *fp)
FEATURE_SET ExtractIntGeoFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
#define CPrunerWordIndexFor(c)
void SetAdaptiveThreshold(float Threshold)
#define ProtoIn(Class, Pid)
double matcher_reliable_adaptive_result
int get_script(UNICHAR_ID unichar_id) const
const UnicharIdVector * AmbigsForAdaption(UNICHAR_ID unichar_id) const
int AddIntProto(INT_CLASS Class)
const double kWidthErrorWeighting
void DisplayAdaptedChar(TBLOB *blob, INT_CLASS_STRUCT *int_class)
void ClassifyAsNoise(ADAPT_RESULTS *Results)
int FRead(void *buffer, size_t size, int count)
void AddIntClass(INT_TEMPLATES Templates, CLASS_ID ClassId, INT_CLASS Class)
double matcher_avg_noise_size
void AddProtoToProtoPruner(PROTO Proto, int ProtoId, INT_CLASS Class, bool debug)
void WriteCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, CHAR_DESC CharDesc, STRING *str)
GenericVector< UNICHAR_ID > UnicharIdVector
#define reset_bit(array, bit)
GenericVector< CP_RESULT_STRUCT > CPResults
void free_adapted_templates(ADAPT_TEMPLATES templates)
void SetupNormalization(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift)
void GetFirstUnicharAndFont(int shape_id, int *unichar_id, int *font_id) const
#define ADAPTABLE_WERD_ADJUSTMENT
void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class)
int matcher_min_examples_for_prototyping
#define STRING_MEMBER(name, val, comment, vec)
void XHeightRange(int unichar_id, const UNICHARSET &unicharset, const TBOX &bbox, float *min_xht, float *max_xht, float *yshift) const
STRING debug_str(UNICHAR_ID id) const
WERD_CHOICE * best_choice
const char * c_str() const
void get_top_bottom(UNICHAR_ID unichar_id, int *min_bottom, int *max_bottom, int *min_top, int *max_top) const
bool get_enabled(UNICHAR_ID unichar_id) const
double matcher_perfect_threshold
#define ADAPT_TEMPLATE_SUFFIX
bool DeSerialize(bool swap, FILE *fp)
#define MakeConfigPermanent(Class, ConfigId)
ADAPT_TEMPLATES ReadAdaptedTemplates(TFile *File)
LIST delete_d(LIST list, void *key, int_compare is_equal)
ShapeTable * shape_table_
int PruneClasses(const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uint8_t *normalization_factors, const uint16_t *expected_num_features, GenericVector< CP_RESULT_STRUCT > *results)
double speckle_large_max_size
bool disable_character_fragments
void MakePermanent(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob)
void FreeFeature(FEATURE Feature)
void FreeTempConfig(TEMP_CONFIG Config)
#define MaxNumIntProtosIn(C)
#define LENGTH_COMPRESSION
void InitIntMatchWindowIfReqd()
ADAPT_TEMPLATES BackupAdaptedTemplates
bool classify_save_adapted_templates
bool classify_nonlinear_norm
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
void FontSetDeleteCallback(FontSet fs)
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
LIST ConvertBlob(TBLOB *blob)
double segment_penalty_dict_case_ok
GenericVector< TBLOB * > blobs
#define IncreaseConfidence(TempConfig)
#define MAX_NUM_INT_FEATURES
float adjust_factor() const
double classify_max_rating_ratio
STRING DebugStr(int shape_id) const
double classify_char_norm_range
void AmbigClassifier(const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, const TBLOB *blob, INT_TEMPLATES templates, ADAPT_CLASS *classes, UNICHAR_ID *ambiguities, ADAPT_RESULTS *results)
#define double_MEMBER(name, val, comment, vec)
NORM_PROTOS * ReadNormProtos(TFile *fp)
uint16_t ReadSampleSize(TFile *fp)
FEATURE_DEFS_STRUCT feature_defs_
UnicityTableEqEq< int > font_set
const Shape & GetShape(int shape_id) const
ADAPT_TEMPLATES AdaptedTemplates
int matcher_sufficient_examples_for_prototyping
char * classify_learn_debug_str
FEATURE_SET ExtractOutlineFeatures(TBLOB *Blob)
int AddIntConfig(INT_CLASS Class)
static void BreakPieces(const GenericVector< SEAM * > &seams, const GenericVector< TBLOB * > &blobs, int first, int last)
const T & get(int id) const
Return the object from an id.
void GetPreciseBoundingBox(TBOX *precise_box) const
void AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices)
PROTOTYPE * ReadPrototype(TFile *fp, uint16_t N)
const STRING debug_string() const
#define WERDS_PER_CONFIG_VEC
ADAPT_TEMPLATES Templates
void WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, const UNICHARSET &target_unicharset)
bool CompareFontSet(const FontSet &fs1, const FontSet &fs2)
#define ConfigIsPermanent(Class, ConfigId)
LIST push(LIST list, void *element)
void FillABC(PROTO Proto)
void SetupNonLinear(const DENORM *predecessor, const TBOX &box, float target_width, float target_height, float final_xshift, float final_yshift, const GenericVector< GenericVector< int > > &x_coords, const GenericVector< GenericVector< int > > &y_coords)
void LearnPieces(const char *fontname, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word)
int GetCharNormFeature(const INT_FX_RESULT_STRUCT &fx_info, INT_TEMPLATES templates, uint8_t *pruner_norm_array, uint8_t *char_norm_array)
void DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results)
FEATURE_SET ExtractIntCNFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
int GetFontinfoId(ADAPT_CLASS Class, uint8_t ConfigId)
FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc)
GenericVector< SEAM * > seam_array
TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId)
double speckle_rating_penalty
void ComputeCharNormArrays(FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uint8_t *char_norm_array, uint8_t *pruner_array)
PROTO_SET ProtoSets[MAX_NUM_PROTO_SETS]
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
TBOX bounding_box() const
void FreeOutlines(LIST Outlines)
int TruncateParam(float Param, int Min, int Max, char *Id)
bool classify_use_pre_adapted_templates
ADAPT_CLASS Class[MAX_NUM_CLASSES]
FEATURE_SET ExtractCharNormFeatures(const INT_FX_RESULT_STRUCT &fx_info)
FEATURE_SET ExtractPicoFeatures(TBLOB *Blob)
int classify_adapt_feature_threshold
int size() const
Return the size used.
void ReadNewCutoffs(TFile *fp, uint16_t *Cutoffs)
TEMP_PROTO NewTempProto()
SVEvent * AwaitEvent(SVEventType type)
#define UNLIKELY_NUM_FEAT
#define SET_BIT(array, bit)
double classify_adapted_pruning_factor
const FEATURE_DESC_STRUCT CharNormDesc
double classify_norm_adj_midpoint
static int SortDescendingRating(const void *t1, const void *t2)
bool read_info(TFile *f, FontInfo *fi)
#define WORST_POSSIBLE_RATING
INT_CLASS Class[MAX_NUM_CLASSES]
void ComputeIntCharNormArray(const FEATURE_STRUCT &norm_feature, uint8_t *char_norm_array)
void pad(int xpad, int ypad)
FEATURE_SET NewFeatureSet(int NumFeatures)
void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs)
void LearnBlob(const STRING &fontname, TBLOB *Blob, const DENORM &cn_denorm, const INT_FX_RESULT_STRUCT &fx_info, const char *blob_text)
double classify_misfit_junk_penalty
void RemoveExtraPuncs(ADAPT_RESULTS *Results)
bool contains_unichar(const char *const unichar_repr) const
int GetAdaptiveFeatures(TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures)
void FreePrototype(void *arg)
int MakeTempProtoPerm(void *item1, void *item2)
bool MarginalMatch(float confidence, float matcher_great_threshold)
void FreeFeatureSet(FEATURE_SET FeatureSet)
bool CompareFontInfo(const FontInfo &fi1, const FontInfo &fi2)
void DebugAdaptiveClassifier(TBLOB *Blob, ADAPT_RESULTS *Results)
void UpdateMatchDisplay()
UNICHAR_ID best_unichar_id
void AddNewResult(const UnicharRating &new_result, ADAPT_RESULTS *results)
void GetEdgeCoords(const TBOX &box, GenericVector< GenericVector< int > > *x_coords, GenericVector< GenericVector< int > > *y_coords) const
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
#define PRINT_MATCH_SUMMARY
void NormalizeOutlineX(FEATURE_SET FeatureSet)
GenericVector< STRING > correct_text
double ComputeCorrectedRating(bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, int matcher_multiplier, const uint8_t *cn_factors)
const FEATURE_DESC_STRUCT IntFeatDesc
void set_fonts(const GenericVector< tesseract::ScoredFont > &fonts)
CLASS_PRUNER_STRUCT * ClassPruners[MAX_NUM_CLASS_PRUNERS]
int FindBadFeatures(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray, int AdaptFeatureThreshold, int Debug)
#define WERDS_PER_CP_VECTOR
bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config)
DLLSYM void tprintf(const char *format,...)
bool LargeSpeckle(const TBLOB &blob)
const UNICHARSET & getUnicharset() const
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
INT_TEMPLATES ReadIntTemplates(TFile *fp)
INT_TEMPLATES PreTrainedTemplates
uint32_t p[NUM_CP_BUCKETS][NUM_CP_BUCKETS][NUM_CP_BUCKETS][WERDS_PER_CP_VECTOR]
void free_int_templates(INT_TEMPLATES templates)
bool ValidCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, CHAR_DESC CharDesc)
#define NUM_BITS_PER_CLASS
bool matcher_debug_separate_windows
void plot(ScrollView *window)
GenericVector< UnicharRating > match
void AddProtoToClassPruner(PROTO Proto, CLASS_ID ClassId, INT_TEMPLATES Templates)
TBOX bounding_box() const
UNICHAR_ID * GetAmbiguities(TBLOB *Blob, CLASS_ID CorrectClass)
CHAR_DESC NewCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs)
static void JoinPieces(const GenericVector< SEAM * > &seams, const GenericVector< TBLOB * > &blobs, int first, int last)
ADAPT_CLASS NewAdaptedClass()
int classify_integer_matcher_multiplier
const char * id_to_unichar(UNICHAR_ID id) const
bool classify_debug_character_fragments
#define WERDS_PER_PP_VECTOR
double matcher_rating_margin
#define PermConfigFor(Class, ConfigId)
void ResetAdaptiveClassifierInternal()
void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
#define ProtoForProtoId(C, P)
bool classify_bln_numeric_mode
double matcher_bad_match_pad
#define BOOL_MEMBER(name, val, comment, vec)
UnicityTable< FontSet > fontset_table_
#define OLD_MAX_NUM_CONFIGS
#define UnusedClassIdIn(T, c)
#define PRINT_PROTO_MATCHES
float ActualOutlineLength(FEATURE Feature)
double classify_character_fragments_garbage_certainty_threshold
char window_wait(ScrollView *win)
bool Serialize(FILE *fp, const char *data, size_t n=1)
#define ClassForClassId(T, c)
void AdaptToChar(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, float Threshold, ADAPT_TEMPLATES adaptive_templates)
#define MAX_PICO_FEATURES
LIST push_last(LIST list, void *item)
ADAPT_CLASS ReadAdaptedClass(TFile *fp)
void SettupStopperPass2()
Sets up stopper variables in preparation for the second pass.
void ClearFeatureSpaceWindow(NORM_METHOD norm_method, ScrollView *window)
int ClassAndConfigIDToFontOrShapeID(int class_id, int int_result_config) const
void ConvertToOutlineFeatures(MFOUTLINE Outline, FEATURE_SET FeatureSet)
PROTO_ID MakeNewTempProtos(FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS IClass, ADAPT_CLASS Class, BIT_VECTOR TempProtoMask)
#define OLD_WERDS_PER_CONFIG_VEC
void NormalizeOutline(MFOUTLINE Outline, float XOrigin)
int MaxNumUnichars() const
const int kBlnBaselineOffset
void FreeCharDescription(CHAR_DESC CharDesc)
void RefreshDebugWindow(ScrollView **win, const char *msg, int y_offset, const TBOX &wbox)
#define IsEmptyAdaptedClass(Class)
#define MAX_ADAPTABLE_WERD_SIZE
ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset)
void AddAdaptedClass(ADAPT_TEMPLATES Templates, ADAPT_CLASS Class, CLASS_ID ClassId)
bool read_set(TFile *f, FontSet *fs)
#define GetPicoFeatureLength()