23 #include "config_auto.h"
66 #define ADAPT_TEMPLATE_SUFFIX ".a"
68 #define MAX_MATCHES 10
69 #define UNLIKELY_NUM_FEAT 200
71 #define MAX_ADAPTABLE_WERD_SIZE 40
73 #define ADAPTABLE_WERD_ADJUSTMENT (0.05)
75 #define Y_DIM_OFFSET (Y_SHIFT - BASELINE_Y_SHIFT)
77 #define WORST_POSSIBLE_RATING (0.0f)
95 HasNonfragment =
false;
100 best_unichar_id = INVALID_UNICHAR_ID;
101 best_match_index = -1;
103 for (
int i = 0; i < match.
size(); ++i) {
104 if (match[i].rating > best_rating) {
105 best_rating = match[i].rating;
106 best_unichar_id = match[i].unichar_id;
107 best_match_index = i;
122 inline bool MarginalMatch(
float confidence,
float matcher_great_threshold) {
123 return (1.0f - confidence) > matcher_great_threshold;
132 for (
int i = 0; i < results.
match.
size(); i++) {
133 if (results.
match[i].unichar_id ==
id)
142 int index = FindScoredUnichar(
id, results);
144 return results.
match[index].rating;
186 assert(Choices !=
NULL);
210 #ifndef GRAPHICS_DISABLED
221 int y_offset,
const TBOX &wbox) {
222 #ifndef GRAPHICS_DISABLED
223 const int kSampleSpaceWidth = 500;
225 *win =
new ScrollView(msg, 100, y_offset, kSampleSpaceWidth * 2, 200,
226 kSampleSpaceWidth * 2, 200,
true);
229 (*win)->Pen(64, 64, 64);
234 (*win)->ZoomToRectangle(wbox.
left(), wbox.
top(),
236 #endif // GRAPHICS_DISABLED
246 if (word_len == 0)
return;
248 float* thresholds =
NULL;
249 if (fontname ==
NULL) {
255 tprintf(
"\n\nAdapting to word = %s\n",
257 thresholds =
new float[word_len];
265 #ifndef GRAPHICS_DISABLED
267 if (learn_fragmented_word_debug_win_ !=
NULL) {
277 #endif // GRAPHICS_DISABLED
279 for (
int ch = 0; ch < word_len; ++ch) {
284 float threshold = thresholds !=
NULL ? thresholds[ch] : 0.0f;
293 bool garbage =
false;
295 for (frag = 0; frag < word->
best_state[ch]; ++frag) {
306 for (frag = 0; frag < word->
best_state[ch]; ++frag) {
311 tokens[0].
string(), frag, word->
best_state[ch],
315 for (
int i = 0; i < tokens.
size(); i++) {
316 full_string += tokens[i];
317 if (i != tokens.
size() - 1)
320 LearnPieces(fontname, start_blob + frag, 1, threshold,
356 delete [] thresholds;
370 const char* correct_text,
WERD_RES* word) {
384 if (rotated_blob ==
NULL)
387 #ifndef GRAPHICS_DISABLED
393 learn_debug_win_->
Update();
398 blob->
plot(learn_fragments_debug_win_,
400 learn_fragments_debug_win_->
Update();
402 #endif // GRAPHICS_DISABLED
404 if (fontname !=
NULL) {
408 DENORM bl_denorm, cn_denorm;
411 &bl_denorm, &cn_denorm, &fx_info);
412 LearnBlob(fontname, rotated_blob, cn_denorm, fx_info, correct_text);
419 tprintf(
"Adapting to char = %s, thr= %g font_id= %d\n",
427 AdaptToChar(rotated_blob, class_id, font_id, threshold,
431 tprintf(
"Can't adapt to %s not in unicharset\n", correct_text);
433 if (rotated_blob != blob) {
463 File = fopen (Filename.
string(),
"wb");
465 cprintf (
"Unable to save adapted templates to %s!\n", Filename.
string());
467 cprintf (
"\nSaving adapted templates to %s ...", Filename.
string());
502 if (static_classifier_ !=
NULL) {
503 delete static_classifier_;
504 static_classifier_ =
NULL;
536 load_pre_trained_templates) {
546 tprintf(
"Error loading shape table!\n");
550 tprintf(
"Successfully loaded shape table!\n");
581 BaselineCutoffs[i] = 0;
590 File = fopen(Filename.
string(),
"rb");
594 cprintf(
"\nReading pre-adapted templates from %s ...\n",
603 BaselineCutoffs[i] = CharNormCutoffs[i];
615 tprintf(
"Resetting adaptive classifier (NumAdaptationsFailed=%d)\n",
616 NumAdaptationsFailed);
623 NumAdaptationsFailed = 0;
634 tprintf(
"Switch to backup adaptive classifier (NumAdaptationsFailed=%d)\n",
635 NumAdaptationsFailed);
640 NumAdaptationsFailed = 0;
744 BaselineCutoffs[ClassId] = CharNormCutoffs[ClassId];
748 for (Fid = 0; Fid < Features->
NumFeatures; Fid++) {
754 Proto = &(TempProto->
Proto);
780 tprintf(
"Added new class '%s' with class id %d and %d protos.\n",
828 *FloatFeatures = Features;
853 float adaptable_score =
856 BestChoiceLength > 0 &&
902 Class = adaptive_templates->
Class[ClassId];
903 assert(Class !=
NULL);
910 if (NumFeatures <= 0)
915 for (
int cfg = 0; cfg < IClass->
NumConfigs; ++cfg) {
917 SET_BIT(MatchingFontConfigs, cfg);
923 NumFeatures, IntFeatures,
930 if (1.0f - int_result.
rating <= Threshold) {
933 tprintf(
"Found good match to perm config %d = %4.1f%%.\n",
945 tprintf(
"Increasing reliability of temp config %d to %d.\n",
954 tprintf(
"Found poor match to temp config %d = %4.1f%%.\n",
961 NumFeatures, IntFeatures, FloatFeatures);
962 if (NewTempConfigId >= 0 &&
964 MakePermanent(adaptive_templates, ClassId, NewTempConfigId, Blob);
968 #ifndef GRAPHICS_DISABLED
979 #ifndef GRAPHICS_DISABLED
985 if (sample ==
NULL)
return;
989 bl_features.
size(), &bl_features[0],
992 tprintf(
"Best match to temp config %d = %4.1f%%.\n",
993 int_result.config, int_result.rating * 100.0);
996 ConfigMask = 1 << int_result.config;
999 bl_features.
size(), &bl_features[0],
1031 int old_match = FindScoredUnichar(new_result.
unichar_id, *results);
1033 if (new_result.
rating + matcher_bad_match_pad < results->best_rating ||
1034 (old_match < results->match.size() &&
1035 new_result.
rating <= results->
match[old_match].rating))
1041 if (old_match < results->match.size()) {
1042 results->
match[old_match].rating = new_result.
rating;
1091 if (int_features.
empty())
return;
1103 while (*ambiguities >= 0) {
1109 int_features.
size(), &int_features[0],
1117 CharNormArray, &int_result, results);
1120 delete [] CharNormArray;
1129 const uinT8* norm_factors,
1132 int matcher_multiplier,
1133 const TBOX& blob_box,
1136 int top = blob_box.
top();
1137 int bottom = blob_box.
bottom();
1139 for (
int c = 0; c < results.
size(); c++) {
1140 CLASS_ID class_id = results[c].Class;
1149 num_features, features,
1156 matcher_multiplier, norm_factors,
1157 &int_result, final_results);
1167 ADAPT_CLASS* classes,
bool debug,
int class_id,
int bottom,
int top,
1168 float cp_rating,
int blob_length,
int matcher_multiplier,
1169 const uinT8* cn_factors,
1171 if (classes !=
NULL) {
1174 for (
int f = 0; f < int_result->
fonts.size(); ++f) {
1175 int_result->
fonts[f].fontinfo_id =
1181 for (
int f = 0; f < int_result->
fonts.size(); ++f) {
1182 int_result->
fonts[f].fontinfo_id =
1184 int_result->
fonts[f].fontinfo_id);
1195 for (
int f = 0; f < int_result->
fonts.size(); ++f) {
1196 int shape_id = int_result->
fonts[f].fontinfo_id;
1198 for (
int c = 0; c < shape.
size(); ++c) {
1199 int unichar_id = shape[c].unichar_id;
1203 for (r = 0; r < mapped_results.
size() &&
1204 mapped_results[r].unichar_id != unichar_id; ++r) {}
1205 if (r == mapped_results.
size()) {
1207 mapped_results[r].unichar_id = unichar_id;
1208 mapped_results[r].fonts.
truncate(0);
1210 for (
int i = 0; i < shape[c].font_ids.
size(); ++i) {
1216 for (
int m = 0; m < mapped_results.
size(); ++m) {
1217 mapped_results[m].rating =
1219 cp_rating, int_result->
rating,
1221 blob_length, matcher_multiplier, cn_factors);
1231 bottom, top, blob_length,
1232 matcher_multiplier, cn_factors);
1241 double cp_rating,
double im_rating,
1243 int bottom,
int top,
1244 int blob_length,
int matcher_multiplier,
1245 const uinT8* cn_factors) {
1248 cn_factors[unichar_id],
1249 matcher_multiplier);
1251 double vertical_penalty = 0.0;
1256 int min_bottom, max_bottom, min_top, max_top;
1258 &min_top, &max_top);
1260 tprintf(
"top=%d, vs [%d, %d], bottom=%d, vs [%d, %d]\n",
1261 top, min_top, max_top, bottom, min_bottom, max_bottom);
1263 if (top < min_top || top > max_top ||
1264 bottom < min_bottom || bottom > max_bottom) {
1268 double result = 1.0 - (cn_corrected + miss_penalty + vertical_penalty);
1272 tprintf(
"%s: %2.1f%%(CP%2.1f, IM%2.1f + CN%.2f(%d) + MP%2.1f + VP%2.1f)\n",
1276 (1.0 - im_rating) * 100.0,
1277 (cn_corrected - (1.0 - im_rating)) * 100.0,
1278 cn_factors[unichar_id],
1279 miss_penalty * 100.0,
1280 vertical_penalty * 100.0);
1315 CharNormArray, BaselineCutoffs, &Results->
CPResults);
1325 delete [] CharNormArray;
1330 return Templates->
Class[ClassId]->
1362 -1, &unichar_results);
1364 for (
int r = 0; r < unichar_results.size(); ++r) {
1390 uinT8* pruner_norm_array =
new uinT8[num_pruner_classes];
1400 delete [] pruner_norm_array;
1401 if (keep_this >= 0) {
1402 adapt_results->
CPResults[0].Class = keep_this;
1408 int class_id = adapt_results->
CPResults[i].Class;
1417 blob_box, adapt_results->
CPResults, adapt_results);
1419 for (
int i = 0; i < adapt_results->
match.
size(); i++) {
1424 delete [] char_norm_array;
1425 delete adapt_results;
1426 return num_features;
1448 rating /= 1.0 + rating;
1461 BLOB_CHOICE_LIST *Choices) {
1462 assert(Choices !=
NULL);
1465 BLOB_CHOICE_IT temp_it;
1466 bool contains_nonfrag =
false;
1467 temp_it.set_to_list(Choices);
1468 int choices_length = 0;
1482 for (
int i = 0; i < Results->
match.
size(); i++) {
1484 bool adapted = result.
adapted;
1486 if (temp_it.length()+1 == max_matches &&
1487 !contains_nonfrag && current_is_frag) {
1499 Rating = Certainty = (1.0f - result.
rating);
1508 if (Certainty > best_certainty) {
1510 }
else if (adapted &&
1515 float min_xheight, max_xheight, yshift;
1517 &min_xheight, &max_xheight, &yshift);
1521 min_xheight, max_xheight, yshift,
1525 temp_it.add_to_end(choice);
1526 contains_nonfrag |= !current_is_frag;
1528 if (choices_length >= max_matches)
break;
1535 #ifndef GRAPHICS_DISABLED
1548 if (static_classifier_ ==
NULL)
return;
1553 if (sample ==
NULL)
return;
1590 if (sample ==
NULL)
return;
1651 if (sample ==
NULL) {
1666 Results->
match[0].unichar_id != CorrectClass)) {
1667 for (i = 0; i < Results->
match.
size(); i++)
1668 Ambiguities[i] = Results->
match[i].unichar_id;
1669 Ambiguities[i] = -1;
1671 Ambiguities[0] = -1;
1681 BLOB_CHOICE_LIST *ratings =
new BLOB_CHOICE_LIST();
1683 BLOB_CHOICE_IT ratings_it(ratings);
1687 ratings, unicharset);
1689 for (ratings_it.mark_cycle_pt(); !ratings_it.cycled_list();
1690 ratings_it.forward()) {
1694 float certainty = ratings_it.data()->certainty();
1729 uinT8* pruner_norm_array,
1730 uinT8* char_norm_array) {
1749 uinT8* char_norm_array,
1750 uinT8* pruner_array) {
1752 if (pruner_array !=
NULL) {
1757 templates->
NumClasses *
sizeof(pruner_array[0]));
1760 for (
int id = 0;
id < templates->
NumClasses; ++id) {
1763 for (
int config = 0; config < fs.
size; ++config) {
1765 for (
int c = 0; c < shape.
size(); ++c) {
1766 if (char_norm_array[shape[c].unichar_id] < pruner_array[
id])
1767 pruner_array[id] = char_norm_array[shape[c].unichar_id];
1803 int MaxProtoId, OldMaxProtoId;
1816 Class = Templates->
Class[ClassId];
1819 ++NumAdaptationsFailed;
1821 cprintf(
"Cannot make new temporary config: maximum number exceeded.\n");
1828 BlobLength, NumFeatures, Features,
1834 for (i = 0; i < NumOldProtos; i++)
1838 BlobLength, NumFeatures, Features,
1846 ++NumAdaptationsFailed;
1848 cprintf(
"Cannot make new temp protos: maximum number exceeded.\n");
1859 cprintf(
"Making new temp config %d fontinfo id %d"
1860 " using %d old and %d new protos.\n",
1862 NumOldProtos, MaxProtoId - OldMaxProtoId);
1905 for (ProtoStart = BadFeat, LastBad = ProtoStart + NumBadFeat;
1906 ProtoStart < LastBad; ProtoStart = ProtoEnd) {
1907 F1 = Features->
Features[*ProtoStart];
1912 for (ProtoEnd = ProtoStart + 1,
1916 F2 = Features->
Features[*ProtoEnd];
1921 AngleDelta = fabs(A1 - A2);
1922 if (AngleDelta > 0.5)
1923 AngleDelta = 1.0 - AngleDelta;
1926 fabs(X1 - X2) > SegmentLength ||
1927 fabs(Y1 - Y2) > SegmentLength)
1931 F2 = Features->
Features[*(ProtoEnd - 1)];
1941 Proto = &(TempProto->
Proto);
1946 Proto->
Length = SegmentLength;
1948 Proto->
X = (X1 + X2) / 2.0;
1986 Class = Templates->
Class[ClassId];
1997 "PERM_CONFIG_STRUCT");
2013 tprintf(
"Making config %d for %s (ClassId %d) permanent:"
2014 " fontinfo id %d, ambiguities '",
2015 ConfigId,
getDict().getUnicharset().debug_str(ClassId).
string(),
2018 *AmbigsPointer >= 0; ++AmbigsPointer)
2077 for (
int i = 0; i < results.
match.
size(); ++i) {
2079 results.
match[i].Print();
2102 static const char* romans =
"i v x I V X";
2110 float scored_one = ScoredUnichar(unichar_id_one, *Results);
2111 float scored_zero = ScoredUnichar(unichar_id_zero, *Results);
2113 for (Next = NextGood = 0; Next < Results->
match.
size(); Next++) {
2115 if (match.
rating >= BadMatchThreshold) {
2120 scored_one < BadMatchThreshold) {
2121 Results->
match[Next].unichar_id = unichar_id_one;
2123 scored_zero < BadMatchThreshold) {
2124 Results->
match[Next].unichar_id = unichar_id_zero;
2126 Results->
match[Next].unichar_id = INVALID_UNICHAR_ID;
2128 if (Results->
match[Next].unichar_id != INVALID_UNICHAR_ID) {
2129 if (NextGood == Next) {
2132 Results->
match[NextGood++] = Results->
match[Next];
2138 for (Next = NextGood = 0; Next < Results->
match.
size(); Next++) {
2139 if (Results->
match[Next].rating >= BadMatchThreshold) {
2140 if (NextGood == Next) {
2143 Results->
match[NextGood++] = Results->
match[Next];
2166 static char punc_chars[] =
". , ; : / ` ~ ' - = \\ | \" ! _ ^";
2167 static char digit_chars[] =
"0 1 2 3 4 5 6 7 8 9";
2171 for (Next = NextGood = 0; Next < Results->
match.
size(); Next++) {
2174 if (strstr(punc_chars,
2176 if (punc_count >= 2)
2180 if (strstr(digit_chars,
2182 if (digit_count >= 1)
2188 if (NextGood == Next) {
2191 Results->
match[NextGood++] = match;
2215 ClipToRange<int>(255 * Threshold, 0, 255));
2217 ClipToRange<int>(255 * Threshold, 0, 255));
2236 #ifndef GRAPHICS_DISABLED
2239 tprintf(
"No built-in templates for class/shape %d\n", shape_id);
2242 if (num_features <= 0) {
2243 tprintf(
"Illegal blob (char norm features)!\n");
2250 num_features, features, &cn_result,
2254 config_mask = 1 << cn_result.
config;
2256 tprintf(
"Static Shape ID: %d\n", shape_id);
2259 AllProtosOn, reinterpret_cast<BIT_VECTOR>(&config_mask),
2260 num_features, features, &cn_result,
2265 #endif // GRAPHICS_DISABLED
2271 int class_id,
int config_id)
const {
2279 return class_string;
2284 int int_result_config)
const {
2287 if (font_set_id < 0)
2288 return kBlankFontinfoId;
2291 return fs.
configs[int_result_config];
2301 for (
int config = 0; config < fs.
size; ++config) {
2302 if (fs.
configs[config] == shape_id)
2306 tprintf(
"Shape %d not found\n", shape_id);
2315 tprintf(
"NumTimesSeen for config of %s is %d\n",
2316 getDict().getUnicharset().debug_str(class_id).
string(),
2328 int ambigs_size = (ambigs ==
NULL) ? 0 : ambigs->
size();
2329 for (
int ambig = 0; ambig < ambigs_size; ++ambig) {
2331 assert(ambig_class !=
NULL);
2336 tprintf(
"Ambig %s has not been seen enough times,"
2337 " not making config for %s permanent\n",
2338 getDict().getUnicharset().debug_str(
2339 (*ambigs)[ambig]).
string(),
2340 getDict().getUnicharset().debug_str(class_id).
string());
2352 int ambigs_size = (ambigs ==
NULL) ? 0 : ambigs->
size();
2354 tprintf(
"Running UpdateAmbigsGroup for %s class_id=%d\n",
2355 getDict().getUnicharset().debug_str(class_id).
string(), class_id);
2357 for (
int ambig = 0; ambig < ambigs_size; ++ambig) {
2358 CLASS_ID ambig_class_id = (*ambigs)[ambig];
2366 tprintf(
"Making config %d of %s permanent\n", cfg,
2367 getDict().getUnicharset().debug_str(
2368 ambig_class_id).
string());
bool matcher_debug_separate_windows
FILE * GetDataFilePtr() const
#define zero_all_bits(array, length)
int ClassAndConfigIDToFontOrShapeID(int class_id, int int_result_config) const
ADAPT_TEMPLATES BackupAdaptedTemplates
void ClearCharNormArray(uinT8 *char_norm_array)
STRING debug_str(UNICHAR_ID id) const
STRING ClassIDToDebugStr(const INT_TEMPLATES_STRUCT *templates, int class_id, int config_id) const
void DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results)
int MakeTempProtoPerm(void *item1, void *item2)
#define PRINT_PROTO_MATCHES
virtual int UnicharClassifySample(const TrainingSample &sample, Pix *page_pix, int debug, UNICHAR_ID keep_this, GenericVector< UnicharRating > *results)
int classify_integer_matcher_multiplier
void AddProtoToProtoPruner(PROTO Proto, int ProtoId, INT_CLASS Class, bool debug)
bool classify_bln_numeric_mode
void FreeBitVector(BIT_VECTOR BitVector)
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
void AddNewResult(const UnicharRating &new_result, ADAPT_RESULTS *results)
void ReadNewCutoffs(FILE *CutoffFile, bool swap, inT64 end_offset, CLASS_CUTOFF_ARRAY Cutoffs)
#define WordsInVectorOfSize(NumBits)
#define PermConfigFor(Class, ConfigId)
int CharNormClassifier(TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results)
ADAPT_TEMPLATES Templates
void AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices)
virtual void DebugDisplay(const TrainingSample &sample, Pix *page_pix, UNICHAR_ID unichar_id)
INT_CLASS Class[MAX_NUM_CLASSES]
void EndDangerousAmbigs()
#define PRINT_MATCH_SUMMARY
float ApplyCNCorrection(float rating, int blob_length, int normalization_factor, int matcher_multiplier)
void ComputeAdaptionThresholds(float certainty_scale, float min_rating, float max_rating, float rating_margin, float *thresholds)
bool eq(UNICHAR_ID unichar_id, const char *const unichar_repr) const
WERD_CHOICE * best_choice
const INT_FEATURE_STRUCT * features() const
int FindBadFeatures(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, uinT16 BlobLength, inT16 NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray, int AdaptFeatureThreshold, int Debug)
GenericVector< CP_RESULT_STRUCT > CPResults
bool classify_enable_adaptive_matcher
ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset)
#define reset_bit(array, bit)
GenericVector< UnicharRating > match
double matcher_reliable_adaptive_result
double tessedit_class_miss_scale
void ComputeCharNormArrays(FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uinT8 *char_norm_array, uinT8 *pruner_array)
FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc)
double matcher_good_threshold
BIT_VECTOR NewBitVector(int NumBits)
TEMP_PROTO NewTempProto()
void MakePermanent(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob)
void DebugAdaptiveClassifier(TBLOB *Blob, ADAPT_RESULTS *Results)
double segment_penalty_dict_case_ok
TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId)
INT_TEMPLATES PreTrainedTemplates
#define ADAPTABLE_WERD_ADJUSTMENT
int CharNormTrainingSample(bool pruner_only, int keep_this, const TrainingSample &sample, GenericVector< UnicharRating > *results)
ADAPT_TEMPLATES ReadAdaptedTemplates(FILE *File)
PERM_CONFIG_STRUCT * PERM_CONFIG
inT64 GetEndOffset(TessdataType tessdata_type) const
#define set_all_bits(array, length)
GenericVector< STRING > correct_text
void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class)
#define IsEmptyAdaptedClass(Class)
bool DeSerialize(bool swap, FILE *fp)
void AddProtoToClassPruner(PROTO Proto, CLASS_ID ClassId, INT_TEMPLATES Templates)
const FontInfo * fontinfo
bool classify_save_adapted_templates
#define ConfigIsPermanent(Class, ConfigId)
double classify_character_fragments_garbage_certainty_threshold
bool LargeSpeckle(const TBLOB &blob)
double classify_adapted_pruning_factor
TessdataManager tessdata_manager
int matcher_min_examples_for_prototyping
#define test_bit(array, bit)
int ShapeIDToClassID(int shape_id) const
FEATURE_SET ExtractPicoFeatures(TBLOB *Blob)
UnicityTable< FontInfo > fontinfo_table_
UNICHAR_ID best_unichar_id
void ComputeIntCharNormArray(const FEATURE_STRUCT &norm_feature, uinT8 *char_norm_array)
void ShowBestMatchFor(int shape_id, const INT_FEATURE_STRUCT *features, int num_features)
int AddIntConfig(INT_CLASS Class)
UNICHAR_ID * GetAmbiguities(TBLOB *Blob, CLASS_ID CorrectClass)
void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
int classify_learning_debug_level
double matcher_perfect_threshold
#define UNLIKELY_NUM_FEAT
void plot(ScrollView *window, ScrollView::Color color, ScrollView::Color child_color)
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
int MaxNumUnichars() const
int GetCharNormFeature(const INT_FX_RESULT_STRUCT &fx_info, INT_TEMPLATES templates, uinT8 *pruner_norm_array, uinT8 *char_norm_array)
#define MAX_ADAPTABLE_WERD_SIZE
FLOAT32 ActualOutlineLength(FEATURE Feature)
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
double matcher_rating_margin
void plot(ScrollView *window)
bool classify_nonlinear_norm
void SettupStopperPass1()
Sets up stopper variables in preparation for the first pass.
bool get_isdigit(UNICHAR_ID unichar_id) const
void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures)
ADAPT_CLASS Class[MAX_NUM_CLASSES]
ShapeTable * shape_table_
int AddIntProto(INT_CLASS Class)
void LearnPieces(const char *fontname, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word)
double ComputeCorrectedRating(bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, int matcher_multiplier, const uinT8 *cn_factors)
void SetAdaptiveThreshold(FLOAT32 Threshold)
#define LENGTH_COMPRESSION
int classify_adapt_proto_threshold
FEATURE_STRUCT * GetCNFeature() const
#define MakeProtoPermanent(Class, ProtoId)
int matcher_permanent_classes_min
const UnicharAmbigs & getUnicharAmbigs() const
GenericVector< ScoredFont > fonts
int get_script(UNICHAR_ID unichar_id) const
const FEATURE_DESC_STRUCT CharNormDesc
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, inT16 NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class)
char * classify_learn_debug_str
const char *const id_to_unichar(UNICHAR_ID id) const
#define TempConfigFor(Class, ConfigId)
TEMP_PROTO_STRUCT * TEMP_PROTO
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
void AmbigClassifier(const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, const TBLOB *blob, INT_TEMPLATES templates, ADAPT_CLASS *classes, UNICHAR_ID *ambiguities, ADAPT_RESULTS *results)
PROTO_ID MakeNewTempProtos(FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS IClass, ADAPT_CLASS Class, BIT_VECTOR TempProtoMask)
bool classify_use_pre_adapted_templates
void UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob)
void Init(tesseract::IntParam *classify_debug_level)
void SetAdaptiveThreshold(FLOAT32 Threshold)
const STRING debug_string() const
static int SortDescendingRating(const void *t1, const void *t2)
INT_TEMPLATES ReadIntTemplates(FILE *File)
double matcher_avg_noise_size
void AdaptToChar(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, FLOAT32 Threshold, ADAPT_TEMPLATES adaptive_templates)
bool PiecesAllNatural(int start, int count) const
#define MAX_NUM_INT_FEATURES
#define GetPicoFeatureLength()
#define SET_BIT(array, bit)
GenericVector< SEAM * > seam_array
void RemoveExtraPuncs(ADAPT_RESULTS *Results)
const int kBlnBaselineOffset
void SettupStopperPass2()
Sets up stopper variables in preparation for the second pass.
void RemoveBadMatches(ADAPT_RESULTS *Results)
void get_top_bottom(UNICHAR_ID unichar_id, int *min_bottom, int *max_bottom, int *min_top, int *max_top) const
void ExpandShapesAndApplyCorrections(ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, const uinT8 *cn_factors, UnicharRating *int_result, ADAPT_RESULTS *final_results)
void ConvertMatchesToChoices(const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices)
#define ADAPT_TEMPLATE_SUFFIX
const DENORM & denorm() const
void UpdateMatchDisplay()
void SwitchAdaptiveClassifier()
TBLOB * ClassifyNormalizeIfNeeded() const
#define UnusedClassIdIn(T, c)
static void JoinPieces(const GenericVector< SEAM * > &seams, const GenericVector< TBLOB * > &blobs, int first, int last)
STRING language_data_path_prefix
static void SetupBLCNDenorms(const TBLOB &blob, bool nonlinear_norm, DENORM *bl_denorm, DENORM *cn_denorm, INT_FX_RESULT_STRUCT *fx_info)
int GetFontinfoId(ADAPT_CLASS Class, uinT8 ConfigId)
double matcher_bad_match_pad
void DisplayAdaptedChar(TBLOB *blob, INT_CLASS_STRUCT *int_class)
char window_wait(ScrollView *win)
TBOX bounding_box() const
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
#define ClassForClassId(T, c)
bool classify_debug_character_fragments
int PruneClasses(const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uinT8 *normalization_factors, const uinT16 *expected_num_features, GenericVector< CP_RESULT_STRUCT > *results)
void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
STRING DebugStr(int shape_id) const
int outline_length() const
void * alloc_struct(inT32 count, const char *)
#define PRINT_FEATURE_MATCHES
void FillABC(PROTO Proto)
GenericVector< TBLOB * > blobs
const UnicharIdVector * ReverseAmbigsForAdaption(UNICHAR_ID unichar_id) const
void free_int_templates(INT_TEMPLATES templates)
bool AlternativeChoiceAdjustmentsWorseThan(float threshold) const
const UNICHARSET & getUnicharset() const
bool SeekToStart(TessdataType tessdata_type)
void PrintAdaptiveMatchResults(const ADAPT_RESULTS &results)
#define MakeConfigPermanent(Class, ConfigId)
bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config)
int GetAdaptiveFeatures(TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures)
void FreeFeature(FEATURE Feature)
bool get_enabled(UNICHAR_ID unichar_id) const
int matcher_sufficient_examples_for_prototyping
int IntCastRounded(double x)
double classify_adapted_pruning_threshold
bool get_isalpha(UNICHAR_ID unichar_id) const
#define WORST_POSSIBLE_RATING
float adjust_factor() const
void FreeTempProto(void *arg)
void InitAdaptedClass(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates)
void LearnBlob(const STRING &fontname, TBLOB *Blob, const DENORM &cn_denorm, const INT_FX_RESULT_STRUCT &fx_info, const char *blob_text)
void set_fonts(const GenericVector< tesseract::ScoredFont > &fonts)
bool LooksLikeGarbage(TBLOB *blob)
const UnicharIdVector * AmbigsForAdaption(UNICHAR_ID unichar_id) const
FEATURE_SET ExtractOutlineFeatures(TBLOB *Blob)
#define IncreaseConfidence(TempConfig)
LIST delete_d(LIST list, void *key, int_compare is_equal)
void RefreshDebugWindow(ScrollView **win, const char *msg, int y_offset, const TBOX &wbox)
int FindGoodProtos(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, uinT16 BlobLength, inT16 NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray, int AdaptProtoThreshold, int Debug)
void free_adapted_templates(ADAPT_TEMPLATES templates)
bool MarginalMatch(float confidence, float matcher_great_threshold)
UnicityTable< FontSet > fontset_table_
bool AdaptableWord(WERD_RES *word)
bool contains_unichar(const char *const unichar_repr) const
bool disable_character_fragments
void EndAdaptiveClassifier()
void MasterMatcher(INT_TEMPLATES templates, inT16 num_features, const INT_FEATURE_STRUCT *features, const uinT8 *norm_factors, ADAPT_CLASS *classes, int debug, int matcher_multiplier, const TBOX &blob_box, const GenericVector< CP_RESULT_STRUCT > &results, ADAPT_RESULTS *final_results)
bool classify_enable_learning
void cprintf(const char *format,...)
void InitMatcherRatings(register FLOAT32 *Rating)
void XHeightRange(int unichar_id, const UNICHARSET &unicharset, const TBOX &bbox, float *min_xht, float *max_xht, float *yshift) const
GenericVector< int > best_state
void ResetAdaptiveClassifierInternal()
void InitAdaptiveClassifier(bool load_pre_trained_templates)
bool use_ambigs_for_adaption
int MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures)
void LearnWord(const char *fontname, WERD_RES *word)
TBOX bounding_box() const
void StartBackupAdaptiveClassifier()
static void BreakPieces(const GenericVector< SEAM * > &seams, const GenericVector< TBLOB * > &blobs, int first, int last)
const char * string() const
const double kStandardFeatureLength
bool classify_enable_adaptive_debugger
const Shape & GetShape(int shape_id) const
UNICHAR_ID * BaselineClassifier(TBLOB *Blob, const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results)
double classify_misfit_junk_penalty
#define copy_all_bits(source, dest, length)
double matcher_clustering_max_angle_delta
void FreeFeatureSet(FEATURE_SET FeatureSet)
void ClassifyAsNoise(ADAPT_RESULTS *Results)
ADAPT_TEMPLATES AdaptedTemplates
int classify_adapt_feature_threshold
NORM_PROTOS * ReadNormProtos(FILE *File, inT64 end_offset)
void FreeTempConfig(TEMP_CONFIG Config)
int geo_feature(int index) const
LIST push(LIST list, void *element)