tesseract  5.0.0-alpha-619-ge9db
adaptmatch.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: adaptmatch.cpp
3  ** Purpose: High level adaptive matcher.
4  ** Author: Dan Johnson
5  **
6  ** (c) Copyright Hewlett-Packard Company, 1988.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  ******************************************************************************/
17 
18 /*-----------------------------------------------------------------------------
19  Include Files and Type Defines
20 -----------------------------------------------------------------------------*/
21 #ifdef HAVE_CONFIG_H
22 #include "config_auto.h"
23 #endif
24 
25 #include <algorithm> // for max, min
26 #include <cassert> // for assert
27 #include <cmath> // for fabs
28 #include <cstdint> // for INT32_MAX, UINT8_MAX
29 #include <cstdio> // for fflush, fclose, fopen, stdout, FILE
30 #include <cstdlib> // for malloc
31 #include <cstring> // for strstr, memset, strcmp
32 #include "adaptive.h" // for ADAPT_CLASS, free_adapted_templates
33 #include "ambigs.h" // for UnicharIdVector, UnicharAmbigs
34 #include "bitvec.h" // for FreeBitVector, NewBitVector, BIT_VECTOR
35 #include "blobs.h" // for TBLOB, TWERD
36 #include "callcpp.h" // for cprintf, window_wait
37 #include "classify.h" // for Classify, CST_FRAGMENT, CST_WHOLE
38 #include "dict.h" // for Dict
39 #include "errcode.h" // for ASSERT_HOST
40 #include "featdefs.h" // for CharNormDesc
41 #include "float2int.h" // for BASELINE_Y_SHIFT
42 #include "fontinfo.h" // for ScoredFont, FontSet
43 #include <tesseract/genericvector.h> // for GenericVector
44 #include <tesseract/helpers.h> // for IntCastRounded, ClipToRange
45 #include "intfx.h" // for BlobToTrainingSample, INT_FX_RESULT_S...
46 #include "intmatcher.h" // for CP_RESULT_STRUCT, IntegerMatcher
47 #include "intproto.h" // for INT_FEATURE_STRUCT, (anonymous), Clas...
48 #include "matchdefs.h" // for CLASS_ID, FEATURE_ID, PROTO_ID, NO_PROTO
49 #include "mfoutline.h" // for baseline, character, MF_SCALE_FACTOR
50 #include "normalis.h" // for DENORM, kBlnBaselineOffset, kBlnXHeight
51 #include "normfeat.h" // for ActualOutlineLength, CharNormLength
52 #include "ocrfeatures.h" // for FEATURE_STRUCT, FreeFeatureSet, FEATURE
53 #include "oldlist.h" // for push, delete_d
54 #include "outfeat.h" // for OutlineFeatDir, OutlineFeatLength
55 #include "pageres.h" // for WERD_RES
56 #include "params.h" // for IntParam, BoolParam, DoubleParam, Str...
57 #include "picofeat.h" // for PicoFeatDir, PicoFeatX, PicoFeatY
58 #include "protos.h" // for PROTO_STRUCT, FillABC, PROTO
59 #include "ratngs.h" // for BLOB_CHOICE_IT, BLOB_CHOICE_LIST, BLO...
60 #include "rect.h" // for TBOX
61 #include "scrollview.h" // for ScrollView, ScrollView::BROWN, Scroll...
62 #include "seam.h" // for SEAM
63 #include <tesseract/serialis.h> // for TFile
64 #include "shapeclassifier.h" // for ShapeClassifier
65 #include "shapetable.h" // for UnicharRating, ShapeTable, Shape, Uni...
66 #include <tesseract/strngs.h> // for STRING
67 #include "tessclassifier.h" // for TessClassifier
68 #include "tessdatamanager.h" // for TessdataManager, TESSDATA_INTTEMP
69 #include "tprintf.h" // for tprintf
70 #include "trainingsample.h" // for TrainingSample
71 #include <tesseract/unichar.h> // for UNICHAR_ID, INVALID_UNICHAR_ID
72 #include "unicharset.h" // for UNICHARSET, CHAR_FRAGMENT, UNICHAR_SPACE
73 #include "unicity_table.h" // for UnicityTable
74 
75 #define ADAPT_TEMPLATE_SUFFIX ".a"
76 
77 #define MAX_MATCHES 10
78 #define UNLIKELY_NUM_FEAT 200
79 #define NO_DEBUG 0
80 #define MAX_ADAPTABLE_WERD_SIZE 40
81 
82 #define ADAPTABLE_WERD_ADJUSTMENT (0.05)
83 
84 #define Y_DIM_OFFSET (Y_SHIFT - BASELINE_Y_SHIFT)
85 
86 #define WORST_POSSIBLE_RATING (0.0f)
87 
90 
// Holds the candidate matches produced for one blob plus a cached summary
// (id, index and rating) of the best-rated candidate.
// NOTE(review): this listing skips embedded lines 93-95, 97-98 and 100-101,
// so the declarations of HasNonfragment, best_unichar_id, best_match_index
// and match (all used below) are not visible here -- confirm against the
// real source file.
91 struct ADAPT_RESULTS {
92  int32_t BlobLength;
96  float best_rating;
99 
 // Puts the struct into its starting state: BlobLength becomes INT32_MAX,
 // HasNonfragment becomes false, and the best-match summary is recomputed
 // through ComputeBest().
102  inline void Initialize() {
103  BlobLength = INT32_MAX;
104  HasNonfragment = false;
105  ComputeBest();
106  }
107  // Computes best_unichar_id, best_match_index and best_rating.
 // Walks match front to back; an entry replaces the current best only when
 // its rating is strictly greater, so among equal ratings the earliest entry
 // is kept. With no qualifying entry the id stays INVALID_UNICHAR_ID and the
 // index stays -1.
 // NOTE(review): embedded line 111 is absent from this listing; presumably it
 // resets best_rating (e.g. to WORST_POSSIBLE_RATING) before the scan --
 // confirm, because the visible code never resets it.
108  void ComputeBest() {
109  best_unichar_id = INVALID_UNICHAR_ID;
110  best_match_index = -1;
112  for (int i = 0; i < match.size(); ++i) {
113  if (match[i].rating > best_rating) {
114  best_rating = match[i].rating;
115  best_unichar_id = match[i].unichar_id;
116  best_match_index = i;
117  }
118  }
119  }
120 };
121 
// Key structure; only the ConfigId member is visible in this listing.
// NOTE(review): embedded lines 123-124 are missing here, so any members
// declared ahead of ConfigId cannot be seen -- confirm against the real
// source file.
122 struct PROTO_KEY {
125  int ConfigId;
126 };
127 
128 /*-----------------------------------------------------------------------------
129  Private Macros
130 -----------------------------------------------------------------------------*/
// Returns true when the amount by which confidence falls short of 1.0 is
// strictly greater than matcher_great_threshold. A shortfall exactly equal to
// the threshold returns false.
131 inline bool MarginalMatch(float confidence, float matcher_great_threshold) {
132  return (1.0f - confidence) > matcher_great_threshold;
133 }
134 
135 /*-----------------------------------------------------------------------------
136  Private Function Prototypes
137 -----------------------------------------------------------------------------*/
138 // Returns the index of the given id in results, if present, or the size of the
139 // vector (index it will go at) if not present.
// The search is a front-to-back scan of results.match, so when several
// entries share the id the lowest index is returned. Callers detect "not
// found" by comparing the return value with results.match.size().
140 static int FindScoredUnichar(UNICHAR_ID id, const ADAPT_RESULTS& results) {
141  for (int i = 0; i < results.match.size(); i++) {
142  if (results.match[i].unichar_id == id)
143  return i;
144  }
145  return results.match.size();
146 }
147 
148 // Returns the current rating for a unichar id if we have rated it, defaulting
149 // to WORST_POSSIBLE_RATING.
// Lookup is delegated to FindScoredUnichar(); an index at or beyond
// results.match.size() is its "not present" signal and selects the default.
150 static float ScoredUnichar(UNICHAR_ID id, const ADAPT_RESULTS& results) {
151  int index = FindScoredUnichar(id, results);
152  if (index >= results.match.size()) return WORST_POSSIBLE_RATING;
153  return results.match[index].rating;
154 }
155 
// Forward declarations of free functions. Their definitions are not visible
// in this part of the file, so only the signatures can be described here.
// Takes a pointer to float; NOTE(review): presumably the ratings to
// initialize -- confirm at the definition.
156 void InitMatcherRatings(float *Rating);
157 
// Takes two untyped pointers and returns int; NOTE(review): the void*/void*
// shape suggests a list-callback style predicate -- confirm at the definition.
158 int MakeTempProtoPerm(void *item1, void *item2);
159 
// Takes a single float threshold; NOTE(review): a call spelled
// SetAdaptiveThreshold(Threshold) appears in Classify::AdaptToChar, which may
// resolve to a member rather than to this declaration -- confirm.
160 void SetAdaptiveThreshold(float Threshold);
161 
162 
163 /*-----------------------------------------------------------------------------
164  Public Code
165 -----------------------------------------------------------------------------*/
166 /*---------------------------------------------------------------------------*/
167 namespace tesseract {
// Runs the adaptive matcher on Blob and converts the outcome into Choices.
// Sequence: DoAdaptiveMatch() fills a fresh ADAPT_RESULTS, RemoveBadMatches()
// is applied, the matches are sorted with UnicharRating::SortDescendingRating,
// RemoveExtraPuncs() is applied, the best match is recomputed, and
// ConvertMatchesToChoices() writes into Choices.
//   Blob    - blob to classify; its denorm() and bounding_box() are handed to
//             ConvertMatchesToChoices().
//   Choices - output list; must not be nullptr (checked with assert).
// AdaptedTemplates must be non-null (checked with ASSERT_HOST).
191 void Classify::AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices) {
192  assert(Choices != nullptr);
 // Results is heap-allocated here and released by the delete at the bottom;
 // there is no early return in between.
193  auto *Results = new ADAPT_RESULTS;
194  Results->Initialize();
195 
196  ASSERT_HOST(AdaptedTemplates != nullptr);
197 
198  DoAdaptiveMatch(Blob, Results);
199 
200  RemoveBadMatches(Results);
201  Results->match.sort(&UnicharRating::SortDescendingRating);
202  RemoveExtraPuncs(Results);
 // The sort and the removals above can change indices, so the cached
 // best-match summary is rebuilt before it is used.
203  Results->ComputeBest();
204  ConvertMatchesToChoices(Blob->denorm(), Blob->bounding_box(), Results,
205  Choices);
206 
207  // TODO(rays) Move to before ConvertMatchesToChoices!
 // AddLargeSpeckleTo() is called when the blob is reported as a large speckle
 // or when the conversion above left Choices empty.
208  if (LargeSpeckle(*Blob) || Choices->length() == 0)
209  AddLargeSpeckleTo(Results->BlobLength, Choices);
210 
211  if (matcher_debug_level >= 1) {
212  tprintf("AD Matches = ");
213  PrintAdaptiveMatchResults(*Results);
214  }
215 
216 #ifndef GRAPHICS_DISABLED
 // NOTE(review): embedded line 217 is missing from this listing; presumably
 // it is a condition guarding the debugger call below -- confirm.
218  DebugAdaptiveClassifier(Blob, Results);
219 #endif
220 
221  delete Results;
222 } /* AdaptiveClassifier */
223 
224 // If *win is nullptr, sets it to a new ScrollView() object with title msg.
225 // Clears the window and draws baselines.
//   win      - in/out: address of the window pointer; a window is created
//              only when *win is nullptr, otherwise the existing one is reused.
//   msg      - title given to a newly created window.
//   y_offset - passed as the third ScrollView constructor argument;
//              NOTE(review): presumably the window's y position -- confirm.
//   wbox     - box whose left/top/right/bottom are passed to ZoomToRectangle().
// When GRAPHICS_DISABLED is defined the whole body is compiled out and the
// function does nothing.
226 void Classify::RefreshDebugWindow(ScrollView **win, const char *msg,
227  int y_offset, const TBOX &wbox) {
228  #ifndef GRAPHICS_DISABLED
229  const int kSampleSpaceWidth = 500;
230  if (*win == nullptr) {
231  *win = new ScrollView(msg, 100, y_offset, kSampleSpaceWidth * 2, 200,
232  kSampleSpaceWidth * 2, 200, true);
233  }
234  (*win)->Clear();
235  (*win)->Pen(64, 64, 64);
 // Two horizontal guide lines spanning x = -500 .. 500: one at
 // kBlnBaselineOffset and one kBlnXHeight above it.
236  (*win)->Line(-kSampleSpaceWidth, kBlnBaselineOffset,
237  kSampleSpaceWidth, kBlnBaselineOffset);
238  (*win)->Line(-kSampleSpaceWidth, kBlnXHeight + kBlnBaselineOffset,
239  kSampleSpaceWidth, kBlnXHeight + kBlnBaselineOffset);
240  (*win)->ZoomToRectangle(wbox.left(), wbox.top(),
241  wbox.right(), wbox.bottom());
242  #endif // GRAPHICS_DISABLED
243 }
244 
245 // Learns the given word using its chopped_word, seam_array, denorm,
246 // box_word, best_state, and correct_text to learn both correctly and
247 // incorrectly segmented blobs. If fontname is not nullptr, then LearnBlob
248 // is called and the data will be saved in an internal buffer.
249 // Otherwise AdaptToBlob is called for adaption within a document.
// In this function both modes are reached through LearnPieces(), which is
// called once per character (CST_WHOLE) and, for multi-piece characters that
// pass the garbage check, once per piece (CST_FRAGMENT).
//   fontname - nullptr selects adaption mode; non-null selects training mode.
//   word     - word to learn from; nothing happens if correct_text is empty.
// NOTE(review): this listing drops embedded lines 260, 264-266, 272, 281, 286
// and 303, so some conditions and one call head are not visible below.
250 void Classify::LearnWord(const char* fontname, WERD_RES* word) {
251  int word_len = word->correct_text.size();
252  if (word_len == 0) return;
253 
 // thresholds stays nullptr in training mode; in adaption mode it is
 // allocated below and released by the delete[] at the end. Both early
 // returns happen before the allocation.
254  float* thresholds = nullptr;
255  if (fontname == nullptr) {
256  // Adaption mode.
257  if (!EnableLearning || word->best_choice == nullptr)
258  return; // Can't or won't adapt.
259 
 // NOTE(review): embedded line 260 (presumably a debug-level guard for this
 // tprintf) is missing from the listing.
261  tprintf("\n\nAdapting to word = %s\n",
262  word->best_choice->debug_string().c_str());
263  thresholds = new float[word_len];
 // NOTE(review): the next line is the tail of a call whose start (embedded
 // lines 264-266) is missing; presumably it fills thresholds -- confirm.
267  matcher_rating_margin, thresholds);
268  }
 // Index of the first chopped blob belonging to the current character.
269  int start_blob = 0;
270 
271  #ifndef GRAPHICS_DISABLED
 // NOTE(review): embedded line 272 is missing; the closing brace at embedded
 // line 282 suggests it opens an enclosing `if` -- confirm.
273  if (learn_fragmented_word_debug_win_ != nullptr) {
274  window_wait(learn_fragmented_word_debug_win_);
275  }
276  RefreshDebugWindow(&learn_fragments_debug_win_, "LearnPieces", 400,
277  word->chopped_word->bounding_box());
278  RefreshDebugWindow(&learn_fragmented_word_debug_win_, "LearnWord", 200,
279  word->chopped_word->bounding_box());
280  word->chopped_word->plot(learn_fragmented_word_debug_win_);
282  }
283  #endif // GRAPHICS_DISABLED
284 
285  for (int ch = 0; ch < word_len; ++ch) {
 // NOTE(review): embedded line 286 (the condition opening the block closed
 // at embedded line 288) is missing from the listing.
287  tprintf("\nLearning %s\n", word->correct_text[ch].c_str());
288  }
 // Characters with empty correct text are not learned, but start_blob still
 // advances past their pieces at the bottom of the loop.
289  if (word->correct_text[ch].length() > 0) {
290  float threshold = thresholds != nullptr ? thresholds[ch] : 0.0f;
291 
 // Learn the character as a whole from its best_state[ch] pieces.
292  LearnPieces(fontname, start_blob, word->best_state[ch], threshold,
293  CST_WHOLE, word->correct_text[ch].c_str(), word);
294 
295  if (word->best_state[ch] > 1 && !disable_character_fragments) {
296  // Check that the character breaks into meaningful fragments
297  // that each match a whole character with at least
298  // classify_character_fragments_garbage_certainty_threshold
299  bool garbage = false;
300  int frag;
301  for (frag = 0; frag < word->best_state[ch]; ++frag) {
302  TBLOB* frag_blob = word->chopped_word->blobs[start_blob + frag];
 // NOTE(review): embedded line 303 is missing; the brace at embedded line
 // 305 suggests it opens a condition around the check below -- confirm.
304  garbage |= LooksLikeGarbage(frag_blob);
305  }
306  }
307  // Learn the fragments.
308  if (!garbage) {
309  bool pieces_all_natural = word->PiecesAllNatural(start_blob,
310  word->best_state[ch]);
311  if (pieces_all_natural || !prioritize_division) {
312  for (frag = 0; frag < word->best_state[ch]; ++frag) {
 // Only the first space-separated token of the text is rewritten into
 // fragment form; any remaining tokens are re-joined unchanged.
313  GenericVector<STRING> tokens;
314  word->correct_text[ch].split(' ', &tokens);
315 
316  tokens[0] = CHAR_FRAGMENT::to_string(
317  tokens[0].c_str(), frag, word->best_state[ch],
318  pieces_all_natural);
319 
320  STRING full_string;
321  for (int i = 0; i < tokens.size(); i++) {
322  full_string += tokens[i];
323  if (i != tokens.size() - 1)
324  full_string += ' ';
325  }
326  LearnPieces(fontname, start_blob + frag, 1, threshold,
327  CST_FRAGMENT, full_string.c_str(), word);
328  }
329  }
330  }
331  }
332 
333  // TODO(rays): re-enable this part of the code when we switch to the
334  // new classifier that needs to see examples of garbage.
335  /*
336  if (word->best_state[ch] > 1) {
337  // If the next blob is good, make junk with the rightmost fragment.
338  if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) {
339  LearnPieces(fontname, start_blob + word->best_state[ch] - 1,
340  word->best_state[ch + 1] + 1,
341  threshold, CST_IMPROPER, INVALID_UNICHAR, word);
342  }
343  // If the previous blob is good, make junk with the leftmost fragment.
344  if (ch > 0 && word->correct_text[ch - 1].length() > 0) {
345  LearnPieces(fontname, start_blob - word->best_state[ch - 1],
346  word->best_state[ch - 1] + 1,
347  threshold, CST_IMPROPER, INVALID_UNICHAR, word);
348  }
349  }
350  // If the next blob is good, make a join with it.
351  if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) {
352  STRING joined_text = word->correct_text[ch];
353  joined_text += word->correct_text[ch + 1];
354  LearnPieces(fontname, start_blob,
355  word->best_state[ch] + word->best_state[ch + 1],
356  threshold, CST_NGRAM, joined_text.c_str(), word);
357  }
358  */
359  }
360  start_blob += word->best_state[ch];
361  }
362  delete [] thresholds;
363 } // LearnWord.
364 
365 // Builds a blob of length fragments, from the word, starting at start,
366 // and then learns it, as having the given correct_text.
367 // If fontname is not nullptr, then LearnBlob is called and the data will be
368 // saved in an internal buffer for static training.
369 // Otherwise AdaptToChar is called for adaption within a document.
370 // threshold is a magic number required by AdaptToChar and generated by
371 // ComputeAdaptionThresholds.
372 // Although it can be partly inferred from the string, segmentation is
373 // provided to explicitly clarify the character segmentation.
// Only CST_WHOLE, and CST_FRAGMENT while disable_character_fragments is
// false, are processed; any other segmentation returns immediately.
// The pieces joined at the start are split again by BreakPieces() at the end.
// NOTE(review): this listing drops embedded lines 405, 416, 424 and 434, so a
// few call heads/tails and one condition are not visible below.
374 void Classify::LearnPieces(const char* fontname, int start, int length,
375  float threshold, CharSegmentationType segmentation,
376  const char* correct_text, WERD_RES* word) {
377  // TODO(daria) Remove/modify this if/when we want
378  // to train and/or adapt to n-grams.
379  if (segmentation != CST_WHOLE &&
380  (segmentation != CST_FRAGMENT || disable_character_fragments))
381  return;
382 
383  if (length > 1) {
384  SEAM::JoinPieces(word->seam_array, word->chopped_word->blobs, start,
385  start + length - 1);
386  }
387  TBLOB* blob = word->chopped_word->blobs[start];
388  // Rotate the blob if needed for classification.
 // A nullptr result is treated as "no separate rotated copy": the original
 // blob is used, and the delete at the end is skipped for it.
389  TBLOB* rotated_blob = blob->ClassifyNormalizeIfNeeded();
390  if (rotated_blob == nullptr)
391  rotated_blob = blob;
392 
393  #ifndef GRAPHICS_DISABLED
394  // Draw debug windows showing the blob that is being learned if needed.
395  if (strcmp(classify_learn_debug_str.c_str(), correct_text) == 0) {
396  RefreshDebugWindow(&learn_debug_win_, "LearnPieces", 600,
397  word->chopped_word->bounding_box());
398  rotated_blob->plot(learn_debug_win_, ScrollView::GREEN, ScrollView::BROWN);
399  learn_debug_win_->Update();
400  window_wait(learn_debug_win_);
401  }
402  if (classify_debug_character_fragments && segmentation == CST_FRAGMENT) {
403  ASSERT_HOST(learn_fragments_debug_win_ != nullptr); // set up in LearnWord
 // NOTE(review): embedded line 405 (the remaining plot arguments) is
 // missing from the listing.
404  blob->plot(learn_fragments_debug_win_,
406  learn_fragments_debug_win_->Update();
407  }
408  #endif // GRAPHICS_DISABLED
409 
410  if (fontname != nullptr) {
 // Training path: these parameter values are overwritten and not restored
 // within this function.
411  classify_norm_method.set_value(character); // force char norm spc 30/11/93
412  tess_bn_matching.set_value(false); // turn it off
413  tess_cn_matching.set_value(false);
414  DENORM bl_denorm, cn_denorm;
415  INT_FX_RESULT_STRUCT fx_info;
 // NOTE(review): the next line is the tail of a call whose start (embedded
 // line 416) is missing; it receives the denorms and fx_info declared above.
417  &bl_denorm, &cn_denorm, &fx_info);
418  LearnBlob(fontname, rotated_blob, cn_denorm, fx_info, correct_text);
419  } else if (unicharset.contains_unichar(correct_text)) {
420  UNICHAR_ID class_id = unicharset.unichar_to_id(correct_text);
 // Font id 0 is used when the word carries no fontinfo.
421  int font_id = word->fontinfo != nullptr
422  ? fontinfo_table_.get_id(*word->fontinfo)
423  : 0;
 // NOTE(review): embedded line 424 (presumably a debug-level guard for this
 // tprintf) is missing from the listing.
425  tprintf("Adapting to char = %s, thr= %g font_id= %d\n",
426  unicharset.id_to_unichar(class_id), threshold, font_id);
427  // fontname is nullptr on this path, so we are doing recognition
428  // (as opposed to training), so we must have already set word fonts.
429  AdaptToChar(rotated_blob, class_id, font_id, threshold, AdaptedTemplates);
430  if (BackupAdaptedTemplates != nullptr) {
431  // Adapt the backup templates too. They will be used if the primary gets
432  // too full.
 // NOTE(review): embedded line 434 (the final argument of this call) is
 // missing from the listing.
433  AdaptToChar(rotated_blob, class_id, font_id, threshold,
435  }
436  } else if (classify_debug_level >= 1) {
437  tprintf("Can't adapt to %s not in unicharset\n", correct_text);
438  }
439  if (rotated_blob != blob) {
440  delete rotated_blob;
441  }
442 
 // NOTE(review): BreakPieces() runs even when length == 1 and JoinPieces()
 // was skipped above; presumably a no-op for a single piece -- confirm.
443  SEAM::BreakPieces(word->seam_array, word->chopped_word->blobs, start,
444  start + length - 1);
445 } // LearnPieces.
446 
447 /*---------------------------------------------------------------------------*/
460  STRING Filename;
461  FILE *File;
462 
463  if (AdaptedTemplates != nullptr &&
465  Filename = imagefile + ADAPT_TEMPLATE_SUFFIX;
466  File = fopen (Filename.c_str(), "wb");
467  if (File == nullptr)
468  cprintf ("Unable to save adapted templates to %s!\n", Filename.c_str());
469  else {
470  cprintf ("\nSaving adapted templates to %s ...", Filename.c_str());
471  fflush(stdout);
473  cprintf ("\n");
474  fclose(File);
475  }
476  }
477 
478  if (AdaptedTemplates != nullptr) {
480  AdaptedTemplates = nullptr;
481  }
482  if (BackupAdaptedTemplates != nullptr) {
484  BackupAdaptedTemplates = nullptr;
485  }
486 
487  if (PreTrainedTemplates != nullptr) {
489  PreTrainedTemplates = nullptr;
490  }
492  FreeNormProtos();
493  if (AllProtosOn != nullptr) {
494  FreeBitVector(AllProtosOn);
495  FreeBitVector(AllConfigsOn);
496  FreeBitVector(AllConfigsOff);
497  FreeBitVector(TempProtoMask);
498  AllProtosOn = nullptr;
499  AllConfigsOn = nullptr;
500  AllConfigsOff = nullptr;
501  TempProtoMask = nullptr;
502  }
503  delete shape_table_;
504  shape_table_ = nullptr;
505  delete static_classifier_;
506  static_classifier_ = nullptr;
507 } /* EndAdaptiveClassifier */
508 
509 
510 /*---------------------------------------------------------------------------*/
529  return;
530  if (AllProtosOn != nullptr)
531  EndAdaptiveClassifier(); // Don't leak with multiple inits.
532 
533  // If there is no language_data_path_prefix, the classifier will be
534  // adaptive only.
535  if (language_data_path_prefix.length() > 0 && mgr != nullptr) {
536  TFile fp;
539 
540  if (mgr->GetComponent(TESSDATA_SHAPE_TABLE, &fp)) {
542  if (!shape_table_->DeSerialize(&fp)) {
543  tprintf("Error loading shape table!\n");
544  delete shape_table_;
545  shape_table_ = nullptr;
546  }
547  }
548 
550  ReadNewCutoffs(&fp, CharNormCutoffs);
551 
553  NormProtos = ReadNormProtos(&fp);
554  static_classifier_ = new TessClassifier(false, this);
555  }
556 
557  InitIntegerFX();
558 
559  AllProtosOn = NewBitVector(MAX_NUM_PROTOS);
560  AllConfigsOn = NewBitVector(MAX_NUM_CONFIGS);
561  AllConfigsOff = NewBitVector(MAX_NUM_CONFIGS);
562  TempProtoMask = NewBitVector(MAX_NUM_PROTOS);
563  set_all_bits(AllProtosOn, WordsInVectorOfSize(MAX_NUM_PROTOS));
564  set_all_bits(AllConfigsOn, WordsInVectorOfSize(MAX_NUM_CONFIGS));
565  zero_all_bits(AllConfigsOff, WordsInVectorOfSize(MAX_NUM_CONFIGS));
566 
567  for (uint16_t& BaselineCutoff : BaselineCutoffs) {
568  BaselineCutoff = 0;
569  }
570 
572  TFile fp;
573  STRING Filename;
574 
575  Filename = imagefile;
576  Filename += ADAPT_TEMPLATE_SUFFIX;
577  if (!fp.Open(Filename.c_str(), nullptr)) {
579  } else {
580  cprintf("\nReading pre-adapted templates from %s ...\n",
581  Filename.c_str());
582  fflush(stdout);
584  cprintf("\n");
586 
587  for (int i = 0; i < AdaptedTemplates->Templates->NumClasses; i++) {
588  BaselineCutoffs[i] = CharNormCutoffs[i];
589  }
590  }
591  } else {
592  if (AdaptedTemplates != nullptr)
595  }
596 } /* InitAdaptiveClassifier */
597 
600  tprintf("Resetting adaptive classifier (NumAdaptationsFailed=%d)\n",
601  NumAdaptationsFailed);
602  }
605  if (BackupAdaptedTemplates != nullptr)
607  BackupAdaptedTemplates = nullptr;
608  NumAdaptationsFailed = 0;
609 }
610 
611 // If there are backup adapted templates, switches to those, otherwise resets
612 // the main adaptive classifier (because it is full.)
614  if (BackupAdaptedTemplates == nullptr) {
616  return;
617  }
619  tprintf("Switch to backup adaptive classifier (NumAdaptationsFailed=%d)\n",
620  NumAdaptationsFailed);
621  }
624  BackupAdaptedTemplates = nullptr;
625  NumAdaptationsFailed = 0;
626 }
627 
628 // Resets the backup adaptive classifier to empty.
630  if (BackupAdaptedTemplates != nullptr)
633 }
634 
635 /*---------------------------------------------------------------------------*/
654 
656 
657 } /* SettupPass1 */
658 
659 
660 /*---------------------------------------------------------------------------*/
670  EnableLearning = false;
672 
673 } /* SettupPass2 */
674 
675 
676 /*---------------------------------------------------------------------------*/
694  CLASS_ID ClassId,
695  int FontinfoId,
696  ADAPT_CLASS Class,
697  ADAPT_TEMPLATES Templates) {
698  FEATURE_SET Features;
699  int Fid, Pid;
700  FEATURE Feature;
701  int NumFeatures;
702  TEMP_PROTO TempProto;
703  PROTO Proto;
704  INT_CLASS IClass;
706 
707  classify_norm_method.set_value(baseline);
708  Features = ExtractOutlineFeatures(Blob);
709  NumFeatures = Features->NumFeatures;
710  if (NumFeatures > UNLIKELY_NUM_FEAT || NumFeatures <= 0) {
711  FreeFeatureSet(Features);
712  return;
713  }
714 
715  Config = NewTempConfig(NumFeatures - 1, FontinfoId);
716  TempConfigFor(Class, 0) = Config;
717 
718  /* this is a kludge to construct cutoffs for adapted templates */
719  if (Templates == AdaptedTemplates)
720  BaselineCutoffs[ClassId] = CharNormCutoffs[ClassId];
721 
722  IClass = ClassForClassId (Templates->Templates, ClassId);
723 
724  for (Fid = 0; Fid < Features->NumFeatures; Fid++) {
725  Pid = AddIntProto (IClass);
726  assert (Pid != NO_PROTO);
727 
728  Feature = Features->Features[Fid];
729  TempProto = NewTempProto ();
730  Proto = &(TempProto->Proto);
731 
732  /* compute proto params - NOTE that Y_DIM_OFFSET must be used because
733  ConvertProto assumes that the Y dimension varies from -0.5 to 0.5
734  instead of the -0.25 to 0.75 used in baseline normalization */
735  Proto->Angle = Feature->Params[OutlineFeatDir];
736  Proto->X = Feature->Params[OutlineFeatX];
737  Proto->Y = Feature->Params[OutlineFeatY] - Y_DIM_OFFSET;
738  Proto->Length = Feature->Params[OutlineFeatLength];
739  FillABC(Proto);
740 
741  TempProto->ProtoId = Pid;
742  SET_BIT (Config->Protos, Pid);
743 
744  ConvertProto(Proto, Pid, IClass);
745  AddProtoToProtoPruner(Proto, Pid, IClass,
747 
748  Class->TempProtos = push (Class->TempProtos, TempProto);
749  }
750  FreeFeatureSet(Features);
751 
752  AddIntConfig(IClass);
753  ConvertConfig (AllProtosOn, 0, IClass);
754 
756  tprintf("Added new class '%s' with class id %d and %d protos.\n",
757  unicharset.id_to_unichar(ClassId), ClassId, NumFeatures);
759  DisplayAdaptedChar(Blob, IClass);
760  }
761 
762  if (IsEmptyAdaptedClass(Class))
763  (Templates->NumNonEmptyClasses)++;
764 } /* InitAdaptedClass */
765 
766 
767 /*---------------------------------------------------------------------------*/
787  INT_FEATURE_ARRAY IntFeatures,
788  FEATURE_SET *FloatFeatures) {
789  FEATURE_SET Features;
790  int NumFeatures;
791 
792  classify_norm_method.set_value(baseline);
793  Features = ExtractPicoFeatures(Blob);
794 
795  NumFeatures = Features->NumFeatures;
796  if (NumFeatures == 0 || NumFeatures > UNLIKELY_NUM_FEAT) {
797  FreeFeatureSet(Features);
798  return 0;
799  }
800 
801  ComputeIntFeatures(Features, IntFeatures);
802  *FloatFeatures = Features;
803 
804  return NumFeatures;
805 } /* GetAdaptiveFeatures */
806 
807 
808 /*-----------------------------------------------------------------------------
809  Private Code
810 -----------------------------------------------------------------------------*/
811 /*---------------------------------------------------------------------------*/
822  if (word->best_choice == nullptr) return false;
823  int BestChoiceLength = word->best_choice->length();
824  float adaptable_score =
826  return // rules that apply in general - simplest to compute first
827  BestChoiceLength > 0 &&
828  BestChoiceLength == word->rebuild_word->NumBlobs() &&
829  BestChoiceLength <= MAX_ADAPTABLE_WERD_SIZE &&
830  // This basically ensures that the word is at least a dictionary match
831  // (freq word, user word, system dawg word, etc).
832  // Since all the other adjustments will make adjust factor higher
833  // than adaptable_score=1.1+0.05=1.15
834  // Since these are other flags that ensure that the word is dict word,
835  // this check could be at times redundant.
836  word->best_choice->adjust_factor() <= adaptable_score &&
837  // Make sure that alternative choices are not dictionary words.
838  word->AlternativeChoiceAdjustmentsWorseThan(adaptable_score);
839 }
840 
841 /*---------------------------------------------------------------------------*/
// Adapts the given templates to one labelled blob.
//   Blob               - blob whose features are learned.
//   ClassId            - class the blob is known to belong to; ids rejected by
//                        LegalClassId() are ignored.
//   FontinfoId         - font id; only configs with this font id are matched.
//   Threshold          - the match counts as good when
//                        1 - rating <= Threshold.
//   adaptive_templates - templates to update.
// Flow: an empty class is initialised through InitAdaptedClass(). Otherwise
// the blob is matched against the class; a good match to a permanent config
// changes nothing, a good match to a temporary config raises its confidence
// (and may make it permanent), and a poor match creates a new temporary
// config (which may also be made permanent at once).
// NOTE(review): this listing drops embedded lines 892-893, 900, 912, 921, 924
// and 937, so part of the Match() call and several conditions are not visible.
853 void Classify::AdaptToChar(TBLOB* Blob, CLASS_ID ClassId, int FontinfoId,
854  float Threshold,
855  ADAPT_TEMPLATES adaptive_templates) {
856  int NumFeatures;
857  INT_FEATURE_ARRAY IntFeatures;
858  UnicharRating int_result;
859  INT_CLASS IClass;
860  ADAPT_CLASS Class;
861  TEMP_CONFIG TempConfig;
862  FEATURE_SET FloatFeatures;
863  int NewTempConfigId;
864 
865  if (!LegalClassId (ClassId))
866  return;
867 
868  int_result.unichar_id = ClassId;
869  Class = adaptive_templates->Class[ClassId];
870  assert(Class != nullptr);
871  if (IsEmptyAdaptedClass(Class)) {
872  InitAdaptedClass(Blob, ClassId, FontinfoId, Class, adaptive_templates);
873  } else {
874  IClass = ClassForClassId(adaptive_templates->Templates, ClassId);
875 
876  NumFeatures = GetAdaptiveFeatures(Blob, IntFeatures, &FloatFeatures);
877  if (NumFeatures <= 0) {
878  return; // Features already freed by GetAdaptiveFeatures.
879  }
880 
881  // Only match configs with the matching font.
 // NOTE(review): the vector is sized with MAX_NUM_PROTOS although it is
 // indexed by config number; presumably MAX_NUM_PROTOS >= MAX_NUM_CONFIGS so
 // this only over-allocates -- confirm.
882  BIT_VECTOR MatchingFontConfigs = NewBitVector(MAX_NUM_PROTOS);
 // Every bit up to NumConfigs is written explicitly (set or reset), so the
 // mask does not depend on the initial contents of the new vector.
883  for (int cfg = 0; cfg < IClass->NumConfigs; ++cfg) {
884  if (GetFontinfoId(Class, cfg) == FontinfoId) {
885  SET_BIT(MatchingFontConfigs, cfg);
886  } else {
887  reset_bit(MatchingFontConfigs, cfg);
888  }
889  }
 // NOTE(review): embedded lines 892-893 (the rest of this argument list)
 // are missing from the listing.
890  im_.Match(IClass, AllProtosOn, MatchingFontConfigs,
891  NumFeatures, IntFeatures,
894  FreeBitVector(MatchingFontConfigs);
895 
896  SetAdaptiveThreshold(Threshold);
897 
898  if (1.0f - int_result.rating <= Threshold) {
899  if (ConfigIsPermanent(Class, int_result.config)) {
 // Good match to a permanent config: nothing to learn, so the float
 // features are released here before the early return.
901  tprintf("Found good match to perm config %d = %4.1f%%.\n",
902  int_result.config, int_result.rating * 100.0);
903  FreeFeatureSet(FloatFeatures);
904  return;
905  }
906 
 // Good match to a temporary config: bump its confidence and keep the
 // class-wide maximum of NumTimesSeen up to date.
907  TempConfig = TempConfigFor(Class, int_result.config);
908  IncreaseConfidence(TempConfig);
909  if (TempConfig->NumTimesSeen > Class->MaxNumTimesSeen) {
910  Class->MaxNumTimesSeen = TempConfig->NumTimesSeen;
911  }
913  tprintf("Increasing reliability of temp config %d to %d.\n",
914  int_result.config, TempConfig->NumTimesSeen);
915 
916  if (TempConfigReliable(ClassId, TempConfig)) {
917  MakePermanent(adaptive_templates, ClassId, int_result.config, Blob);
918  UpdateAmbigsGroup(ClassId, Blob);
919  }
920  } else {
 // Poor match: try to add a new temporary config built from this blob.
922  tprintf("Found poor match to temp config %d = %4.1f%%.\n",
923  int_result.config, int_result.rating * 100.0);
925  DisplayAdaptedChar(Blob, IClass);
926  }
927  NewTempConfigId =
928  MakeNewTemporaryConfig(adaptive_templates, ClassId, FontinfoId,
929  NumFeatures, IntFeatures, FloatFeatures);
 // A negative id means no config was created; only a created config is
 // checked for reliability.
930  if (NewTempConfigId >= 0 &&
931  TempConfigReliable(ClassId, TempConfigFor(Class, NewTempConfigId))) {
932  MakePermanent(adaptive_templates, ClassId, NewTempConfigId, Blob);
933  UpdateAmbigsGroup(ClassId, Blob);
934  }
935 
936 #ifndef GRAPHICS_DISABLED
938  DisplayAdaptedChar(Blob, IClass);
939  }
940 #endif
941  }
 // Common release point for every path through the else-branch that did not
 // return early.
942  FreeFeatureSet(FloatFeatures);
943  }
944 } /* AdaptToChar */
945 
947 #ifndef GRAPHICS_DISABLED
948  INT_FX_RESULT_STRUCT fx_info;
952  &bl_features);
953  if (sample == nullptr) return;
954 
955  UnicharRating int_result;
956  im_.Match(int_class, AllProtosOn, AllConfigsOn,
957  bl_features.size(), &bl_features[0],
960  tprintf("Best match to temp config %d = %4.1f%%.\n",
961  int_result.config, int_result.rating * 100.0);
963  uint32_t ConfigMask;
964  ConfigMask = 1 << int_result.config;
966  im_.Match(int_class, AllProtosOn, static_cast<BIT_VECTOR>(&ConfigMask),
967  bl_features.size(), &bl_features[0],
971  }
972 
973  delete sample;
974 #endif
975 }
976 
// Offers new_result to results, keeping at most one entry per unichar id.
//   new_result - candidate to add; its unichar_id selects the slot.
//   results    - accumulated matches, updated in place.
// The candidate is dropped when its rating plus matcher_bad_match_pad is
// below the current best rating, or when an entry for the same id already
// exists with a rating at least as high. Otherwise the existing entry's
// rating is raised, or the candidate is appended. The best-match summary is
// only updated by non-fragment candidates that beat the current best rating.
994 void Classify::AddNewResult(const UnicharRating& new_result,
995  ADAPT_RESULTS *results) {
 // old_match equals results->match.size() when the id is not present yet.
996  int old_match = FindScoredUnichar(new_result.unichar_id, *results);
997 
998  if (new_result.rating + matcher_bad_match_pad < results->best_rating ||
999  (old_match < results->match.size() &&
1000  new_result.rating <= results->match[old_match].rating))
1001  return; // New one not good enough.
1002 
1003  if (!unicharset.get_fragment(new_result.unichar_id))
1004  results->HasNonfragment = true;
1005 
1006  if (old_match < results->match.size()) {
 // Only the rating is copied into the existing entry; its other fields keep
 // their previous values.
1007  results->match[old_match].rating = new_result.rating;
1008  } else {
 // The appended element lands at index old_match (the former size), which
 // is why old_match can be used as best_match_index below.
1009  results->match.push_back(new_result);
1010  }
1011 
1012  if (new_result.rating > results->best_rating &&
1013  // Ensure that fragments do not affect best rating, class and config.
1014  // This is needed so that at least one non-fragmented character is
1015  // always present in the results.
1016  // TODO(daria): verify that this helps accuracy and does not
1017  // hurt performance.
1018  !unicharset.get_fragment(new_result.unichar_id)) {
1019  results->best_match_index = old_match;
1020  results->best_rating = new_result.rating;
1021  results->best_unichar_id = new_result.unichar_id;
1022  }
1023 } /* AddNewResult */
1024 
1025 
1026 /*---------------------------------------------------------------------------*/
1046  const GenericVector<INT_FEATURE_STRUCT>& int_features,
1047  const INT_FX_RESULT_STRUCT& fx_info,
1048  const TBLOB *blob,
1049  INT_TEMPLATES templates,
1050  ADAPT_CLASS *classes,
1051  UNICHAR_ID *ambiguities,
1052  ADAPT_RESULTS *results) {
1053  if (int_features.empty()) return;
1054  auto* CharNormArray = new uint8_t[unicharset.size()];
1055  UnicharRating int_result;
1056 
1057  results->BlobLength = GetCharNormFeature(fx_info, templates, nullptr,
1058  CharNormArray);
1059  bool debug = matcher_debug_level >= 2 || classify_debug_level > 1;
1060  if (debug)
1061  tprintf("AM Matches = ");
1062 
1063  int top = blob->bounding_box().top();
1064  int bottom = blob->bounding_box().bottom();
1065  while (*ambiguities >= 0) {
1066  CLASS_ID class_id = *ambiguities;
1067 
1068  int_result.unichar_id = class_id;
1069  im_.Match(ClassForClassId(templates, class_id),
1071  int_features.size(), &int_features[0],
1072  &int_result,
1075 
1076  ExpandShapesAndApplyCorrections(nullptr, debug, class_id, bottom, top, 0,
1077  results->BlobLength,
1079  CharNormArray, &int_result, results);
1080  ambiguities++;
1081  }
1082  delete [] CharNormArray;
1083 } /* AmbigClassifier */
1084 
1085 /*---------------------------------------------------------------------------*/
1089  int16_t num_features,
1090  const INT_FEATURE_STRUCT* features,
1091  const uint8_t* norm_factors,
1092  ADAPT_CLASS* classes,
1093  int debug,
1094  int matcher_multiplier,
1095  const TBOX& blob_box,
1096  const GenericVector<CP_RESULT_STRUCT>& results,
1097  ADAPT_RESULTS* final_results) {
1098  int top = blob_box.top();
1099  int bottom = blob_box.bottom();
1100  UnicharRating int_result;
1101  for (int c = 0; c < results.size(); c++) {
1102  CLASS_ID class_id = results[c].Class;
1103  BIT_VECTOR protos = classes != nullptr ? classes[class_id]->PermProtos
1104  : AllProtosOn;
1105  BIT_VECTOR configs = classes != nullptr ? classes[class_id]->PermConfigs
1106  : AllConfigsOn;
1107 
1108  int_result.unichar_id = class_id;
1109  im_.Match(ClassForClassId(templates, class_id),
1110  protos, configs,
1111  num_features, features,
1112  &int_result, classify_adapt_feature_threshold, debug,
1114  bool is_debug = matcher_debug_level >= 2 || classify_debug_level > 1;
1115  ExpandShapesAndApplyCorrections(classes, is_debug, class_id, bottom, top,
1116  results[c].Rating,
1117  final_results->BlobLength,
1118  matcher_multiplier, norm_factors,
1119  &int_result, final_results);
1120  }
1121 }
1122 
1123 // Converts configs to fonts, and if the result is not adapted, and a
1124 // shape_table_ is present, the shape is expanded to include all
1125 // unichar_ids represented, before applying a set of corrections to the
1126 // distance rating in int_result, (see ComputeCorrectedRating.)
1127 // The results are added to the final_results output.
1129  ADAPT_CLASS* classes, bool debug, int class_id, int bottom, int top,
1130  float cp_rating, int blob_length, int matcher_multiplier,
1131  const uint8_t* cn_factors,
1132  UnicharRating* int_result, ADAPT_RESULTS* final_results) {
1133  if (classes != nullptr) {
1134  // Adapted result. Convert configs to fontinfo_ids.
1135  int_result->adapted = true;
1136  for (int f = 0; f < int_result->fonts.size(); ++f) {
1137  int_result->fonts[f].fontinfo_id =
1138  GetFontinfoId(classes[class_id], int_result->fonts[f].fontinfo_id);
1139  }
1140  } else {
1141  // Pre-trained result. Map fonts using font_sets_.
1142  int_result->adapted = false;
1143  for (int f = 0; f < int_result->fonts.size(); ++f) {
1144  int_result->fonts[f].fontinfo_id =
1146  int_result->fonts[f].fontinfo_id);
1147  }
1148  if (shape_table_ != nullptr) {
1149  // Two possible cases:
1150  // 1. Flat shapetable. All unichar-ids of the shapes referenced by
1151  // int_result->fonts are the same. In this case build a new vector of
1152  // mapped fonts and replace the fonts in int_result.
1153  // 2. Multi-unichar shapetable. Variable unichars in the shapes referenced
1154  // by int_result. In this case, build a vector of UnicharRating to
1155  // gather together different font-ids for each unichar. Also covers case1.
1156  GenericVector<UnicharRating> mapped_results;
1157  for (int f = 0; f < int_result->fonts.size(); ++f) {
1158  int shape_id = int_result->fonts[f].fontinfo_id;
1159  const Shape& shape = shape_table_->GetShape(shape_id);
1160  for (int c = 0; c < shape.size(); ++c) {
1161  int unichar_id = shape[c].unichar_id;
1162  if (!unicharset.get_enabled(unichar_id)) continue;
1163  // Find the mapped_result for unichar_id.
1164  int r = 0;
1165  for (r = 0; r < mapped_results.size() &&
1166  mapped_results[r].unichar_id != unichar_id; ++r) {}
1167  if (r == mapped_results.size()) {
1168  mapped_results.push_back(*int_result);
1169  mapped_results[r].unichar_id = unichar_id;
1170  mapped_results[r].fonts.truncate(0);
1171  }
1172  for (int i = 0; i < shape[c].font_ids.size(); ++i) {
1173  mapped_results[r].fonts.push_back(
1174  ScoredFont(shape[c].font_ids[i], int_result->fonts[f].score));
1175  }
1176  }
1177  }
1178  for (int m = 0; m < mapped_results.size(); ++m) {
1179  mapped_results[m].rating =
1180  ComputeCorrectedRating(debug, mapped_results[m].unichar_id,
1181  cp_rating, int_result->rating,
1182  int_result->feature_misses, bottom, top,
1183  blob_length, matcher_multiplier, cn_factors);
1184  AddNewResult(mapped_results[m], final_results);
1185  }
1186  return;
1187  }
1188  }
1189  if (unicharset.get_enabled(class_id)) {
1190  int_result->rating = ComputeCorrectedRating(debug, class_id, cp_rating,
1191  int_result->rating,
1192  int_result->feature_misses,
1193  bottom, top, blob_length,
1194  matcher_multiplier, cn_factors);
1195  AddNewResult(*int_result, final_results);
1196  }
1197 }
1198 
1199 // Applies a set of corrections to the confidence im_rating,
1200 // including the cn_correction, miss penalty and additional penalty
1201 // for non-alnums being vertical misfits. Returns the corrected confidence.
1202 double Classify::ComputeCorrectedRating(bool debug, int unichar_id,
1203  double cp_rating, double im_rating,
1204  int feature_misses,
1205  int bottom, int top,
1206  int blob_length, int matcher_multiplier,
1207  const uint8_t* cn_factors) {
1208  // Compute class feature corrections.
1209  double cn_corrected = im_.ApplyCNCorrection(1.0 - im_rating, blob_length,
1210  cn_factors[unichar_id],
1211  matcher_multiplier);
1212  double miss_penalty = tessedit_class_miss_scale * feature_misses;
1213  double vertical_penalty = 0.0;
1214  // Penalize non-alnums for being vertical misfits.
1215  if (!unicharset.get_isalpha(unichar_id) &&
1216  !unicharset.get_isdigit(unichar_id) &&
1217  cn_factors[unichar_id] != 0 && classify_misfit_junk_penalty > 0.0) {
1218  int min_bottom, max_bottom, min_top, max_top;
1219  unicharset.get_top_bottom(unichar_id, &min_bottom, &max_bottom,
1220  &min_top, &max_top);
1221  if (debug) {
1222  tprintf("top=%d, vs [%d, %d], bottom=%d, vs [%d, %d]\n",
1223  top, min_top, max_top, bottom, min_bottom, max_bottom);
1224  }
1225  if (top < min_top || top > max_top ||
1226  bottom < min_bottom || bottom > max_bottom) {
1227  vertical_penalty = classify_misfit_junk_penalty;
1228  }
1229  }
1230  double result = 1.0 - (cn_corrected + miss_penalty + vertical_penalty);
1231  if (result < WORST_POSSIBLE_RATING)
1232  result = WORST_POSSIBLE_RATING;
1233  if (debug) {
1234  tprintf("%s: %2.1f%%(CP%2.1f, IM%2.1f + CN%.2f(%d) + MP%2.1f + VP%2.1f)\n",
1235  unicharset.id_to_unichar(unichar_id),
1236  result * 100.0,
1237  cp_rating * 100.0,
1238  (1.0 - im_rating) * 100.0,
1239  (cn_corrected - (1.0 - im_rating)) * 100.0,
1240  cn_factors[unichar_id],
1241  miss_penalty * 100.0,
1242  vertical_penalty * 100.0);
1243  }
1244  return result;
1245 }
1246 
1247 /*---------------------------------------------------------------------------*/
1266  TBLOB *Blob, const GenericVector<INT_FEATURE_STRUCT>& int_features,
1267  const INT_FX_RESULT_STRUCT& fx_info,
1268  ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results) {
1269  if (int_features.empty()) return nullptr;
1270  auto* CharNormArray = new uint8_t[unicharset.size()];
1271  ClearCharNormArray(CharNormArray);
1272 
1274  PruneClasses(Templates->Templates, int_features.size(), -1, &int_features[0],
1275  CharNormArray, BaselineCutoffs, &Results->CPResults);
1276 
1277  if (matcher_debug_level >= 2 || classify_debug_level > 1)
1278  tprintf("BL Matches = ");
1279 
1280  MasterMatcher(Templates->Templates, int_features.size(), &int_features[0],
1281  CharNormArray,
1282  Templates->Class, matcher_debug_flags, 0,
1283  Blob->bounding_box(), Results->CPResults, Results);
1284 
1285  delete [] CharNormArray;
1286  CLASS_ID ClassId = Results->best_unichar_id;
1287  if (ClassId == INVALID_UNICHAR_ID || Results->best_match_index < 0)
1288  return nullptr;
1289 
1290  return Templates->Class[ClassId]->
1291  Config[Results->match[Results->best_match_index].config].Perm->Ambigs;
1292 } /* BaselineClassifier */
1293 
1294 
1295 /*---------------------------------------------------------------------------*/
1312  const TrainingSample& sample,
1313  ADAPT_RESULTS *adapt_results) {
1314  // This is the length that is used for scaling ratings vs certainty.
1315  adapt_results->BlobLength =
1316  IntCastRounded(sample.outline_length() / kStandardFeatureLength);
1317  GenericVector<UnicharRating> unichar_results;
1318  static_classifier_->UnicharClassifySample(sample, blob->denorm().pix(), 0,
1319  -1, &unichar_results);
1320  // Convert results to the format used internally by AdaptiveClassifier.
1321  for (int r = 0; r < unichar_results.size(); ++r) {
1322  AddNewResult(unichar_results[r], adapt_results);
1323  }
1324  return sample.num_features();
1325 } /* CharNormClassifier */
1326 
1327 // As CharNormClassifier, but operates on a TrainingSample and outputs to
1328 // a GenericVector of UnicharRating without conversion to classes.
1330  int keep_this,
1331  const TrainingSample& sample,
1332  GenericVector<UnicharRating>* results) {
1333  results->clear();
1334  auto* adapt_results = new ADAPT_RESULTS();
1335  adapt_results->Initialize();
1336  // Compute the bounding box of the features.
1337  uint32_t num_features = sample.num_features();
1338  // Only the top and bottom of the blob_box are used by MasterMatcher, so
1339  // fabricate right and left using top and bottom.
1340  TBOX blob_box(sample.geo_feature(GeoBottom), sample.geo_feature(GeoBottom),
1341  sample.geo_feature(GeoTop), sample.geo_feature(GeoTop));
1342  // Compute the char_norm_array from the saved cn_feature.
1343  FEATURE norm_feature = sample.GetCNFeature();
1344  auto* char_norm_array = new uint8_t[unicharset.size()];
1345  int num_pruner_classes = std::max(unicharset.size(),
1347  auto* pruner_norm_array = new uint8_t[num_pruner_classes];
1348  adapt_results->BlobLength =
1349  static_cast<int>(ActualOutlineLength(norm_feature) * 20 + 0.5);
1350  ComputeCharNormArrays(norm_feature, PreTrainedTemplates, char_norm_array,
1351  pruner_norm_array);
1352 
1353  PruneClasses(PreTrainedTemplates, num_features, keep_this, sample.features(),
1354  pruner_norm_array,
1355  shape_table_ != nullptr ? &shapetable_cutoffs_[0] : CharNormCutoffs,
1356  &adapt_results->CPResults);
1357  delete [] pruner_norm_array;
1358  if (keep_this >= 0) {
1359  adapt_results->CPResults[0].Class = keep_this;
1360  adapt_results->CPResults.truncate(1);
1361  }
1362  if (pruner_only) {
1363  // Convert pruner results to output format.
1364  for (int i = 0; i < adapt_results->CPResults.size(); ++i) {
1365  int class_id = adapt_results->CPResults[i].Class;
1366  results->push_back(
1367  UnicharRating(class_id, 1.0f - adapt_results->CPResults[i].Rating));
1368  }
1369  } else {
1370  MasterMatcher(PreTrainedTemplates, num_features, sample.features(),
1371  char_norm_array,
1372  nullptr, matcher_debug_flags,
1374  blob_box, adapt_results->CPResults, adapt_results);
1375  // Convert master matcher results to output format.
1376  for (int i = 0; i < adapt_results->match.size(); i++) {
1377  results->push_back(adapt_results->match[i]);
1378  }
1380  }
1381  delete [] char_norm_array;
1382  delete adapt_results;
1383  return num_features;
1384 } /* CharNormTrainingSample */
1385 
1386 
1387 /*---------------------------------------------------------------------------*/
1400  float rating = results->BlobLength / matcher_avg_noise_size;
1401  rating *= rating;
1402  rating /= 1.0 + rating;
1403 
1404  AddNewResult(UnicharRating(UNICHAR_SPACE, 1.0f - rating), results);
1405 } /* ClassifyAsNoise */
1406 
// Builds a BLOB_CHOICE for each accepted entry of Results->match and appends
// it to Choices (which must not be null; this is asserted).
//
//   denorm, box - passed to denorm.XHeightRange() to obtain the x-height range
//                 and y-shift stored in every choice.
//   Results     - matches to convert; on return Results->match has been
//                 truncated to the number of choices that were produced.
//   Choices     - list that receives the new BLOB_CHOICE objects.
//
// At most max_matches choices are produced. A match is skipped when it is a
// fragment that would take the last free slot while no non-fragment has been
// added yet, or when it is an adapted result whose certainty falls too far
// behind the best certainty seen so far.
// NOTE(review): listing lines 1474 and 1477 (part of the BLOB_CHOICE
// constructor call) are not present in this listing.
1413 void Classify::ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box,
1414  ADAPT_RESULTS *Results,
1415  BLOB_CHOICE_LIST *Choices) {
1416  assert(Choices != nullptr);
1417  float Rating;
1418  float Certainty;
1419  BLOB_CHOICE_IT temp_it;
1420  bool contains_nonfrag = false;
1421  temp_it.set_to_list(Choices);
1422  int choices_length = 0;
1423  // With no shape_table_ maintain the previous MAX_MATCHES as the maximum
1424  // number of returned results, but with a shape_table_ we want to have room
1425  // for at least the biggest shape (which might contain hundreds of Indic
1426  // grapheme fragments) and more, so use double the size of the biggest shape
1427  // if that is more than the default.
1428  int max_matches = MAX_MATCHES;
1429  if (shape_table_ != nullptr) {
1430  max_matches = shape_table_->MaxNumUnichars() * 2;
1431  if (max_matches < MAX_MATCHES)
1432  max_matches = MAX_MATCHES;
1433  }
1434 
1435  float best_certainty = -FLT_MAX;
1436  for (int i = 0; i < Results->match.size(); i++) {
1437  const UnicharRating& result = Results->match[i];
1438  bool adapted = result.adapted;
1439  bool current_is_frag = (unicharset.get_fragment(result.unichar_id) != nullptr);
1440  if (temp_it.length()+1 == max_matches &&
1441  !contains_nonfrag && current_is_frag) {
1442  continue; // look for a non-fragmented character to fill the
1443  // last spot in Choices if only fragments are present
1444  }
1445  // BlobLength can never be legally 0, this means recognition failed.
1446  // But we must return a classification result because some invoking
1447  // functions (chopper/permuter) do not anticipate a null blob choice.
1448  // So we need to assign a poor, but not infinitely bad score.
1449  if (Results->BlobLength == 0) {
1450  Certainty = -20;
1451  Rating = 100; // should be -certainty * real_blob_length
1452  } else {
  // Both values start from the match distance (1 - rating); Rating is then
  // scaled by rating_scale * BlobLength and Certainty by -certainty_scale.
1453  Rating = Certainty = (1.0f - result.rating);
1454  Rating *= rating_scale * Results->BlobLength;
1455  Certainty *= -(getDict().certainty_scale);
1456  }
1457  // Adapted results, by their very nature, should have good certainty.
1458  // Those that don't are at best misleading, and often lead to errors,
1459  // so don't accept adapted results that are too far behind the best result,
1460  // whether adapted or static.
1461  // TODO(rays) find some way of automatically tuning these constants.
1462  if (Certainty > best_certainty) {
  // best_certainty is capped at classify_adapted_pruning_threshold.
1463  best_certainty = std::min(Certainty, static_cast<float>(classify_adapted_pruning_threshold));
1464  } else if (adapted &&
1465  Certainty / classify_adapted_pruning_factor < best_certainty) {
1466  continue; // Don't accept bad adapted results.
1467  }
1468 
1469  float min_xheight, max_xheight, yshift;
1470  denorm.XHeightRange(result.unichar_id, unicharset, box,
1471  &min_xheight, &max_xheight, &yshift);
1472  auto* choice =
1473  new BLOB_CHOICE(result.unichar_id, Rating, Certainty,
1475  min_xheight, max_xheight, yshift,
1476  adapted ? BCC_ADAPTED_CLASSIFIER
1478  choice->set_fonts(result.fonts);
1479  temp_it.add_to_end(choice);
1480  contains_nonfrag |= !current_is_frag; // update contains_nonfrag
1481  choices_length++;
1482  if (choices_length >= max_matches) break;
1483  }
  // NOTE(review): matches skipped by the `continue` statements above are not
  // removed from Results->match, so after this truncation match[i] need not
  // be the entry that produced the i-th choice - confirm callers do not rely
  // on a one-to-one correspondence.
1484  Results->match.truncate(choices_length);
1485 } // ConvertMatchesToChoices
1486 
1487 
1488 /*---------------------------------------------------------------------------*/
1489 #ifndef GRAPHICS_DISABLED
1490 
1498  ADAPT_RESULTS *Results) {
1499  if (static_classifier_ == nullptr) return;
1500  INT_FX_RESULT_STRUCT fx_info;
1503  BlobToTrainingSample(*blob, false, &fx_info, &bl_features);
1504  if (sample == nullptr) return;
1505  static_classifier_->DebugDisplay(*sample, blob->denorm().pix(),
1506  Results->best_unichar_id);
1507 } /* DebugAdaptiveClassifier */
1508 #endif
1509 
1510 /*---------------------------------------------------------------------------*/
1531  UNICHAR_ID *Ambiguities;
1532 
1533  INT_FX_RESULT_STRUCT fx_info;
1537  &bl_features);
1538  if (sample == nullptr) return;
1539 
1540  // TODO: With LSTM, static_classifier_ is nullptr.
1541  // Return to avoid crash in CharNormClassifier.
1542  if (static_classifier_ == nullptr) {
1543  delete sample;
1544  return;
1545  }
1546 
1548  tess_cn_matching) {
1549  CharNormClassifier(Blob, *sample, Results);
1550  } else {
1551  Ambiguities = BaselineClassifier(Blob, bl_features, fx_info,
1552  AdaptedTemplates, Results);
1553  if ((!Results->match.empty() &&
1554  MarginalMatch(Results->best_rating,
1556  !tess_bn_matching) ||
1557  Results->match.empty()) {
1558  CharNormClassifier(Blob, *sample, Results);
1559  } else if (Ambiguities && *Ambiguities >= 0 && !tess_bn_matching) {
1560  AmbigClassifier(bl_features, fx_info, Blob,
1563  Ambiguities,
1564  Results);
1565  }
1566  }
1567 
1568  // Force the blob to be classified as noise
1569  // if the results contain only fragments.
1570  // TODO(daria): verify that this is better than
1571  // just adding a nullptr classification.
1572  if (!Results->HasNonfragment || Results->match.empty())
1573  ClassifyAsNoise(Results);
1574  delete sample;
1575 } /* DoAdaptiveMatch */
1576 
1577 /*---------------------------------------------------------------------------*/
1593  CLASS_ID CorrectClass) {
1594  auto *Results = new ADAPT_RESULTS();
1595  UNICHAR_ID *Ambiguities;
1596  int i;
1597 
1598  Results->Initialize();
1599  INT_FX_RESULT_STRUCT fx_info;
1603  &bl_features);
1604  if (sample == nullptr) {
1605  delete Results;
1606  return nullptr;
1607  }
1608 
1609  CharNormClassifier(Blob, *sample, Results);
1610  delete sample;
1611  RemoveBadMatches(Results);
1612  Results->match.sort(&UnicharRating::SortDescendingRating);
1613 
1614  /* copy the class id's into an string of ambiguities - don't copy if
1615  the correct class is the only class id matched */
1616  Ambiguities = new UNICHAR_ID[Results->match.size() + 1];
1617  if (Results->match.size() > 1 ||
1618  (Results->match.size() == 1 &&
1619  Results->match[0].unichar_id != CorrectClass)) {
1620  for (i = 0; i < Results->match.size(); i++)
1621  Ambiguities[i] = Results->match[i].unichar_id;
1622  Ambiguities[i] = -1;
1623  } else {
1624  Ambiguities[0] = -1;
1625  }
1626 
1627  delete Results;
1628  return Ambiguities;
1629 } /* GetAmbiguities */
1630 
1631 // Returns true if the given blob looks too dissimilar to any character
1632 // present in the classifier templates.
1634  auto *ratings = new BLOB_CHOICE_LIST();
1635  AdaptiveClassifier(blob, ratings);
1636  BLOB_CHOICE_IT ratings_it(ratings);
1639  print_ratings_list("======================\nLooksLikeGarbage() got ",
1640  ratings, unicharset);
1641  }
1642  for (ratings_it.mark_cycle_pt(); !ratings_it.cycled_list();
1643  ratings_it.forward()) {
1644  if (unicharset.get_fragment(ratings_it.data()->unichar_id()) != nullptr) {
1645  continue;
1646  }
1647  float certainty = ratings_it.data()->certainty();
1648  delete ratings;
1649  return certainty <
1651  }
1652  delete ratings;
1653  return true; // no whole characters in ratings
1654 }
1655 
1656 /*---------------------------------------------------------------------------*/
1679  INT_TEMPLATES templates,
1680  uint8_t* pruner_norm_array,
1681  uint8_t* char_norm_array) {
1682  FEATURE norm_feature = NewFeature(&CharNormDesc);
1683  float baseline = kBlnBaselineOffset;
1684  float scale = MF_SCALE_FACTOR;
1685  norm_feature->Params[CharNormY] = (fx_info.Ymean - baseline) * scale;
1686  norm_feature->Params[CharNormLength] =
1687  fx_info.Length * scale / LENGTH_COMPRESSION;
1688  norm_feature->Params[CharNormRx] = fx_info.Rx * scale;
1689  norm_feature->Params[CharNormRy] = fx_info.Ry * scale;
1690  // Deletes norm_feature.
1691  ComputeCharNormArrays(norm_feature, templates, char_norm_array,
1692  pruner_norm_array);
1693  return IntCastRounded(fx_info.Length / kStandardFeatureLength);
1694 } /* GetCharNormFeature */
1695 
1696 // Computes the char_norm_array for the unicharset and, if not nullptr, the
1697 // pruner_array as appropriate according to the existence of the shape_table.
1699  INT_TEMPLATES_STRUCT* templates,
1700  uint8_t* char_norm_array,
1701  uint8_t* pruner_array) {
1702  ComputeIntCharNormArray(*norm_feature, char_norm_array);
1703  if (pruner_array != nullptr) {
1704  if (shape_table_ == nullptr) {
1705  ComputeIntCharNormArray(*norm_feature, pruner_array);
1706  } else {
1707  memset(pruner_array, UINT8_MAX,
1708  templates->NumClasses * sizeof(pruner_array[0]));
1709  // Each entry in the pruner norm array is the MIN of all the entries of
1710  // the corresponding unichars in the CharNormArray.
1711  for (int id = 0; id < templates->NumClasses; ++id) {
1712  int font_set_id = templates->Class[id]->font_set_id;
1713  const FontSet &fs = fontset_table_.get(font_set_id);
1714  for (int config = 0; config < fs.size; ++config) {
1715  const Shape& shape = shape_table_->GetShape(fs.configs[config]);
1716  for (int c = 0; c < shape.size(); ++c) {
1717  if (char_norm_array[shape[c].unichar_id] < pruner_array[id])
1718  pruner_array[id] = char_norm_array[shape[c].unichar_id];
1719  }
1720  }
1721  }
1722  }
1723  }
1724  FreeFeature(norm_feature);
1725 }
1726 
1727 /*---------------------------------------------------------------------------*/
1741  CLASS_ID ClassId,
1742  int FontinfoId,
1743  int NumFeatures,
1744  INT_FEATURE_ARRAY Features,
1745  FEATURE_SET FloatFeatures) {
1746  INT_CLASS IClass;
1747  ADAPT_CLASS Class;
1748  PROTO_ID OldProtos[MAX_NUM_PROTOS];
1749  FEATURE_ID BadFeatures[MAX_NUM_INT_FEATURES];
1750  int NumOldProtos;
1751  int NumBadFeatures;
1752  int MaxProtoId, OldMaxProtoId;
1753  int MaskSize;
1754  int ConfigId;
1756  int i;
1757  int debug_level = NO_DEBUG;
1758 
1760  debug_level =
1762 
1763  IClass = ClassForClassId(Templates->Templates, ClassId);
1764  Class = Templates->Class[ClassId];
1765 
1766  if (IClass->NumConfigs >= MAX_NUM_CONFIGS) {
1767  ++NumAdaptationsFailed;
1769  cprintf("Cannot make new temporary config: maximum number exceeded.\n");
1770  return -1;
1771  }
1772 
1773  OldMaxProtoId = IClass->NumProtos - 1;
1774 
1775  NumOldProtos = im_.FindGoodProtos(IClass, AllProtosOn, AllConfigsOff,
1776  NumFeatures, Features,
1777  OldProtos, classify_adapt_proto_threshold,
1778  debug_level);
1779 
1780  MaskSize = WordsInVectorOfSize(MAX_NUM_PROTOS);
1781  zero_all_bits(TempProtoMask, MaskSize);
1782  for (i = 0; i < NumOldProtos; i++)
1783  SET_BIT(TempProtoMask, OldProtos[i]);
1784 
1785  NumBadFeatures = im_.FindBadFeatures(IClass, TempProtoMask, AllConfigsOn,
1786  NumFeatures, Features,
1787  BadFeatures,
1789  debug_level);
1790 
1791  MaxProtoId = MakeNewTempProtos(FloatFeatures, NumBadFeatures, BadFeatures,
1792  IClass, Class, TempProtoMask);
1793  if (MaxProtoId == NO_PROTO) {
1794  ++NumAdaptationsFailed;
1796  cprintf("Cannot make new temp protos: maximum number exceeded.\n");
1797  return -1;
1798  }
1799 
1800  ConfigId = AddIntConfig(IClass);
1801  ConvertConfig(TempProtoMask, ConfigId, IClass);
1802  Config = NewTempConfig(MaxProtoId, FontinfoId);
1803  TempConfigFor(Class, ConfigId) = Config;
1804  copy_all_bits(TempProtoMask, Config->Protos, Config->ProtoVectorSize);
1805 
1807  cprintf("Making new temp config %d fontinfo id %d"
1808  " using %d old and %d new protos.\n",
1809  ConfigId, Config->FontinfoId,
1810  NumOldProtos, MaxProtoId - OldMaxProtoId);
1811 
1812  return ConfigId;
1813 } /* MakeNewTemporaryConfig */
1814 
1815 /*---------------------------------------------------------------------------*/
1835  int NumBadFeat,
1836  FEATURE_ID BadFeat[],
1837  INT_CLASS IClass,
1838  ADAPT_CLASS Class,
1839  BIT_VECTOR TempProtoMask) {
1840  FEATURE_ID *ProtoStart;
1841  FEATURE_ID *ProtoEnd;
1842  FEATURE_ID *LastBad;
1843  TEMP_PROTO TempProto;
1844  PROTO Proto;
1845  FEATURE F1, F2;
1846  float X1, X2, Y1, Y2;
1847  float A1, A2, AngleDelta;
1848  float SegmentLength;
1849  PROTO_ID Pid;
1850 
1851  for (ProtoStart = BadFeat, LastBad = ProtoStart + NumBadFeat;
1852  ProtoStart < LastBad; ProtoStart = ProtoEnd) {
1853  F1 = Features->Features[*ProtoStart];
1854  X1 = F1->Params[PicoFeatX];
1855  Y1 = F1->Params[PicoFeatY];
1856  A1 = F1->Params[PicoFeatDir];
1857 
1858  for (ProtoEnd = ProtoStart + 1,
1859  SegmentLength = GetPicoFeatureLength();
1860  ProtoEnd < LastBad;
1861  ProtoEnd++, SegmentLength += GetPicoFeatureLength()) {
1862  F2 = Features->Features[*ProtoEnd];
1863  X2 = F2->Params[PicoFeatX];
1864  Y2 = F2->Params[PicoFeatY];
1865  A2 = F2->Params[PicoFeatDir];
1866 
1867  AngleDelta = fabs(A1 - A2);
1868  if (AngleDelta > 0.5)
1869  AngleDelta = 1.0 - AngleDelta;
1870 
1871  if (AngleDelta > matcher_clustering_max_angle_delta ||
1872  fabs(X1 - X2) > SegmentLength ||
1873  fabs(Y1 - Y2) > SegmentLength)
1874  break;
1875  }
1876 
1877  F2 = Features->Features[*(ProtoEnd - 1)];
1878  X2 = F2->Params[PicoFeatX];
1879  Y2 = F2->Params[PicoFeatY];
1880  A2 = F2->Params[PicoFeatDir];
1881 
1882  Pid = AddIntProto(IClass);
1883  if (Pid == NO_PROTO)
1884  return (NO_PROTO);
1885 
1886  TempProto = NewTempProto();
1887  Proto = &(TempProto->Proto);
1888 
1889  /* compute proto params - NOTE that Y_DIM_OFFSET must be used because
1890  ConvertProto assumes that the Y dimension varies from -0.5 to 0.5
1891  instead of the -0.25 to 0.75 used in baseline normalization */
1892  Proto->Length = SegmentLength;
1893  Proto->Angle = A1;
1894  Proto->X = (X1 + X2) / 2.0;
1895  Proto->Y = (Y1 + Y2) / 2.0 - Y_DIM_OFFSET;
1896  FillABC(Proto);
1897 
1898  TempProto->ProtoId = Pid;
1899  SET_BIT(TempProtoMask, Pid);
1900 
1901  ConvertProto(Proto, Pid, IClass);
1902  AddProtoToProtoPruner(Proto, Pid, IClass,
1904 
1905  Class->TempProtos = push(Class->TempProtos, TempProto);
1906  }
1907  return IClass->NumProtos - 1;
1908 } /* MakeNewTempProtos */
1909 
1910 /*---------------------------------------------------------------------------*/
1921  CLASS_ID ClassId,
1922  int ConfigId,
1923  TBLOB *Blob) {
1924  UNICHAR_ID *Ambigs;
1926  ADAPT_CLASS Class;
1927  PROTO_KEY ProtoKey;
1928 
1929  Class = Templates->Class[ClassId];
1930  Config = TempConfigFor(Class, ConfigId);
1931 
1932  MakeConfigPermanent(Class, ConfigId);
1933  if (Class->NumPermConfigs == 0)
1934  Templates->NumPermClasses++;
1935  Class->NumPermConfigs++;
1936 
1937  // Initialize permanent config.
1938  Ambigs = GetAmbiguities(Blob, ClassId);
1939  auto Perm = static_cast<PERM_CONFIG>(malloc(sizeof(PERM_CONFIG_STRUCT)));
1940  Perm->Ambigs = Ambigs;
1941  Perm->FontinfoId = Config->FontinfoId;
1942 
1943  // Free memory associated with temporary config (since ADAPTED_CONFIG
1944  // is a union we need to clean up before we record permanent config).
1945  ProtoKey.Templates = Templates;
1946  ProtoKey.ClassId = ClassId;
1947  ProtoKey.ConfigId = ConfigId;
1948  Class->TempProtos = delete_d(Class->TempProtos, &ProtoKey, MakeTempProtoPerm);
1950 
1951  // Record permanent config.
1952  PermConfigFor(Class, ConfigId) = Perm;
1953 
1954  if (classify_learning_debug_level >= 1) {
1955  tprintf("Making config %d for %s (ClassId %d) permanent:"
1956  " fontinfo id %d, ambiguities '",
1957  ConfigId, getDict().getUnicharset().debug_str(ClassId).c_str(),
1958  ClassId, PermConfigFor(Class, ConfigId)->FontinfoId);
1959  for (UNICHAR_ID *AmbigsPointer = Ambigs;
1960  *AmbigsPointer >= 0; ++AmbigsPointer)
1961  tprintf("%s", unicharset.id_to_unichar(*AmbigsPointer));
1962  tprintf("'.\n");
1963  }
1964 } /* MakePermanent */
1965 } // namespace tesseract
1966 
1967 /*---------------------------------------------------------------------------*/
// Predicate handed to delete_d() by Classify::MakePermanent when it walks a
// class's TempProtos list.
//
//   item1 - the list element, cast to TEMP_PROTO.
//   item2 - the search key, cast to PROTO_KEY* (Templates, ClassId, ConfigId).
//
// Returns false, leaving the proto untouched, when the proto's id is greater
// than the config's MaxProtoId or its bit is not set in the config's Protos
// bit vector. Otherwise the proto is made permanent in the class, added to
// the class pruner of the key's templates, freed, and true is returned.
// Presumably a true result tells delete_d() to unlink the element - TODO
// confirm against delete_d().
// NOTE(review): the declaration of `Config` (listing line 1982) is missing
// from this listing; presumably it is a TEMP_CONFIG local - confirm.
1980 int MakeTempProtoPerm(void *item1, void *item2) {
1981  ADAPT_CLASS Class;
1983  TEMP_PROTO TempProto;
1984  PROTO_KEY *ProtoKey;
1985 
1986  TempProto = static_cast<TEMP_PROTO>(item1);
1987  ProtoKey = static_cast<PROTO_KEY *>(item2);
1988 
1989  Class = ProtoKey->Templates->Class[ProtoKey->ClassId];
1990  Config = TempConfigFor(Class, ProtoKey->ConfigId);
1991 
  // Protos that do not belong to this config are left in place.
1992  if (TempProto->ProtoId > Config->MaxProtoId ||
1993  !test_bit (Config->Protos, TempProto->ProtoId))
1994  return false;
1995 
1996  MakeProtoPermanent(Class, TempProto->ProtoId);
1997  AddProtoToClassPruner(&(TempProto->Proto), ProtoKey->ClassId,
1998  ProtoKey->Templates->Templates);
1999  FreeTempProto(TempProto);
2000 
2001  return true;
2002 } /* MakeTempProtoPerm */
2003 
2004 /*---------------------------------------------------------------------------*/
2005 namespace tesseract {
2014  for (int i = 0; i < results.match.size(); ++i) {
2015  tprintf("%s ", unicharset.debug_str(results.match[i].unichar_id).c_str());
2016  results.match[i].Print();
2017  }
2018 } /* PrintAdaptiveMatchResults */
2019 
2020 /*---------------------------------------------------------------------------*/
2034  int Next, NextGood;
2035  float BadMatchThreshold;
2036  static const char* romans = "i v x I V X";
2037  BadMatchThreshold = Results->best_rating - matcher_bad_match_pad;
2038 
2040  UNICHAR_ID unichar_id_one = unicharset.contains_unichar("1") ?
2041  unicharset.unichar_to_id("1") : -1;
2042  UNICHAR_ID unichar_id_zero = unicharset.contains_unichar("0") ?
2043  unicharset.unichar_to_id("0") : -1;
2044  float scored_one = ScoredUnichar(unichar_id_one, *Results);
2045  float scored_zero = ScoredUnichar(unichar_id_zero, *Results);
2046 
2047  for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
2048  const UnicharRating& match = Results->match[Next];
2049  if (match.rating >= BadMatchThreshold) {
2050  if (!unicharset.get_isalpha(match.unichar_id) ||
2051  strstr(romans,
2052  unicharset.id_to_unichar(match.unichar_id)) != nullptr) {
2053  } else if (unicharset.eq(match.unichar_id, "l") &&
2054  scored_one < BadMatchThreshold) {
2055  Results->match[Next].unichar_id = unichar_id_one;
2056  } else if (unicharset.eq(match.unichar_id, "O") &&
2057  scored_zero < BadMatchThreshold) {
2058  Results->match[Next].unichar_id = unichar_id_zero;
2059  } else {
2060  Results->match[Next].unichar_id = INVALID_UNICHAR_ID; // Don't copy.
2061  }
2062  if (Results->match[Next].unichar_id != INVALID_UNICHAR_ID) {
2063  if (NextGood == Next) {
2064  ++NextGood;
2065  } else {
2066  Results->match[NextGood++] = Results->match[Next];
2067  }
2068  }
2069  }
2070  }
2071  } else {
2072  for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
2073  if (Results->match[Next].rating >= BadMatchThreshold) {
2074  if (NextGood == Next) {
2075  ++NextGood;
2076  } else {
2077  Results->match[NextGood++] = Results->match[Next];
2078  }
2079  }
2080  }
2081  }
2082  Results->match.truncate(NextGood);
2083 } /* RemoveBadMatches */
2084 
2085 /*----------------------------------------------------------------------------*/
2094  int Next, NextGood;
2095  int punc_count; /*no of garbage characters */
2096  int digit_count;
2097  /*garbage characters */
2098  static char punc_chars[] = ". , ; : / ` ~ ' - = \\ | \" ! _ ^";
2099  static char digit_chars[] = "0 1 2 3 4 5 6 7 8 9";
2100 
2101  punc_count = 0;
2102  digit_count = 0;
2103  for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
2104  const UnicharRating& match = Results->match[Next];
2105  bool keep = true;
2106  if (strstr(punc_chars,
2107  unicharset.id_to_unichar(match.unichar_id)) != nullptr) {
2108  if (punc_count >= 2)
2109  keep = false;
2110  punc_count++;
2111  } else {
2112  if (strstr(digit_chars,
2113  unicharset.id_to_unichar(match.unichar_id)) != nullptr) {
2114  if (digit_count >= 1)
2115  keep = false;
2116  digit_count++;
2117  }
2118  }
2119  if (keep) {
2120  if (NextGood == Next) {
2121  ++NextGood;
2122  } else {
2123  Results->match[NextGood++] = match;
2124  }
2125  }
2126  }
2127  Results->match.truncate(NextGood);
2128 } /* RemoveExtraPuncs */
2129 
2130 /*---------------------------------------------------------------------------*/
// Remaps Threshold and applies it as an integer in the range 0..255.
// A Threshold exactly equal to matcher_good_threshold maps to 0.9; any other
// value maps to 1.0 - Threshold. The remapped value is multiplied by 255 and
// clipped to [0, 255] for each of two calls.
// NOTE(review): the receivers of those two calls (listing lines 2143 and
// 2145) are missing from this listing; presumably they set the adaptive
// proto and feature thresholds - TODO confirm.
// NOTE(review): the comparison with matcher_good_threshold is an exact
// floating-point equality test.
2141 void Classify::SetAdaptiveThreshold(float Threshold) {
2142  Threshold = (Threshold == matcher_good_threshold) ? 0.9: (1.0 - Threshold);
2144  ClipToRange<int>(255 * Threshold, 0, 255));
2146  ClipToRange<int>(255 * Threshold, 0, 255));
2147 } /* SetAdaptiveThreshold */
2148 
2149 /*---------------------------------------------------------------------------*/
// Debug display of the best match of the given features against the
// pre-trained templates of class/shape shape_id. The whole body is compiled
// out when GRAPHICS_DISABLED is defined.
//
//   shape_id     - class/shape to look up in PreTrainedTemplates.
//   features     - integer features of the blob.
//   num_features - number of entries in features; must be > 0.
//
// Prints a message and returns early when shape_id has no built-in templates
// or when num_features is not positive.
// NOTE(review): several lines of this function (listing lines 2174-2175,
// 2177-2178, 2184 and 2186-2188, i.e. the heads and tails of the two matcher
// calls) are missing from this listing.
2159 void Classify::ShowBestMatchFor(int shape_id,
2160  const INT_FEATURE_STRUCT* features,
2161  int num_features) {
2162 #ifndef GRAPHICS_DISABLED
2163  uint32_t config_mask;
2164  if (UnusedClassIdIn(PreTrainedTemplates, shape_id)) {
2165  tprintf("No built-in templates for class/shape %d\n", shape_id);
2166  return;
2167  }
2168  if (num_features <= 0) {
2169  tprintf("Illegal blob (char norm features)!\n");
2170  return;
2171  }
2172  UnicharRating cn_result;
2173  classify_norm_method.set_value(character);
2176  num_features, features, &cn_result,
2179  tprintf("\n");
  // Mask selecting only the config recorded in cn_result.
  // NOTE(review): the literal 1 is a signed int; if cn_result.config can be
  // 31 or larger this shift does not fit the 32-bit mask - confirm the range.
2180  config_mask = 1 << cn_result.config;
2181 
2182  tprintf("Static Shape ID: %d\n", shape_id);
2183  ShowMatchDisplay();
2185  &config_mask, num_features, features, &cn_result,
2189 #endif // GRAPHICS_DISABLED
2190 } /* ShowBestMatchFor */
2191 
2192 // Returns a string for the classifier class_id: either the corresponding
2193 // unicharset debug_str or the shape_table_ debug str.
2195  int class_id, int config_id) const {
2196  STRING class_string;
2197  if (templates == PreTrainedTemplates && shape_table_ != nullptr) {
2198  int shape_id = ClassAndConfigIDToFontOrShapeID(class_id, config_id);
2199  class_string = shape_table_->DebugStr(shape_id);
2200  } else {
2201  class_string = unicharset.debug_str(class_id);
2202  }
2203  return class_string;
2204 }
2205 
2206 // Converts a classifier class_id index to a shape_table_ index
2208  int int_result_config) const {
2209  int font_set_id = PreTrainedTemplates->Class[class_id]->font_set_id;
2210  // Older inttemps have no font_ids.
2211  if (font_set_id < 0)
2212  return kBlankFontinfoId;
2213  const FontSet &fs = fontset_table_.get(font_set_id);
2214  ASSERT_HOST(int_result_config >= 0 && int_result_config < fs.size);
2215  return fs.configs[int_result_config];
2216 }
2217 
2218 // Converts a shape_table_ index to a classifier class_id index (not a
2219 // unichar-id!). Uses a search, so not fast.
2220 int Classify::ShapeIDToClassID(int shape_id) const {
2221  for (int id = 0; id < PreTrainedTemplates->NumClasses; ++id) {
2222  int font_set_id = PreTrainedTemplates->Class[id]->font_set_id;
2223  ASSERT_HOST(font_set_id >= 0);
2224  const FontSet &fs = fontset_table_.get(font_set_id);
2225  for (int config = 0; config < fs.size; ++config) {
2226  if (fs.configs[config] == shape_id)
2227  return id;
2228  }
2229  }
2230  tprintf("Shape %d not found\n", shape_id);
2231  return -1;
2232 }
2233 
2234 // Returns true if the given TEMP_CONFIG is good enough to make it
2235 // a permanent config.
2237  const TEMP_CONFIG &config) {
2238  if (classify_learning_debug_level >= 1) {
2239  tprintf("NumTimesSeen for config of %s is %d\n",
2240  getDict().getUnicharset().debug_str(class_id).c_str(),
2241  config->NumTimesSeen);
2242  }
2244  return true;
2245  } else if (config->NumTimesSeen < matcher_min_examples_for_prototyping) {
2246  return false;
2247  } else if (use_ambigs_for_adaption) {
2248  // Go through the ambigs vector and see whether we have already seen
2249  // enough times all the characters represented by the ambigs vector.
2250  const UnicharIdVector *ambigs =
2252  int ambigs_size = (ambigs == nullptr) ? 0 : ambigs->size();
2253  for (int ambig = 0; ambig < ambigs_size; ++ambig) {
2254  ADAPT_CLASS ambig_class = AdaptedTemplates->Class[(*ambigs)[ambig]];
2255  assert(ambig_class != nullptr);
2256  if (ambig_class->NumPermConfigs == 0 &&
2257  ambig_class->MaxNumTimesSeen <
2259  if (classify_learning_debug_level >= 1) {
2260  tprintf("Ambig %s has not been seen enough times,"
2261  " not making config for %s permanent\n",
2262  getDict().getUnicharset().debug_str(
2263  (*ambigs)[ambig]).c_str(),
2264  getDict().getUnicharset().debug_str(class_id).c_str());
2265  }
2266  return false;
2267  }
2268  }
2269  }
2270  return true;
2271 }
2272 
2274  const UnicharIdVector *ambigs =
2276  int ambigs_size = (ambigs == nullptr) ? 0 : ambigs->size();
2277  if (classify_learning_debug_level >= 1) {
2278  tprintf("Running UpdateAmbigsGroup for %s class_id=%d\n",
2279  getDict().getUnicharset().debug_str(class_id).c_str(), class_id);
2280  }
2281  for (int ambig = 0; ambig < ambigs_size; ++ambig) {
2282  CLASS_ID ambig_class_id = (*ambigs)[ambig];
2283  const ADAPT_CLASS ambigs_class = AdaptedTemplates->Class[ambig_class_id];
2284  for (int cfg = 0; cfg < MAX_NUM_CONFIGS; ++cfg) {
2285  if (ConfigIsPermanent(ambigs_class, cfg)) continue;
2286  const TEMP_CONFIG config =
2287  TempConfigFor(AdaptedTemplates->Class[ambig_class_id], cfg);
2288  if (config != nullptr && TempConfigReliable(ambig_class_id, config)) {
2289  if (classify_learning_debug_level >= 1) {
2290  tprintf("Making config %d of %s permanent\n", cfg,
2291  getDict().getUnicharset().debug_str(
2292  ambig_class_id).c_str());
2293  }
2294  MakePermanent(AdaptedTemplates, ambig_class_id, cfg, Blob);
2295  }
2296  }
2297  }
2298 }
2299 
2300 } // namespace tesseract
IntegerMatcher::Match
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:510
tesseract::Classify::tessedit_class_miss_scale
double tessedit_class_miss_scale
Definition: classify.h:475
INT_TEMPLATES_STRUCT
Definition: intproto.h:117
tesseract::Classify::AllProtosOn
BIT_VECTOR AllProtosOn
Definition: classify.h:522
INT_CLASS_STRUCT::font_set_id
int font_set_id
Definition: intproto.h:111
tesseract::Classify::BaselineClassifier
UNICHAR_ID * BaselineClassifier(TBLOB *Blob, const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results)
Definition: adaptmatch.cpp:1265
TBLOB::ClassifyNormalizeIfNeeded
TBLOB * ClassifyNormalizeIfNeeded() const
Definition: blobs.cpp:345
PROTO_STRUCT::Length
float Length
Definition: protos.h:41
tesseract::Classify::classify_enable_adaptive_matcher
bool classify_enable_adaptive_matcher
Definition: classify.h:445
tesseract::ShapeClassifier::DebugDisplay
virtual void DebugDisplay(const TrainingSample &sample, Pix *page_pix, UNICHAR_ID unichar_id)
Definition: shapeclassifier.cpp:96
ScrollView
Definition: scrollview.h:97
WERD_RES::ComputeAdaptionThresholds
void ComputeAdaptionThresholds(float certainty_scale, float min_rating, float max_rating, float rating_margin, float *thresholds)
Definition: pageres.cpp:557
strngs.h
picofeat.h
normalis.h
tesseract::Classify::ClearCharNormArray
void ClearCharNormArray(uint8_t *char_norm_array)
Definition: float2int.cpp:44
CLASS_ID
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:33
intfx.h
SetAdaptiveThreshold
void SetAdaptiveThreshold(float Threshold)
tesseract::CCUtil::use_ambigs_for_adaption
bool use_ambigs_for_adaption
Definition: ccutil.h:73
TEMP_PROTO_STRUCT::ProtoId
uint16_t ProtoId
Definition: adaptive.h:41
pageres.h
ADAPT_CLASS_STRUCT::MaxNumTimesSeen
uint8_t MaxNumTimesSeen
Definition: adaptive.h:56
tesseract::Classify::matcher_clustering_max_angle_delta
double matcher_clustering_max_angle_delta
Definition: classify.h:468
tesseract::Classify::ExpandShapesAndApplyCorrections
void ExpandShapesAndApplyCorrections(ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, const uint8_t *cn_factors, UnicharRating *int_result, ADAPT_RESULTS *final_results)
Definition: adaptmatch.cpp:1128
ADAPT_CLASS_STRUCT::PermConfigs
BIT_VECTOR PermConfigs
Definition: adaptive.h:59
dict.h
tesseract::Classify::NormProtos
NORM_PROTOS * NormProtos
Definition: classify.h:527
tesseract::Classify::classify_norm_method
int classify_norm_method
Definition: classify.h:434
tesseract::Classify::matcher_debug_level
int matcher_debug_level
Definition: classify.h:453
kBlnXHeight
const int kBlnXHeight
Definition: normalis.h:23
tesseract::BlobToTrainingSample
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
Definition: intfx.cpp:75
WERD_RES::PiecesAllNatural
bool PiecesAllNatural(int start, int count) const
Definition: pageres.cpp:1074
unicity_table.h
tesseract::Shape
Definition: shapetable.h:184
TempConfigFor
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:90
ADAPT_RESULTS
Definition: adaptmatch.cpp:91
tesseract::TessdataManager
Definition: tessdatamanager.h:126
tesseract::Classify::prioritize_division
bool prioritize_division
Definition: classify.h:428
PROTO_KEY::ConfigId
int ConfigId
Definition: adaptmatch.cpp:125
WERD_RES::AlternativeChoiceAdjustmentsWorseThan
bool AlternativeChoiceAdjustmentsWorseThan(float threshold) const
Definition: pageres.cpp:435
UNICHARSET::get_isdigit
bool get_isdigit(UNICHAR_ID unichar_id) const
Definition: unicharset.h:502
WERD_RES::rebuild_word
TWERD * rebuild_word
Definition: pageres.h:260
tesseract::Classify::fontinfo_table_
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:529
UNICHARSET::get_isalpha
bool get_isalpha(UNICHAR_ID unichar_id) const
Definition: unicharset.h:481
tesseract::UnicharRating
Definition: shapetable.h:40
ADAPT_RESULTS::ComputeBest
void ComputeBest()
Definition: adaptmatch.cpp:108
tesseract::UnicharRating::unichar_id
UNICHAR_ID unichar_id
Definition: shapetable.h:74
PERM_CONFIG_STRUCT
Definition: adaptive.h:43
ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:87
tesseract::Classify::classify_adapt_proto_threshold
int classify_adapt_proto_threshold
Definition: classify.h:481
ADAPT_RESULTS::best_rating
float best_rating
Definition: adaptmatch.cpp:96
INT_CLASS_STRUCT
Definition: intproto.h:104
IntegerMatcher::ApplyCNCorrection
float ApplyCNCorrection(float rating, int blob_length, int normalization_factor, int matcher_multiplier)
Definition: intmatcher.cpp:1223
tesseract::Classify::MasterMatcher
void MasterMatcher(INT_TEMPLATES templates, int16_t num_features, const INT_FEATURE_STRUCT *features, const uint8_t *norm_factors, ADAPT_CLASS *classes, int debug, int matcher_multiplier, const TBOX &blob_box, const GenericVector< CP_RESULT_STRUCT > &results, ADAPT_RESULTS *final_results)
Definition: adaptmatch.cpp:1088
tesseract::Classify::MakeNewTemporaryConfig
int MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures)
Definition: adaptmatch.cpp:1740
NO_DEBUG
#define NO_DEBUG
Definition: adaptmatch.cpp:79
tesseract::Classify::classify_enable_learning
bool classify_enable_learning
Definition: classify.h:429
tesseract::UnicharRating::fonts
GenericVector< ScoredFont > fonts
Definition: shapetable.h:87
TBLOB::denorm
const DENORM & denorm() const
Definition: blobs.h:361
PROTO_STRUCT
Definition: protos.h:34
baseline
Definition: mfoutline.h:62
INT_FX_RESULT_STRUCT
Definition: intfx.h:34
tesseract::Dict::getUnicharAmbigs
const UnicharAmbigs & getUnicharAmbigs() const
Definition: dict.h:108
tesseract::Classify::EnableLearning
bool EnableLearning
Definition: classify.h:577
tesseract::Classify::CharNormClassifier
int CharNormClassifier(TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results)
Definition: adaptmatch.cpp:1311
tesseract::Classify::classify_enable_adaptive_debugger
bool classify_enable_adaptive_debugger
Definition: classify.h:450
mfoutline.h
tesseract::Classify::EndAdaptiveClassifier
void EndAdaptiveClassifier()
Definition: adaptmatch.cpp:459
params.h
TBLOB::plot
void plot(ScrollView *window, ScrollView::Color color, ScrollView::Color child_color)
Definition: blobs.cpp:508
tesseract::Classify::TempProtoMask
BIT_VECTOR TempProtoMask
Definition: classify.h:525
outfeat.h
PROTO_ID
int16_t PROTO_ID
Definition: matchdefs.h:39
kStandardFeatureLength
const double kStandardFeatureLength
Definition: intfx.h:45
tesseract::Classify::AdaptableWord
bool AdaptableWord(WERD_RES *word)
Definition: adaptmatch.cpp:821
OutlineFeatLength
Definition: outfeat.h:44
tesseract::Classify::matcher_good_threshold
double matcher_good_threshold
Definition: classify.h:456
tesseract::Classify::classify_adapted_pruning_threshold
double classify_adapted_pruning_threshold
Definition: classify.h:479
TBOX::top
int16_t top() const
Definition: rect.h:57
tesseract::Classify::matcher_permanent_classes_min
int matcher_permanent_classes_min
Definition: classify.h:462
tesseract::TessClassifier
Definition: tessclassifier.h:36
Config
CLUSTERCONFIG Config
Definition: commontraining.cpp:88
STRING
Definition: strngs.h:45
test_bit
#define test_bit(array, bit)
Definition: bitvec.h:58
tesseract::Classify::PrintAdaptedTemplates
void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
Definition: adaptive.cpp:244
BCC_ADAPTED_CLASSIFIER
Definition: ratngs.h:43
tesseract::UnicharAmbigs::ReverseAmbigsForAdaption
const UnicharIdVector * ReverseAmbigsForAdaption(UNICHAR_ID unichar_id) const
Definition: ambigs.h:192
ScrollView::BROWN
Definition: scrollview.h:120
FEATURE_STRUCT
Definition: ocrfeatures.h:58
WERD_RES::fontinfo
const FontInfo * fontinfo
Definition: pageres.h:303
WERD_RES
Definition: pageres.h:160
tesseract::Classify::UpdateAmbigsGroup
void UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob)
Definition: adaptmatch.cpp:2273
cprintf
void cprintf(const char *format,...)
Definition: callcpp.cpp:32
tesseract::Dict::EndDangerousAmbigs
void EndDangerousAmbigs()
Definition: stopper.cpp:374
ADAPT_TEMPLATES_STRUCT
Definition: adaptive.h:65
tesseract::Classify::ConvertProto
void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class)
Definition: intproto.cpp:487
tesseract::CCUtil::imagefile
STRING imagefile
Definition: ccutil.h:61
PicoFeatY
Definition: picofeat.h:43
tesseract::Classify::ConvertMatchesToChoices
void ConvertMatchesToChoices(const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices)
Definition: adaptmatch.cpp:1413
tesseract::Classify::LearnWord
void LearnWord(const char *fontname, WERD_RES *word)
Definition: adaptmatch.cpp:250
tesseract::Classify::classify_learning_debug_level
int classify_learning_debug_level
Definition: classify.h:455
IntCastRounded
int IntCastRounded(double x)
Definition: helpers.h:173
MF_SCALE_FACTOR
const float MF_SCALE_FACTOR
Definition: mfoutline.h:70
tesseract::Dict::SettupStopperPass1
void SettupStopperPass1()
Sets up stopper variables in preparation for the first pass.
Definition: stopper.cpp:378
tesseract::Classify::SetupBLCNDenorms
static void SetupBLCNDenorms(const TBLOB &blob, bool nonlinear_norm, DENORM *bl_denorm, DENORM *cn_denorm, INT_FX_RESULT_STRUCT *fx_info)
Definition: intfx.cpp:127
WERD_RES::best_state
GenericVector< int > best_state
Definition: pageres.h:279
tesseract::Classify::LooksLikeGarbage
bool LooksLikeGarbage(TBLOB *blob)
Definition: adaptmatch.cpp:1633
INT_CLASS_STRUCT::NumProtos
uint16_t NumProtos
Definition: intproto.h:105
tesseract::Classify::RemoveBadMatches
void RemoveBadMatches(ADAPT_RESULTS *Results)
Definition: adaptmatch.cpp:2033
ADAPT_TEMPLATES_STRUCT::NumNonEmptyClasses
int NumNonEmptyClasses
Definition: adaptive.h:67
UNICHARSET::eq
bool eq(UNICHAR_ID unichar_id, const char *const unichar_repr) const
Definition: unicharset.cpp:686
PRINT_FEATURE_MATCHES
#define PRINT_FEATURE_MATCHES
Definition: intproto.h:190
OutlineFeatDir
Definition: outfeat.h:45
rect.h
tesseract::CCUtil::language_data_path_prefix
STRING language_data_path_prefix
Definition: ccutil.h:56
tesseract::Classify::ComputeIntFeatures
void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures)
Definition: float2int.cpp:90
tesseract::Classify::InitAdaptedClass
void InitAdaptedClass(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates)
Definition: adaptmatch.cpp:693
ADAPT_TEMPLATES_STRUCT::NumPermClasses
uint8_t NumPermClasses
Definition: adaptive.h:68
tesseract::Classify::PrintAdaptiveMatchResults
void PrintAdaptiveMatchResults(const ADAPT_RESULTS &results)
Definition: adaptmatch.cpp:2013
blobs.h
tesseract::ShapeClassifier::UnicharClassifySample
virtual int UnicharClassifySample(const TrainingSample &sample, Pix *page_pix, int debug, UNICHAR_ID keep_this, GenericVector< UnicharRating > *results)
Definition: shapeclassifier.cpp:39
PicoFeatX
Definition: picofeat.h:43
oldlist.h
ScrollView::BLUE
Definition: scrollview.h:108
IntegerMatcher::FindGoodProtos
int FindGoodProtos(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray, int AdaptProtoThreshold, int Debug)
Definition: intmatcher.cpp:588
tesseract::CCUtil::unicharset
UNICHARSET unicharset
Definition: ccutil.h:57
tesseract::ShapeTable::DeSerialize
bool DeSerialize(TFile *fp)
Definition: shapetable.cpp:246
InitIntegerFX
void InitIntegerFX()
Definition: intfx.cpp:48
tesseract::Classify::SetAdaptiveThreshold
void SetAdaptiveThreshold(float Threshold)
Definition: adaptmatch.cpp:2141
tesseract::TFile::Open
bool Open(const STRING &filename, FileReader reader)
Definition: serialis.cpp:210
PicoFeatDir
Definition: picofeat.h:43
tesseract::Classify::matcher_reliable_adaptive_result
double matcher_reliable_adaptive_result
Definition: classify.h:457
UNICHARSET::get_script
int get_script(UNICHAR_ID unichar_id) const
Definition: unicharset.h:653
tesseract::UnicharAmbigs::AmbigsForAdaption
const UnicharIdVector * AmbigsForAdaption(UNICHAR_ID unichar_id) const
Definition: ambigs.h:183
ratngs.h
AddIntProto
int AddIntProto(INT_CLASS Class)
Definition: intproto.cpp:281
tesseract::TESSDATA_SHAPE_TABLE
Definition: tessdatamanager.h:70
tesseract::Classify::DisplayAdaptedChar
void DisplayAdaptedChar(TBLOB *blob, INT_CLASS_STRUCT *int_class)
Definition: adaptmatch.cpp:946
ADAPT_CLASS_STRUCT::NumPermConfigs
uint8_t NumPermConfigs
Definition: adaptive.h:55
tesseract::Classify::ClassifyAsNoise
void ClassifyAsNoise(ADAPT_RESULTS *Results)
Definition: adaptmatch.cpp:1399
tesseract::Classify::getDict
virtual Dict & getDict()
Definition: classify.h:107
ADAPT_RESULTS::Initialize
void Initialize()
Definition: adaptmatch.cpp:102
tesseract::Classify::matcher_avg_noise_size
double matcher_avg_noise_size
Definition: classify.h:461
tesseract::UnicharRating::config
uint8_t config
Definition: shapetable.h:81
tesseract::TESSDATA_INTTEMP
Definition: tessdatamanager.h:60
AddProtoToProtoPruner
void AddProtoToProtoPruner(PROTO Proto, int ProtoId, INT_CLASS Class, bool debug)
Definition: intproto.cpp:366
genericvector.h
Y_DIM_OFFSET
#define Y_DIM_OFFSET
Definition: adaptmatch.cpp:84
reset_bit
#define reset_bit(array, bit)
Definition: bitvec.h:56
MAX_MATCHES
#define MAX_MATCHES
Definition: adaptmatch.cpp:77
GenericVector::push_back
int push_back(T object)
Definition: genericvector.h:799
ADAPT_RESULTS::CPResults
GenericVector< CP_RESULT_STRUCT > CPResults
Definition: adaptmatch.cpp:98
free_adapted_templates
void free_adapted_templates(ADAPT_TEMPLATES templates)
Definition: adaptive.cpp:182
ADAPTABLE_WERD_ADJUSTMENT
#define ADAPTABLE_WERD_ADJUSTMENT
Definition: adaptmatch.cpp:82
ConvertConfig
void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class)
Definition: intproto.cpp:462
tesseract::Classify::matcher_min_examples_for_prototyping
int matcher_min_examples_for_prototyping
Definition: classify.h:464
DENORM::XHeightRange
void XHeightRange(int unichar_id, const UNICHARSET &unicharset, const TBOX &bbox, float *min_xht, float *max_xht, float *yshift) const
Definition: normalis.cpp:427
TEMP_CONFIG_STRUCT
Definition: adaptive.h:34
UNICHARSET::debug_str
STRING debug_str(UNICHAR_ID id) const
Definition: unicharset.cpp:342
INT_FX_RESULT_STRUCT::Ry
int16_t Ry
Definition: intfx.h:37
WERD_RES::best_choice
WERD_CHOICE * best_choice
Definition: pageres.h:235
INT_FX_RESULT_STRUCT::Ymean
int16_t Ymean
Definition: intfx.h:36
INT_FX_RESULT_STRUCT::Rx
int16_t Rx
Definition: intfx.h:37
tesseract::Classify::ShowMatchDisplay
void ShowMatchDisplay()
Definition: intproto.cpp:962
tesseract::UnicharRating::rating
float rating
Definition: shapetable.h:77
STRING::c_str
const char * c_str() const
Definition: strngs.cpp:192
unicharset.h
UNICHARSET::get_top_bottom
void get_top_bottom(UNICHAR_ID unichar_id, int *min_bottom, int *max_bottom, int *min_top, int *max_top) const
Definition: unicharset.h:558
UNICHARSET::get_enabled
bool get_enabled(UNICHAR_ID unichar_id) const
Definition: unicharset.h:868
LegalClassId
#define LegalClassId(c)
Definition: intproto.h:175
tesseract::Classify::matcher_perfect_threshold
double matcher_perfect_threshold
Definition: classify.h:458
MAX_NUM_CONFIGS
#define MAX_NUM_CONFIGS
Definition: intproto.h:46
MAX_NUM_PROTOS
#define MAX_NUM_PROTOS
Definition: intproto.h:47
ADAPT_TEMPLATE_SUFFIX
#define ADAPT_TEMPLATE_SUFFIX
Definition: adaptmatch.cpp:75
MakeConfigPermanent
#define MakeConfigPermanent(Class, ConfigId)
Definition: adaptive.h:84
tesseract::Classify::ReadAdaptedTemplates
ADAPT_TEMPLATES ReadAdaptedTemplates(TFile *File)
Definition: adaptive.cpp:332
tesseract::CharSegmentationType
CharSegmentationType
Definition: classify.h:96
delete_d
LIST delete_d(LIST list, void *key, int_compare is_equal)
Definition: oldlist.cpp:93
tesseract::Classify::shape_table_
ShapeTable * shape_table_
Definition: classify.h:546
tesseract::Classify::PruneClasses
int PruneClasses(const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uint8_t *normalization_factors, const uint16_t *expected_num_features, GenericVector< CP_RESULT_STRUCT > *results)
Definition: intmatcher.cpp:451
CharNormLength
Definition: normfeat.h:29
NO_PROTO
#define NO_PROTO
Definition: matchdefs.h:40
ocrfeatures.h
tesseract::Classify::disable_character_fragments
bool disable_character_fragments
Definition: classify.h:486
PROTO_STRUCT::Y
float Y
Definition: protos.h:39
tesseract::Classify::MakePermanent
void MakePermanent(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob)
Definition: adaptmatch.cpp:1920
FreeFeature
void FreeFeature(FEATURE Feature)
Definition: ocrfeatures.cpp:53
FreeTempConfig
void FreeTempConfig(TEMP_CONFIG Config)
Definition: adaptive.cpp:74
tesseract::Classify::ShapeIDToClassID
int ShapeIDToClassID(int shape_id) const
Definition: adaptmatch.cpp:2220
trainingsample.h
LENGTH_COMPRESSION
#define LENGTH_COMPRESSION
Definition: normfeat.h:26
tesseract::Classify::BackupAdaptedTemplates
ADAPT_TEMPLATES BackupAdaptedTemplates
Definition: classify.h:519
tesseract::Classify::classify_save_adapted_templates
bool classify_save_adapted_templates
Definition: classify.h:449
tesseract::Classify::classify_nonlinear_norm
bool classify_nonlinear_norm
Definition: classify.h:452
tesseract::TessdataManager::GetComponent
bool GetComponent(TessdataType type, TFile *fp)
Definition: tessdatamanager.cpp:216
UNICHARSET::unichar_to_id
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:209
normfeat.h
tesseract::Classify::StartBackupAdaptiveClassifier
void StartBackupAdaptiveClassifier()
Definition: adaptmatch.cpp:629
UNICHAR_SPACE
Definition: unicharset.h:34
tesseract::Classify::AdaptiveClassifier
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
Definition: adaptmatch.cpp:191
shapetable.h
tesseract::Dict::segment_penalty_dict_case_ok
double segment_penalty_dict_case_ok
Definition: dict.h:605
TWERD::blobs
GenericVector< TBLOB * > blobs
Definition: blobs.h:457
tesseract::ScoredFont
Definition: fontinfo.h:38
IncreaseConfidence
#define IncreaseConfidence(TempConfig)
Definition: adaptive.h:94
tesseract::TFile
Definition: serialis.h:75
tesseract::Dict::certainty_scale
double certainty_scale
Definition: dict.h:627
MAX_NUM_INT_FEATURES
#define MAX_NUM_INT_FEATURES
Definition: intproto.h:128
GenericVector::empty
bool empty() const
Definition: genericvector.h:86
WERD_CHOICE::adjust_factor
float adjust_factor() const
Definition: ratngs.h:294
UNICHARSET
Definition: unicharset.h:145
tesseract::ShapeTable::DebugStr
STRING DebugStr(int shape_id) const
Definition: shapetable.cpp:281
PROTO_STRUCT::X
float X
Definition: protos.h:38
tesseract::Classify::AmbigClassifier
void AmbigClassifier(const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, const TBLOB *blob, INT_TEMPLATES templates, ADAPT_CLASS *classes, UNICHAR_ID *ambiguities, ADAPT_RESULTS *results)
Definition: adaptmatch.cpp:1045
tesseract::Classify::ReadNormProtos
NORM_PROTOS * ReadNormProtos(TFile *fp)
Definition: normmatch.cpp:189
INT_TEMPLATES_STRUCT::NumClasses
int NumClasses
Definition: intproto.h:118
TBOX::bottom
int16_t bottom() const
Definition: rect.h:64
tesseract::ShapeTable::GetShape
const Shape & GetShape(int shape_id) const
Definition: shapetable.h:319
tesseract::Classify::tess_cn_matching
bool tess_cn_matching
Definition: classify.h:443
FEATURE_SET_STRUCT::Features
FEATURE Features[1]
Definition: ocrfeatures.h:67
OutlineFeatY
Definition: outfeat.h:43
tesseract::Classify::AdaptedTemplates
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:515
tesseract::Classify::matcher_sufficient_examples_for_prototyping
int matcher_sufficient_examples_for_prototyping
Definition: classify.h:466
tesseract::Classify::classify_learn_debug_str
char * classify_learn_debug_str
Definition: classify.h:495
tesseract::Classify::ExtractOutlineFeatures
FEATURE_SET ExtractOutlineFeatures(TBLOB *Blob)
Definition: outfeat.cpp:54
AddIntConfig
int AddIntConfig(INT_CLASS Class)
Definition: intproto.cpp:260
PROTO_STRUCT::Angle
float Angle
Definition: protos.h:40
SEAM::BreakPieces
static void BreakPieces(const GenericVector< SEAM * > &seams, const GenericVector< TBLOB * > &blobs, int first, int last)
Definition: seam.cpp:186
tesseract::TESSDATA_PFFMTABLE
Definition: tessdatamanager.h:61
TEMP_PROTO_STRUCT
Definition: adaptive.h:26
character
Definition: mfoutline.h:62
adaptive.h
WERD_RES::chopped_word
TWERD * chopped_word
Definition: pageres.h:206
BIT_VECTOR
uint32_t * BIT_VECTOR
Definition: bitvec.h:27
tesseract::Classify::AddLargeSpeckleTo
void AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices)
Definition: classify.cpp:201
helpers.h
tesseract::Classify::InitAdaptiveClassifier
void InitAdaptiveClassifier(TessdataManager *mgr)
Definition: adaptmatch.cpp:527
float2int.h
tesseract
Definition: baseapi.h:65
WERD_CHOICE::debug_string
const STRING debug_string() const
Definition: ratngs.h:493
PROTO_KEY::Templates
ADAPT_TEMPLATES Templates
Definition: adaptmatch.cpp:123
fontinfo.h
ConfigIsPermanent
#define ConfigIsPermanent(Class, ConfigId)
Definition: adaptive.h:81
push
LIST push(LIST list, void *element)
Definition: oldlist.cpp:172
FillABC
void FillABC(PROTO Proto)
Definition: protos.cpp:105
FEATURE_STRUCT::Params
float Params[1]
Definition: ocrfeatures.h:60
tesseract::Classify::LearnPieces
void LearnPieces(const char *fontname, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word)
Definition: adaptmatch.cpp:374
tesseract::Classify::GetCharNormFeature
int GetCharNormFeature(const INT_FX_RESULT_STRUCT &fx_info, INT_TEMPLATES templates, uint8_t *pruner_norm_array, uint8_t *char_norm_array)
Definition: adaptmatch.cpp:1678
CHAR_FRAGMENT::to_string
STRING to_string() const
Definition: unicharset.h:79
tesseract::Classify::DoAdaptiveMatch
void DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results)
Definition: adaptmatch.cpp:1530
tprintf.h
callcpp.h
tesseract::Classify::GetFontinfoId
int GetFontinfoId(ADAPT_CLASS Class, uint8_t ConfigId)
Definition: adaptive.cpp:173
NewFeature
FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc)
Definition: ocrfeatures.cpp:77
WERD_RES::seam_array
GenericVector< SEAM * > seam_array
Definition: pageres.h:208
NewTempConfig
TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId)
Definition: adaptive.cpp:203
tesseract::Classify::ComputeCharNormArrays
void ComputeCharNormArrays(FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uint8_t *char_norm_array, uint8_t *pruner_array)
Definition: adaptmatch.cpp:1698
UNICHAR_ID
int UNICHAR_ID
Definition: unichar.h:36
INT_FEATURE_ARRAY
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
Definition: intproto.h:151
sample
Definition: cluster.h:31
tesseract::Classify::AllConfigsOff
BIT_VECTOR AllConfigsOff
Definition: classify.h:524
TBLOB::bounding_box
TBOX bounding_box() const
Definition: blobs.cpp:466
tessclassifier.h
GenericVector< UnicharRating >
FEATURE_SET_STRUCT
Definition: ocrfeatures.h:64
tesseract::Classify::im_
IntegerMatcher im_
Definition: classify.h:540
tesseract::Classify::classify_use_pre_adapted_templates
bool classify_use_pre_adapted_templates
Definition: classify.h:447
ADAPT_TEMPLATES_STRUCT::Class
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:69
shapeclassifier.h
tesseract::Classify::ExtractPicoFeatures
FEATURE_SET ExtractPicoFeatures(TBLOB *Blob)
Definition: picofeat.cpp:62
GeoTop
Definition: picofeat.h:37
tesseract::Classify::classify_adapt_feature_threshold
int classify_adapt_feature_threshold
Definition: classify.h:483
tesseract::Classify::ReadNewCutoffs
void ReadNewCutoffs(TFile *fp, uint16_t *Cutoffs)
Definition: cutoffs.cpp:40
TEMP_CONFIG_STRUCT::NumTimesSeen
uint8_t NumTimesSeen
Definition: adaptive.h:35
PROTO_KEY::ClassId
CLASS_ID ClassId
Definition: adaptmatch.cpp:124
NewTempProto
TEMP_PROTO NewTempProto()
Definition: adaptive.cpp:228
UNLIKELY_NUM_FEAT
#define UNLIKELY_NUM_FEAT
Definition: adaptmatch.cpp:78
SET_BIT
#define SET_BIT(array, bit)
Definition: bitvec.h:54
tesseract::Classify::classify_adapted_pruning_factor
double classify_adapted_pruning_factor
Definition: classify.h:477
CharNormDesc
const FEATURE_DESC_STRUCT CharNormDesc
STRING::length
int32_t length() const
Definition: strngs.cpp:187
tesseract::UnicharRating::SortDescendingRating
static int SortDescendingRating(const void *t1, const void *t2)
Definition: shapetable.h:55
WORST_POSSIBLE_RATING
#define WORST_POSSIBLE_RATING
Definition: adaptmatch.cpp:86
INT_TEMPLATES_STRUCT::Class
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:120
WERD_CHOICE::length
int length() const
Definition: ratngs.h:291
tesseract::Classify::ComputeIntCharNormArray
void ComputeIntCharNormArray(const FEATURE_STRUCT &norm_feature, uint8_t *char_norm_array)
Definition: float2int.cpp:62
INT_FEATURE_STRUCT
Definition: intproto.h:131
tesseract::Classify::LearnBlob
void LearnBlob(const STRING &fontname, TBLOB *Blob, const DENORM &cn_denorm, const INT_FX_RESULT_STRUCT &fx_info, const char *blob_text)
Definition: blobclass.cpp:70
tesseract::Classify::FreeNormProtos
void FreeNormProtos()
Definition: normmatch.cpp:167
tesseract::Classify::classify_misfit_junk_penalty
double classify_misfit_junk_penalty
Definition: classify.h:471
protos.h
BLOB_CHOICE
Definition: ratngs.h:49
TBLOB
Definition: blobs.h:282
tesseract::Classify::tess_bn_matching
bool tess_bn_matching
Definition: classify.h:444
tesseract::TrainingSample
Definition: trainingsample.h:53
featdefs.h
GenericVector::truncate
void truncate(int size)
Definition: genericvector.h:132
tesseract::FontSet::size
int size
Definition: fontinfo.h:138
TBOX::left
int16_t left() const
Definition: rect.h:71
unichar.h
tesseract::Classify::CharNormTrainingSample
int CharNormTrainingSample(bool pruner_only, int keep_this, const TrainingSample &sample, GenericVector< UnicharRating > *results)
Definition: adaptmatch.cpp:1329
tesseract::Classify::RemoveExtraPuncs
void RemoveExtraPuncs(ADAPT_RESULTS *Results)
Definition: adaptmatch.cpp:2093
tesseract::Classify::GetAdaptiveFeatures
int GetAdaptiveFeatures(TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures)
Definition: adaptmatch.cpp:786
UNICHARSET::contains_unichar
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:670
ScrollView::GREEN
Definition: scrollview.h:106
ADAPT_RESULTS::best_match_index
int best_match_index
Definition: adaptmatch.cpp:95
MakeTempProtoPerm
int MakeTempProtoPerm(void *item1, void *item2)
Definition: adaptmatch.cpp:1980
MarginalMatch
bool MarginalMatch(float confidence, float matcher_great_threshold)
Definition: adaptmatch.cpp:131
tesseract::Shape::size
int size() const
Definition: shapetable.h:199
FreeFeatureSet
void FreeFeatureSet(FEATURE_SET FeatureSet)
Definition: ocrfeatures.cpp:61
ADAPT_CLASS_STRUCT::PermProtos
BIT_VECTOR PermProtos
Definition: adaptive.h:58
GenericVector::clear
void clear()
Definition: genericvector.h:857
INT_FX_RESULT_STRUCT::Length
int32_t Length
Definition: intfx.h:35
tesseract::Classify::DebugAdaptiveClassifier
void DebugAdaptiveClassifier(TBLOB *Blob, ADAPT_RESULTS *Results)
Definition: adaptmatch.cpp:1497
UpdateMatchDisplay
void UpdateMatchDisplay()
Definition: intproto.cpp:446
ADAPT_RESULTS::best_unichar_id
UNICHAR_ID best_unichar_id
Definition: adaptmatch.cpp:94
tesseract::Classify::AddNewResult
void AddNewResult(const UnicharRating &new_result, ADAPT_RESULTS *results)
Definition: adaptmatch.cpp:994
print_ratings_list
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
Definition: ratngs.cpp:835
GeoBottom
Definition: picofeat.h:36
TBOX::right
int16_t right() const
Definition: rect.h:78
PRINT_MATCH_SUMMARY
#define PRINT_MATCH_SUMMARY
Definition: intproto.h:187
bitvec.h
WERD_RES::correct_text
GenericVector< STRING > correct_text
Definition: pageres.h:283
tesseract::Classify::ComputeCorrectedRating
double ComputeCorrectedRating(bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, int matcher_multiplier, const uint8_t *cn_factors)
Definition: adaptmatch.cpp:1202
tesseract::FontSet::configs
int * configs
Definition: fontinfo.h:139
BLOB_CHOICE::set_fonts
void set_fonts(const GenericVector< tesseract::ScoredFont > &fonts)
Definition: ratngs.h:94
ADAPT_RESULTS::HasNonfragment
bool HasNonfragment
Definition: adaptmatch.cpp:93
TEMP_PROTO_STRUCT::Proto
PROTO_STRUCT Proto
Definition: adaptive.h:42
InitMatcherRatings
void InitMatcherRatings(float *Rating)
IntegerMatcher::FindBadFeatures
int FindBadFeatures(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray, int AdaptFeatureThreshold, int Debug)
Definition: intmatcher.cpp:656
OutlineFeatX
Definition: outfeat.h:42
tesseract::Classify::TempConfigReliable
bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config)
Definition: adaptmatch.cpp:2236
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
tesseract::Classify::LargeSpeckle
bool LargeSpeckle(const TBLOB &blob)
Definition: classify.cpp:224
tesseract::Dict::getUnicharset
const UNICHARSET & getUnicharset() const
Definition: dict.h:101
tesseract::ShapeTable
Definition: shapetable.h:261
UNICHARSET::get_fragment
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:724
PROTO_KEY
Definition: adaptmatch.cpp:122
tesseract::Classify::ReadIntTemplates
INT_TEMPLATES ReadIntTemplates(TFile *fp)
Definition: intproto.cpp:717
errcode.h
tesseract::Classify::PreTrainedTemplates
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:514
free_int_templates
void free_int_templates(INT_TEMPLATES templates)
Definition: intproto.cpp:697
seam.h
ScrollView::Update
static void Update()
Definition: scrollview.cpp:708
tesseract::Classify::rating_scale
double rating_scale
Definition: classify.h:472
ADAPT_CLASS_STRUCT
Definition: adaptive.h:54
tesseract::Classify::matcher_debug_separate_windows
bool matcher_debug_separate_windows
Definition: classify.h:494
tesseract::File
Definition: fileio.h:55
serialis.h
TWERD::plot
void plot(ScrollView *window)
Definition: blobs.cpp:895
ADAPT_RESULTS::match
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:97
intproto.h
tesseract::Classify::SettupPass1
void SettupPass1()
Definition: adaptmatch.cpp:652
AddProtoToClassPruner
void AddProtoToClassPruner(PROTO Proto, CLASS_ID ClassId, INT_TEMPLATES Templates)
Definition: intproto.cpp:327
TWERD::bounding_box
TBOX bounding_box() const
Definition: blobs.cpp:859
tesseract::Classify::GetAmbiguities
UNICHAR_ID * GetAmbiguities(TBLOB *Blob, CLASS_ID CorrectClass)
Definition: adaptmatch.cpp:1592
FEATURE_ID
uint8_t FEATURE_ID
Definition: matchdefs.h:45
ambigs.h
tesseract::CST_WHOLE
Definition: classify.h:98
classify.h
SEAM::JoinPieces
static void JoinPieces(const GenericVector< SEAM * > &seams, const GenericVector< TBLOB * > &blobs, int first, int last)
Definition: seam.cpp:208
tesseract::Classify::classify_integer_matcher_multiplier
int classify_integer_matcher_multiplier
Definition: classify.h:505
UNICHARSET::id_to_unichar
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:290
tesseract::Classify::classify_debug_character_fragments
bool classify_debug_character_fragments
Definition: classify.h:491
tesseract::Classify::matcher_rating_margin
double matcher_rating_margin
Definition: classify.h:460
PermConfigFor
#define PermConfigFor(Class, ConfigId)
Definition: adaptive.h:92
tesseract::Classify::ResetAdaptiveClassifierInternal
void ResetAdaptiveClassifierInternal()
Definition: adaptmatch.cpp:598
FEATURE_SET_STRUCT::NumFeatures
uint16_t NumFeatures
Definition: ocrfeatures.h:65
tesseract::Classify::WriteAdaptedTemplates
void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
Definition: adaptive.cpp:453
INT_CLASS_STRUCT::NumConfigs
uint8_t NumConfigs
Definition: intproto.h:107
ADAPT_TEMPLATES_STRUCT::Templates
INT_TEMPLATES Templates
Definition: adaptive.h:66
tesseract::UnicharRating::adapted
bool adapted
Definition: shapetable.h:79
MakeProtoPermanent
#define MakeProtoPermanent(Class, ProtoId)
Definition: adaptive.h:87
tesseract::Classify::classify_bln_numeric_mode
bool classify_bln_numeric_mode
Definition: classify.h:508
tesseract::Classify::matcher_bad_match_pad
double matcher_bad_match_pad
Definition: classify.h:459
tesseract::Classify::fontset_table_
UnicityTable< FontSet > fontset_table_
Definition: classify.h:537
tesseract::Classify::matcher_debug_flags
int matcher_debug_flags
Definition: classify.h:454
GenericVector::sort
void sort()
Definition: genericvector.h:1102
tesseract::Classify::ShowBestMatchFor
void ShowBestMatchFor(int shape_id, const INT_FEATURE_STRUCT *features, int num_features)
Definition: adaptmatch.cpp:2159
UnusedClassIdIn
#define UnusedClassIdIn(T, c)
Definition: intproto.h:176
DENORM::pix
Pix * pix() const
Definition: normalis.h:245
ADAPT_RESULTS::BlobLength
int32_t BlobLength
Definition: adaptmatch.cpp:92
PRINT_PROTO_MATCHES
#define PRINT_PROTO_MATCHES
Definition: intproto.h:191
ActualOutlineLength
float ActualOutlineLength(FEATURE Feature)
Definition: normfeat.cpp:31
tesseract::Classify::classify_character_fragments_garbage_certainty_threshold
double classify_character_fragments_garbage_certainty_threshold
Definition: classify.h:489
GenericVector::size
int size() const
Definition: genericvector.h:71
window_wait
char window_wait(ScrollView *win)
Definition: callcpp.cpp:103
ClassForClassId
#define ClassForClassId(T, c)
Definition: intproto.h:177
tesseract::FontSet
Definition: fontinfo.h:137
CharNormY
Definition: normfeat.h:29
scrollview.h
tesseract::Classify::AdaptToChar
void AdaptToChar(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, float Threshold, ADAPT_TEMPLATES adaptive_templates)
Definition: adaptmatch.cpp:853
tesseract::Dict::SettupStopperPass2
void SettupStopperPass2()
Sets up stopper variables in preparation for the second pass.
Definition: stopper.cpp:382
tesseract::Classify::certainty_scale
double certainty_scale
Definition: classify.h:473
matchdefs.h
tesseract::Classify::ClassAndConfigIDToFontOrShapeID
int ClassAndConfigIDToFontOrShapeID(int class_id, int int_result_config) const
Definition: adaptmatch.cpp:2207
tesseract::CST_FRAGMENT
Definition: classify.h:97
tesseract::Classify::MakeNewTempProtos
PROTO_ID MakeNewTempProtos(FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS IClass, ADAPT_CLASS Class, BIT_VECTOR TempProtoMask)
Definition: adaptmatch.cpp:1834
tessdatamanager.h
tesseract::Classify::SettupPass2
void SettupPass2()
Definition: adaptmatch.cpp:669
tesseract::Classify::AllConfigsOn
BIT_VECTOR AllConfigsOn
Definition: classify.h:523
tesseract::Classify::SwitchAdaptiveClassifier
void SwitchAdaptiveClassifier()
Definition: adaptmatch.cpp:613
tesseract::Classify::classify_debug_level
int classify_debug_level
Definition: classify.h:430
tesseract::ShapeTable::MaxNumUnichars
int MaxNumUnichars() const
Definition: shapetable.cpp:455
kBlnBaselineOffset
const int kBlnBaselineOffset
Definition: normalis.h:24
UNICHARSET::size
int size() const
Definition: unicharset.h:341
FreeTempProto
void FreeTempProto(void *arg)
Definition: adaptive.cpp:81
TWERD::NumBlobs
int NumBlobs() const
Definition: blobs.h:446
CharNormRx
Definition: normfeat.h:29
tesseract::Classify::RefreshDebugWindow
void RefreshDebugWindow(ScrollView **win, const char *msg, int y_offset, const TBOX &wbox)
Definition: adaptmatch.cpp:226
IsEmptyAdaptedClass
#define IsEmptyAdaptedClass(Class)
Definition: adaptive.h:78
MAX_ADAPTABLE_WERD_SIZE
#define MAX_ADAPTABLE_WERD_SIZE
Definition: adaptmatch.cpp:80
tesseract::Classify::NewAdaptedTemplates
ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset)
Definition: adaptive.cpp:151
tesseract::TESSDATA_NORMPROTO
Definition: tessdatamanager.h:62
BCC_STATIC_CLASSIFIER
Definition: ratngs.h:42
tesseract::Classify::ClassIDToDebugStr
STRING ClassIDToDebugStr(const INT_TEMPLATES_STRUCT *templates, int class_id, int config_id) const
Definition: adaptmatch.cpp:2194
tesseract::UnicharRating::feature_misses
uint16_t feature_misses
Definition: shapetable.h:83
TBOX
Definition: rect.h:33
DENORM
Definition: normalis.h:49
CharNormRy
Definition: normfeat.h:29
ADAPT_CLASS_STRUCT::TempProtos
LIST TempProtos
Definition: adaptive.h:60
intmatcher.h
GetPicoFeatureLength
#define GetPicoFeatureLength()
Definition: picofeat.h:56