tesseract  5.0.0-alpha-619-ge9db
commontraining.cpp File Reference
#include "commontraining.h"
#include <algorithm>
#include <cmath>
#include "allheaders.h"
#include "ccutil.h"
#include "classify.h"
#include "cluster.h"
#include "clusttool.h"
#include "emalloc.h"
#include "featdefs.h"
#include "fontinfo.h"
#include "intfeaturespace.h"
#include "mastertrainer.h"
#include "mf.h"
#include "oldlist.h"
#include "params.h"
#include "shapetable.h"
#include "tessdatamanager.h"
#include "tessopt.h"
#include "tprintf.h"
#include "unicity_table.h"

Go to the source code of this file.

Namespaces

 tesseract
 

Macros

#define _USE_MATH_DEFINES
 

Functions

 INT_PARAM_FLAG (debug_level, 0, "Level of Trainer debugging")
 
 STRING_PARAM_FLAG (D, "", "Directory to write output files to")
 
 STRING_PARAM_FLAG (F, "font_properties", "File listing font properties")
 
 STRING_PARAM_FLAG (X, "", "File listing font xheights")
 
 STRING_PARAM_FLAG (U, "unicharset", "File to load unicharset from")
 
 STRING_PARAM_FLAG (O, "", "File to write unicharset to")
 
 STRING_PARAM_FLAG (output_trainer, "", "File to write trainer to")
 
 STRING_PARAM_FLAG (test_ch, "", "UTF8 test character string")
 
void ParseArguments (int *argc, char ***argv)
 
ShapeTable * tesseract::LoadShapeTable (const STRING &file_prefix)
 
void tesseract::WriteShapeTable (const STRING &file_prefix, const ShapeTable &shape_table)
 
MasterTrainer * tesseract::LoadTrainingData (int argc, const char *const *argv, bool replication, ShapeTable **shape_table, STRING *file_prefix)
 
const char * GetNextFilename (int argc, const char *const *argv)
 
LABELEDLIST FindList (LIST List, char *Label)
 
LABELEDLIST NewLabeledList (const char *Label)
 
void ReadTrainingSamples (const FEATURE_DEFS_STRUCT &feature_definitions, const char *feature_name, int max_samples, UNICHARSET *unicharset, FILE *file, LIST *training_samples)
 
void FreeTrainingSamples (LIST CharList)
 
void FreeLabeledList (LABELEDLIST LabeledList)
 
CLUSTERER * SetUpForClustering (const FEATURE_DEFS_STRUCT &FeatureDefs, LABELEDLIST char_sample, const char *program_feature_type)
 
void MergeInsignificantProtos (LIST ProtoList, const char *label, CLUSTERER *Clusterer, CLUSTERCONFIG *clusterconfig)
 
void CleanUpUnusedData (LIST ProtoList)
 
LIST RemoveInsignificantProtos (LIST ProtoList, bool KeepSigProtos, bool KeepInsigProtos, int N)
 
MERGE_CLASS FindClass (LIST List, const char *Label)
 
MERGE_CLASS NewLabeledClass (const char *Label)
 
void FreeLabeledClassList (LIST ClassList)
 
CLASS_STRUCT * SetUpForFloat2Int (const UNICHARSET &unicharset, LIST LabeledClassList)
 
void Normalize (float *Values)
 
void FreeNormProtoList (LIST CharList)
 
void AddToNormProtosList (LIST *NormProtoList, LIST ProtoList, char *CharName)
 
int NumberOfProtos (LIST ProtoList, bool CountSigProtos, bool CountInsigProtos)
 

Variables

CLUSTERCONFIG Config = { elliptical, 0.625, 0.05, 1.0, 1e-6, 0 }
 
FEATURE_DEFS_STRUCT feature_defs
 

Macro Definition Documentation

◆ _USE_MATH_DEFINES

#define _USE_MATH_DEFINES

Definition at line 14 of file commontraining.cpp.

Function Documentation

◆ AddToNormProtosList()

void AddToNormProtosList ( LIST *  NormProtoList,
LIST  ProtoList,
char *  CharName 
)

Definition at line 821 of file commontraining.cpp.

825 {
826  PROTOTYPE* Proto;
827  LABELEDLIST LabeledProtoList;
828 
829  LabeledProtoList = NewLabeledList(CharName);
830  iterate(ProtoList)
831  {
832  Proto = reinterpret_cast<PROTOTYPE *>first_node (ProtoList);
833  LabeledProtoList->List = push(LabeledProtoList->List, Proto);
834  }
835  *NormProtoList = push(*NormProtoList, LabeledProtoList);
836 }

◆ CleanUpUnusedData()

void CleanUpUnusedData ( LIST  ProtoList)

Definition at line 595 of file commontraining.cpp.

597 {
598  PROTOTYPE* Prototype;
599 
600  iterate(ProtoList)
601  {
602  Prototype = reinterpret_cast<PROTOTYPE *>first_node (ProtoList);
603  free(Prototype->Variance.Elliptical);
604  Prototype->Variance.Elliptical = nullptr;
605  free(Prototype->Magnitude.Elliptical);
606  Prototype->Magnitude.Elliptical = nullptr;
607  free(Prototype->Weight.Elliptical);
608  Prototype->Weight.Elliptical = nullptr;
609  }
610 }

◆ FindClass()

MERGE_CLASS FindClass ( LIST  List,
const char *  Label 
)

Definition at line 678 of file commontraining.cpp.

678  {
679  MERGE_CLASS MergeClass;
680 
681  iterate (List)
682  {
683  MergeClass = reinterpret_cast<MERGE_CLASS>first_node (List);
684  if (strcmp (MergeClass->Label, Label) == 0)
685  return (MergeClass);
686  }
687  return (nullptr);
688 
689 } /* FindClass */

◆ FindList()

LABELEDLIST FindList ( LIST  List,
char *  Label 
)

This routine searches through a list of labeled lists to find a list with the specified label. If a matching labeled list cannot be found, nullptr is returned.

Parameters
List: list to search
Label: label to search for
Returns
Labeled list with the specified label or nullptr.
Note
Globals: none

Definition at line 340 of file commontraining.cpp.

340  {
341  LABELEDLIST LabeledList;
342 
343  iterate (List)
344  {
345  LabeledList = reinterpret_cast<LABELEDLIST>first_node (List);
346  if (strcmp (LabeledList->Label, Label) == 0)
347  return (LabeledList);
348  }
349  return (nullptr);
350 
351 } /* FindList */

◆ FreeLabeledClassList()

void FreeLabeledClassList ( LIST  ClassList)

This routine deallocates all of the space allocated to the specified list of training samples.

Parameters
ClassList: list of all fonts in document

Definition at line 709 of file commontraining.cpp.

709  {
710  MERGE_CLASS MergeClass;
711 
712  LIST nodes = ClassList;
713  iterate(ClassList) /* iterate through all of the fonts */
714  {
715  MergeClass = reinterpret_cast<MERGE_CLASS>first_node (ClassList);
716  free (MergeClass->Label);
717  FreeClass(MergeClass->Class);
718  delete MergeClass;
719  }
720  destroy(nodes);
721 
722 } /* FreeLabeledClassList */

◆ FreeLabeledList()

void FreeLabeledList ( LABELEDLIST  LabeledList)

This routine deallocates all of the memory consumed by a labeled list. It does not free any memory which may be consumed by the items in the list.

Parameters
LabeledList: labeled list to be freed
Note
Globals: none

Definition at line 476 of file commontraining.cpp.

476  {
477  destroy(LabeledList->List);
478  free(LabeledList->Label);
479  free(LabeledList);
480 } /* FreeLabeledList */

◆ FreeNormProtoList()

void FreeNormProtoList ( LIST  CharList)

Definition at line 805 of file commontraining.cpp.

807 {
808  LABELEDLIST char_sample;
809 
810  LIST nodes = CharList;
811  iterate(CharList) /* iterate through all of the fonts */
812  {
813  char_sample = reinterpret_cast<LABELEDLIST>first_node (CharList);
814  FreeLabeledList (char_sample);
815  }
816  destroy(nodes);
817 
818 } // FreeNormProtoList

◆ FreeTrainingSamples()

void FreeTrainingSamples ( LIST  CharList)

This routine deallocates all of the space allocated to the specified list of training samples.

Parameters
CharList: list of all fonts in document

Definition at line 450 of file commontraining.cpp.

450  {
451  LABELEDLIST char_sample;
452  FEATURE_SET FeatureSet;
453  LIST FeatureList;
454 
455  LIST nodes = CharList;
456  iterate(CharList) { /* iterate through all of the fonts */
457  char_sample = reinterpret_cast<LABELEDLIST>first_node(CharList);
458  FeatureList = char_sample->List;
459  iterate(FeatureList) { /* iterate through all of the classes */
460  FeatureSet = reinterpret_cast<FEATURE_SET>first_node(FeatureList);
461  FreeFeatureSet(FeatureSet);
462  }
463  FreeLabeledList(char_sample);
464  }
465  destroy(nodes);
466 } /* FreeTrainingSamples */

◆ GetNextFilename()

const char* GetNextFilename ( int  argc,
const char *const *  argv 
)

This routine returns the next command line argument. If there are no remaining command line arguments, it returns nullptr. This routine should only be called after all option arguments have been parsed and removed with ParseArguments.

Globals:

  • tessoptind defined by tessopt sys call

Returns
Next command line argument or nullptr.

Definition at line 323 of file commontraining.cpp.

323  {
324  if (tessoptind < argc)
325  return argv[tessoptind++];
326  else
327  return nullptr;
328 } /* GetNextFilename */

◆ INT_PARAM_FLAG()

INT_PARAM_FLAG ( debug_level  ,
0  ,
"Level of Trainer debugging"   
)

◆ MergeInsignificantProtos()

void MergeInsignificantProtos ( LIST  ProtoList,
const char *  label,
CLUSTERER *  Clusterer,
CLUSTERCONFIG *  clusterconfig 
)

Definition at line 528 of file commontraining.cpp.

530  {
531  PROTOTYPE* Prototype;
532  bool debug = strcmp(FLAGS_test_ch.c_str(), label) == 0;
533 
534  LIST pProtoList = ProtoList;
535  iterate(pProtoList) {
536  Prototype = reinterpret_cast<PROTOTYPE *>first_node (pProtoList);
537  if (Prototype->Significant || Prototype->Merged)
538  continue;
539  float best_dist = 0.125;
540  PROTOTYPE* best_match = nullptr;
541  // Find the nearest alive prototype.
542  LIST list_it = ProtoList;
543  iterate(list_it) {
544  PROTOTYPE* test_p = reinterpret_cast<PROTOTYPE *>first_node (list_it);
545  if (test_p != Prototype && !test_p->Merged) {
546  float dist = ComputeDistance(Clusterer->SampleSize,
547  Clusterer->ParamDesc,
548  Prototype->Mean, test_p->Mean);
549  if (dist < best_dist) {
550  best_match = test_p;
551  best_dist = dist;
552  }
553  }
554  }
555  if (best_match != nullptr && !best_match->Significant) {
556  if (debug)
557  tprintf("Merging red clusters (%d+%d) at %g,%g and %g,%g\n",
558  best_match->NumSamples, Prototype->NumSamples,
559  best_match->Mean[0], best_match->Mean[1],
560  Prototype->Mean[0], Prototype->Mean[1]);
561  best_match->NumSamples = MergeClusters(Clusterer->SampleSize,
562  Clusterer->ParamDesc,
563  best_match->NumSamples,
564  Prototype->NumSamples,
565  best_match->Mean,
566  best_match->Mean, Prototype->Mean);
567  Prototype->NumSamples = 0;
568  Prototype->Merged = true;
569  } else if (best_match != nullptr) {
570  if (debug)
571  tprintf("Red proto at %g,%g matched a green one at %g,%g\n",
572  Prototype->Mean[0], Prototype->Mean[1],
573  best_match->Mean[0], best_match->Mean[1]);
574  Prototype->Merged = true;
575  }
576  }
577  // Mark significant those that now have enough samples.
578  int min_samples =
579  static_cast<int32_t>(clusterconfig->MinSamples * Clusterer->NumChar);
580  pProtoList = ProtoList;
581  iterate(pProtoList) {
582  Prototype = reinterpret_cast<PROTOTYPE *>first_node (pProtoList);
583  // Process insignificant protos that do not match a green one
584  if (!Prototype->Significant && Prototype->NumSamples >= min_samples &&
585  !Prototype->Merged) {
586  if (debug)
587  tprintf("Red proto at %g,%g becoming green\n",
588  Prototype->Mean[0], Prototype->Mean[1]);
589  Prototype->Significant = true;
590  }
591  }
592 } /* MergeInsignificantProtos */

◆ NewLabeledClass()

MERGE_CLASS NewLabeledClass ( const char *  Label)

Definition at line 692 of file commontraining.cpp.

692  {
693  MERGE_CLASS MergeClass;
694 
695  MergeClass = new MERGE_CLASS_NODE;
696  MergeClass->Label = static_cast<char*>(Emalloc (strlen (Label)+1));
697  strcpy (MergeClass->Label, Label);
698  MergeClass->Class = NewClass (MAX_NUM_PROTOS, MAX_NUM_CONFIGS);
699  return (MergeClass);
700 
701 } /* NewLabeledClass */

◆ NewLabeledList()

LABELEDLIST NewLabeledList ( const char *  Label)

This routine allocates a new, empty labeled list and gives it the specified label.

Parameters
Label: label for new list
Returns
New, empty labeled list.
Note
Globals: none

Definition at line 361 of file commontraining.cpp.

361  {
362  LABELEDLIST LabeledList;
363 
364  LabeledList = static_cast<LABELEDLIST>(Emalloc (sizeof (LABELEDLISTNODE)));
365  LabeledList->Label = static_cast<char*>(Emalloc (strlen (Label)+1));
366  strcpy (LabeledList->Label, Label);
367  LabeledList->List = NIL_LIST;
368  LabeledList->SampleCount = 0;
369  LabeledList->font_sample_count = 0;
370  return (LabeledList);
371 
372 } /* NewLabeledList */

◆ Normalize()

void Normalize ( float *  Values)

Definition at line 788 of file commontraining.cpp.

790 {
791  float Slope;
792  float Intercept;
793  float Normalizer;
794 
795  Slope = tan(Values [2] * 2 * M_PI);
796  Intercept = Values [1] - Slope * Values [0];
797  Normalizer = 1 / sqrt (Slope * Slope + 1.0);
798 
799  Values [0] = Slope * Normalizer;
800  Values [1] = - Normalizer;
801  Values [2] = Intercept * Normalizer;
802 } // Normalize

◆ NumberOfProtos()

int NumberOfProtos ( LIST  ProtoList,
bool  CountSigProtos,
bool  CountInsigProtos 
)

Definition at line 839 of file commontraining.cpp.

840  {
841  int N = 0;
842  iterate(ProtoList)
843  {
844  PROTOTYPE* Proto = reinterpret_cast<PROTOTYPE*>first_node(ProtoList);
845  if ((Proto->Significant && CountSigProtos) ||
846  (!Proto->Significant && CountInsigProtos))
847  N++;
848  }
849  return(N);
850 }

◆ ParseArguments()

void ParseArguments ( int *  argc,
char ***  argv 
)

This routine parses the command line arguments that were passed to the program and uses them to set relevant training-related global parameters.

Globals:

  • Config current clustering parameters

Parameters
argc: number of command line arguments to parse
argv: command line arguments

Definition at line 122 of file commontraining.cpp.

122  {
123  STRING usage;
124  if (*argc) {
125  usage += (*argv)[0];
126  usage += " -v | --version | ";
127  usage += (*argv)[0];
128  }
129  usage += " [.tr files ...]";
130  tesseract::ParseCommandLineFlags(usage.c_str(), argc, argv, true);
131  // Record the index of the first non-flag argument to 1, since we set
132  // remove_flags to true when parsing the flags.
133  tessoptind = 1;
134  // Set some global values based on the flags.
135  Config.MinSamples =
136  std::max(0.0, std::min(1.0, double(FLAGS_clusterconfig_min_samples_fraction)));
137  Config.MaxIllegal =
138  std::max(0.0, std::min(1.0, double(FLAGS_clusterconfig_max_illegal)));
139  Config.Independence =
140  std::max(0.0, std::min(1.0, double(FLAGS_clusterconfig_independence)));
141  Config.Confidence =
142  std::max(0.0, std::min(1.0, double(FLAGS_clusterconfig_confidence)));
143  // Set additional parameters from config file if specified.
144  if (!FLAGS_configfile.empty()) {
145  tesseract::ParamUtils::ReadParamsFile(
146  FLAGS_configfile.c_str(),
147  tesseract::SET_PARAM_CONSTRAINT_NON_INIT_ONLY,
148  ccutil.params());
149  }
150 }

◆ ReadTrainingSamples()

void ReadTrainingSamples ( const FEATURE_DEFS_STRUCT &  feature_definitions,
const char *  feature_name,
int  max_samples,
UNICHARSET *  unicharset,
FILE *  file,
LIST *  training_samples 
)

This routine reads training samples from a file and places them into a data structure which organizes the samples by FontName and CharName. It then returns this data structure.

Parameters
file: open text file to read samples from
feature_definitions
feature_name
max_samples
unicharset
training_samples

Definition at line 389 of file commontraining.cpp.

392  {
393  char buffer[2048];
394  char unichar[UNICHAR_LEN + 1];
395  LABELEDLIST char_sample;
396  FEATURE_SET feature_samples;
397  CHAR_DESC char_desc;
398  uint32_t feature_type =
399  ShortNameToFeatureType(feature_definitions, feature_name);
400 
401  // Zero out the font_sample_count for all the classes.
402  LIST it = *training_samples;
403  iterate(it) {
404  char_sample = reinterpret_cast<LABELEDLIST>(first_node(it));
405  char_sample->font_sample_count = 0;
406  }
407 
408  while (fgets(buffer, 2048, file) != nullptr) {
409  if (buffer[0] == '\n')
410  continue;
411 
412  sscanf(buffer, "%*s %s", unichar);
413  if (unicharset != nullptr && !unicharset->contains_unichar(unichar)) {
414  unicharset->unichar_insert(unichar);
415  if (unicharset->size() > MAX_NUM_CLASSES) {
416  tprintf("Error: Size of unicharset in training is "
417  "greater than MAX_NUM_CLASSES\n");
418  exit(1);
419  }
420  }
421  char_sample = FindList(*training_samples, unichar);
422  if (char_sample == nullptr) {
423  char_sample = NewLabeledList(unichar);
424  *training_samples = push(*training_samples, char_sample);
425  }
426  char_desc = ReadCharDescription(feature_definitions, file);
427  feature_samples = char_desc->FeatureSets[feature_type];
428  if (char_sample->font_sample_count < max_samples || max_samples <= 0) {
429  char_sample->List = push(char_sample->List, feature_samples);
430  char_sample->SampleCount++;
431  char_sample->font_sample_count++;
432  } else {
433  FreeFeatureSet(feature_samples);
434  }
435  for (size_t i = 0; i < char_desc->NumFeatureSets; i++) {
436  if (feature_type != i)
437  FreeFeatureSet(char_desc->FeatureSets[i]);
438  }
439  free(char_desc);
440  }
441 } // ReadTrainingSamples

◆ RemoveInsignificantProtos()

LIST RemoveInsignificantProtos ( LIST  ProtoList,
bool  KeepSigProtos,
bool  KeepInsigProtos,
int  N 
)

Definition at line 613 of file commontraining.cpp.

619 {
620  LIST NewProtoList = NIL_LIST;
621  LIST pProtoList;
622  PROTOTYPE* Proto;
623  PROTOTYPE* NewProto;
624  int i;
625 
626  pProtoList = ProtoList;
627  iterate(pProtoList)
628  {
629  Proto = reinterpret_cast<PROTOTYPE *>first_node (pProtoList);
630  if ((Proto->Significant && KeepSigProtos) ||
631  (!Proto->Significant && KeepInsigProtos))
632  {
633  NewProto = static_cast<PROTOTYPE *>(Emalloc(sizeof(PROTOTYPE)));
634 
635  NewProto->Mean = static_cast<float *>(Emalloc(N * sizeof(float)));
636  NewProto->Significant = Proto->Significant;
637  NewProto->Style = Proto->Style;
638  NewProto->NumSamples = Proto->NumSamples;
639  NewProto->Cluster = nullptr;
640  NewProto->Distrib = nullptr;
641 
642  for (i=0; i < N; i++)
643  NewProto->Mean[i] = Proto->Mean[i];
644  if (Proto->Variance.Elliptical != nullptr) {
645  NewProto->Variance.Elliptical = static_cast<float *>(Emalloc(N * sizeof(float)));
646  for (i=0; i < N; i++)
647  NewProto->Variance.Elliptical[i] = Proto->Variance.Elliptical[i];
648  }
649  else
650  NewProto->Variance.Elliptical = nullptr;
651  //---------------------------------------------
652  if (Proto->Magnitude.Elliptical != nullptr) {
653  NewProto->Magnitude.Elliptical = static_cast<float *>(Emalloc(N * sizeof(float)));
654  for (i=0; i < N; i++)
655  NewProto->Magnitude.Elliptical[i] = Proto->Magnitude.Elliptical[i];
656  }
657  else
658  NewProto->Magnitude.Elliptical = nullptr;
659  //------------------------------------------------
660  if (Proto->Weight.Elliptical != nullptr) {
661  NewProto->Weight.Elliptical = static_cast<float *>(Emalloc(N * sizeof(float)));
662  for (i=0; i < N; i++)
663  NewProto->Weight.Elliptical[i] = Proto->Weight.Elliptical[i];
664  }
665  else
666  NewProto->Weight.Elliptical = nullptr;
667 
668  NewProto->TotalMagnitude = Proto->TotalMagnitude;
669  NewProto->LogMagnitude = Proto->LogMagnitude;
670  NewProtoList = push_last(NewProtoList, NewProto);
671  }
672  }
673  FreeProtoList(&ProtoList);
674  return (NewProtoList);
675 } /* RemoveInsignificantProtos */

◆ SetUpForClustering()

CLUSTERER* SetUpForClustering ( const FEATURE_DEFS_STRUCT &  FeatureDefs,
LABELEDLIST  char_sample,
const char *  program_feature_type 
)

This routine reads samples from a LABELEDLIST and enters those samples into a clusterer data structure. This data structure is then returned to the caller.

Parameters
char_sample: LABELEDLIST that holds all the feature information for a given character.
FeatureDefs
program_feature_type
Returns
Pointer to new clusterer data structure.
Note
Globals: None

Definition at line 494 of file commontraining.cpp.

496  {
497  uint16_t N;
498  int i, j;
499  float* Sample = nullptr;
500  CLUSTERER *Clusterer;
501  int32_t CharID;
502  LIST FeatureList = nullptr;
503  FEATURE_SET FeatureSet = nullptr;
504 
505  int32_t desc_index =
506  ShortNameToFeatureType(FeatureDefs, program_feature_type);
507  N = FeatureDefs.FeatureDesc[desc_index]->NumParams;
508  Clusterer = MakeClusterer(N, FeatureDefs.FeatureDesc[desc_index]->ParamDesc);
509 
510  FeatureList = char_sample->List;
511  CharID = 0;
512  iterate(FeatureList) {
513  FeatureSet = reinterpret_cast<FEATURE_SET>first_node(FeatureList);
514  for (i = 0; i < FeatureSet->MaxNumFeatures; i++) {
515  if (Sample == nullptr) Sample = static_cast<float*>(Emalloc(N * sizeof(float)));
516  for (j = 0; j < N; j++)
517  Sample[j] = FeatureSet->Features[i]->Params[j];
518  MakeSample (Clusterer, Sample, CharID);
519  }
520  CharID++;
521  }
522  free(Sample);
523  return Clusterer;
524 
525 } /* SetUpForClustering */

◆ SetUpForFloat2Int()

CLASS_STRUCT* SetUpForFloat2Int ( const UNICHARSET &  unicharset,
LIST  LabeledClassList 
)

Definition at line 725 of file commontraining.cpp.

726  {
727  MERGE_CLASS MergeClass;
728  CLASS_TYPE Class;
729  int NumProtos;
730  int NumConfigs;
731  int NumWords;
732  int i, j;
733  float Values[3];
734  PROTO NewProto;
735  PROTO OldProto;
736  BIT_VECTOR NewConfig;
737  BIT_VECTOR OldConfig;
738 
739  // printf("Float2Int ...\n");
740 
741  CLASS_STRUCT* float_classes = new CLASS_STRUCT[unicharset.size()];
742  iterate(LabeledClassList)
743  {
744  UnicityTableEqEq<int> font_set;
745  MergeClass = reinterpret_cast<MERGE_CLASS>first_node (LabeledClassList);
746  Class = &float_classes[unicharset.unichar_to_id(MergeClass->Label)];
747  NumProtos = MergeClass->Class->NumProtos;
748  NumConfigs = MergeClass->Class->NumConfigs;
749  font_set.move(&MergeClass->Class->font_set);
750  Class->NumProtos = NumProtos;
751  Class->MaxNumProtos = NumProtos;
752  Class->Prototypes = static_cast<PROTO>(Emalloc (sizeof(PROTO_STRUCT) * NumProtos));
753  for(i=0; i < NumProtos; i++)
754  {
755  NewProto = ProtoIn(Class, i);
756  OldProto = ProtoIn(MergeClass->Class, i);
757  Values[0] = OldProto->X;
758  Values[1] = OldProto->Y;
759  Values[2] = OldProto->Angle;
760  Normalize(Values);
761  NewProto->X = OldProto->X;
762  NewProto->Y = OldProto->Y;
763  NewProto->Length = OldProto->Length;
764  NewProto->Angle = OldProto->Angle;
765  NewProto->A = Values[0];
766  NewProto->B = Values[1];
767  NewProto->C = Values[2];
768  }
769 
770  Class->NumConfigs = NumConfigs;
771  Class->MaxNumConfigs = NumConfigs;
772  Class->font_set.move(&font_set);
773  Class->Configurations = static_cast<BIT_VECTOR*>(Emalloc (sizeof(BIT_VECTOR) * NumConfigs));
774  NumWords = WordsInVectorOfSize(NumProtos);
775  for(i=0; i < NumConfigs; i++)
776  {
777  NewConfig = NewBitVector(NumProtos);
778  OldConfig = MergeClass->Class->Configurations[i];
779  for(j=0; j < NumWords; j++)
780  NewConfig[j] = OldConfig[j];
781  Class->Configurations[i] = NewConfig;
782  }
783  }
784  return float_classes;
785 } // SetUpForFloat2Int

◆ STRING_PARAM_FLAG() [1/7]

STRING_PARAM_FLAG ( D  ,
""  ,
"Directory to write output files to"   
)

◆ STRING_PARAM_FLAG() [2/7]

STRING_PARAM_FLAG ( F  ,
"font_properties"  ,
"File listing font properties"   
)

◆ STRING_PARAM_FLAG() [3/7]

STRING_PARAM_FLAG ( O  ,
""  ,
"File to write unicharset to"   
)

◆ STRING_PARAM_FLAG() [4/7]

STRING_PARAM_FLAG ( output_trainer  ,
""  ,
"File to write trainer to"   
)

◆ STRING_PARAM_FLAG() [5/7]

STRING_PARAM_FLAG ( test_ch  ,
""  ,
"UTF8 test character string"   
)

◆ STRING_PARAM_FLAG() [6/7]

STRING_PARAM_FLAG ( U  ,
"unicharset"  ,
"File to load unicharset from"   
)

◆ STRING_PARAM_FLAG() [7/7]

STRING_PARAM_FLAG ( X  ,
""  ,
"File listing font xheights"   
)

Variable Documentation

◆ Config

CLUSTERCONFIG Config = { elliptical, 0.625, 0.05, 1.0, 1e-6, 0 }

Definition at line 88 of file commontraining.cpp.

◆ feature_defs

FEATURE_DEFS_STRUCT feature_defs

Definition at line 89 of file commontraining.cpp.

FindList
LABELEDLIST FindList(LIST List, char *Label)
Definition: commontraining.cpp:340
PROTO_STRUCT::Length
float Length
Definition: protos.h:41
tesseract::ParamUtils::ReadParamsFile
static bool ReadParamsFile(const char *file, SetParamConstraint constraint, ParamsVectors *member_params)
Definition: params.cpp:39
PROTOTYPE::TotalMagnitude
float TotalMagnitude
Definition: cluster.h:74
Normalize
void Normalize(float *Values)
Definition: commontraining.cpp:788
CLUSTERER::NumChar
int32_t NumChar
Definition: cluster.h:88
tesseract::SET_PARAM_CONSTRAINT_NON_INIT_ONLY
Definition: params.h:53
first_node
#define first_node(l)
Definition: oldlist.h:84
CLASS_STRUCT::Configurations
CONFIGS Configurations
Definition: protos.h:58
Emalloc
void * Emalloc(int Size)
Definition: emalloc.cpp:31
ShortNameToFeatureType
uint32_t ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs, const char *ShortName)
Definition: featdefs.cpp:269
CLUSTERCONFIG::Independence
float Independence
Definition: cluster.h:50
PROTOTYPE::LogMagnitude
float LogMagnitude
Definition: cluster.h:75
list_rec
Definition: oldlist.h:73
PROTO_STRUCT
Definition: protos.h:34
LABELEDLISTNODE::font_sample_count
int font_sample_count
Definition: commontraining.h:82
UnicityTableEqEq< int >
MERGE_CLASS_NODE
Definition: commontraining.h:87
FreeProtoList
void FreeProtoList(LIST *ProtoList)
Definition: cluster.cpp:538
CLASS_STRUCT::NumProtos
int16_t NumProtos
Definition: protos.h:53
PROTOTYPE::Magnitude
FLOATUNION Magnitude
Definition: cluster.h:77
CHAR_DESC_STRUCT::FeatureSets
FEATURE_SET FeatureSets[NUM_FEATURE_TYPES]
Definition: featdefs.h:40
Config
CLUSTERCONFIG Config
Definition: commontraining.cpp:88
STRING
Definition: strngs.h:45
CLASS_STRUCT::MaxNumProtos
int16_t MaxNumProtos
Definition: protos.h:54
CLUSTERER::SampleSize
int16_t SampleSize
Definition: cluster.h:82
ComputeDistance
float ComputeDistance(int k, PARAM_DESC *dim, float p1[], float p2[])
Definition: kdtree.cpp:447
tesseract::ParseCommandLineFlags
void ParseCommandLineFlags(const char *usage, int *argc, char ***argv, const bool remove_flags)
Definition: commandlineflags.cpp:166
NIL_LIST
#define NIL_LIST
Definition: oldlist.h:68
NewLabeledList
LABELEDLIST NewLabeledList(const char *Label)
Definition: commontraining.cpp:361
CLUSTERCONFIG::MaxIllegal
float MaxIllegal
Definition: cluster.h:48
PROTO_STRUCT::B
float B
Definition: protos.h:36
PROTOTYPE
Definition: cluster.h:62
ProtoIn
#define ProtoIn(Class, Pid)
Definition: protos.h:82
FEATURE_DEFS_STRUCT::FeatureDesc
const FEATURE_DESC_STRUCT * FeatureDesc[NUM_FEATURE_TYPES]
Definition: featdefs.h:46
CLUSTERCONFIG::Confidence
double Confidence
Definition: cluster.h:51
CLUSTERER::ParamDesc
PARAM_DESC * ParamDesc
Definition: cluster.h:83
PROTOTYPE::Merged
bool Merged
Definition: cluster.h:64
NewClass
CLASS_TYPE NewClass(int NumProtos, int NumConfigs)
Definition: protos.cpp:151
STRING::c_str
const char * c_str() const
Definition: strngs.cpp:192
MAX_NUM_CONFIGS
#define MAX_NUM_CONFIGS
Definition: intproto.h:46
file
Definition: include_gunit.h:22
MAX_NUM_PROTOS
#define MAX_NUM_PROTOS
Definition: intproto.h:47
MAX_NUM_CLASSES
#define MAX_NUM_CLASSES
Definition: matchdefs.h:29
FLOATUNION::Elliptical
float * Elliptical
Definition: cluster.h:59
MERGE_CLASS_NODE::Label
char * Label
Definition: commontraining.h:89
PROTO_STRUCT::Y
float Y
Definition: protos.h:39
UNICHARSET::unichar_to_id
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:209
PROTO_STRUCT::C
float C
Definition: protos.h:37
PROTOTYPE::Weight
FLOATUNION Weight
Definition: cluster.h:78
CLASS_STRUCT::NumConfigs
int16_t NumConfigs
Definition: protos.h:56
MergeClusters
int32_t MergeClusters(int16_t N, PARAM_DESC ParamDesc[], int32_t n1, int32_t n2, float m[], float m1[], float m2[])
Definition: cluster.cpp:824
PROTO_STRUCT::X
float X
Definition: protos.h:38
FEATURE_SET_STRUCT::MaxNumFeatures
uint16_t MaxNumFeatures
Definition: ocrfeatures.h:66
FEATURE_DESC_STRUCT::NumParams
uint16_t NumParams
Definition: ocrfeatures.h:52
CLASS_STRUCT::font_set
UnicityTableEqEq< int > font_set
Definition: protos.h:59
FEATURE_SET_STRUCT::Features
FEATURE Features[1]
Definition: ocrfeatures.h:67
CLASS_STRUCT
Definition: protos.h:45
PROTO_STRUCT::Angle
float Angle
Definition: protos.h:40
MakeClusterer
CLUSTERER * MakeClusterer(int16_t SampleSize, const PARAM_DESC ParamDesc[])
Definition: cluster.cpp:376
BIT_VECTOR
uint32_t * BIT_VECTOR
Definition: bitvec.h:27
push
LIST push(LIST list, void *element)
Definition: oldlist.cpp:172
FEATURE_STRUCT::Params
float Params[1]
Definition: ocrfeatures.h:60
FEATURE_DESC_STRUCT::ParamDesc
const PARAM_DESC * ParamDesc
Definition: ocrfeatures.h:54
CLASS_STRUCT::MaxNumConfigs
int16_t MaxNumConfigs
Definition: protos.h:57
tesseract::CCUtil::params
ParamsVectors * params()
Definition: ccutil.h:51
FEATURE_SET_STRUCT
Definition: ocrfeatures.h:64
LABELEDLISTNODE
Definition: commontraining.h:78
tessoptind
int tessoptind
Definition: tessopt.cpp:23
MERGE_CLASS_NODE::Class
CLASS_TYPE Class
Definition: commontraining.h:91
CHAR_DESC_STRUCT
Definition: featdefs.h:38
CLASS_STRUCT::Prototypes
PROTO Prototypes
Definition: protos.h:55
UNICHAR_LEN
#define UNICHAR_LEN
Definition: unichar.h:32
PROTO_STRUCT::A
float A
Definition: protos.h:35
PROTOTYPE::Significant
bool Significant
Definition: cluster.h:63
PROTOTYPE::Mean
float * Mean
Definition: cluster.h:73
CLUSTERER
Definition: cluster.h:81
UNICHARSET::contains_unichar
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:670
iterate
#define iterate(l)
Definition: oldlist.h:92
FreeFeatureSet
void FreeFeatureSet(FEATURE_SET FeatureSet)
Definition: ocrfeatures.cpp:61
FreeClass
void FreeClass(CLASS_TYPE Class)
Definition: protos.cpp:121
destroy
LIST destroy(LIST list)
Definition: oldlist.cpp:123
PROTOTYPE::Style
unsigned Style
Definition: cluster.h:69
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
LABELEDLISTNODE::SampleCount
int SampleCount
Definition: commontraining.h:81
CHAR_DESC_STRUCT::NumFeatureSets
uint32_t NumFeatureSets
Definition: featdefs.h:39
FreeLabeledList
void FreeLabeledList(LABELEDLIST LabeledList)
Definition: commontraining.cpp:476
PROTOTYPE::Variance
FLOATUNION Variance
Definition: cluster.h:76
CLUSTERCONFIG::MinSamples
float MinSamples
Definition: cluster.h:47
PROTOTYPE::NumSamples
unsigned NumSamples
Definition: cluster.h:70
push_last
LIST push_last(LIST list, void *item)
Definition: oldlist.cpp:185
PROTOTYPE::Cluster
CLUSTER * Cluster
Definition: cluster.h:71
LABELEDLISTNODE::Label
char * Label
Definition: commontraining.h:80
MakeSample
SAMPLE * MakeSample(CLUSTERER *Clusterer, const float *Feature, int32_t CharID)
Definition: cluster.cpp:429
UNICHARSET::unichar_insert
void unichar_insert(const char *const unichar_repr, OldUncleanUnichars old_style)
Definition: unicharset.cpp:625
ReadCharDescription
CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, FILE *File)
Definition: featdefs.cpp:235
UNICHARSET::size
int size() const
Definition: unicharset.h:341
UnicityTable::move
void move(UnicityTable< T > *from)
Definition: unicity_table.h:185
LABELEDLISTNODE::List
LIST List
Definition: commontraining.h:83
PROTOTYPE::Distrib
DISTRIBUTION * Distrib
Definition: cluster.h:72