tesseract  4.0.0-1-g2a2b
commontraining.h File Reference
#include "cluster.h"
#include "commandlineflags.h"
#include "featdefs.h"
#include "intproto.h"
#include "oldlist.h"

Go to the source code of this file.

Classes

struct  LABELEDLISTNODE
 
struct  MERGE_CLASS_NODE
 

Namespaces

 tesseract
 

Typedefs

typedef struct LABELEDLISTNODE * LABELEDLIST
 
using MERGE_CLASS = MERGE_CLASS_NODE *
 

Functions

void ParseArguments (int *argc, char ***argv)
 
ShapeTable * tesseract::LoadShapeTable (const STRING &file_prefix)
 
void tesseract::WriteShapeTable (const STRING &file_prefix, const ShapeTable &shape_table)
 
MasterTrainer * tesseract::LoadTrainingData (int argc, const char *const *argv, bool replication, ShapeTable **shape_table, STRING *file_prefix)
 
const char * GetNextFilename (int argc, const char *const *argv)
 
LABELEDLIST FindList (LIST List, char *Label)
 
LABELEDLIST NewLabeledList (const char *Label)
 
void ReadTrainingSamples (const FEATURE_DEFS_STRUCT &feature_defs, const char *feature_name, int max_samples, UNICHARSET *unicharset, FILE *file, LIST *training_samples)
 
void WriteTrainingSamples (const FEATURE_DEFS_STRUCT &FeatureDefs, char *Directory, LIST CharList, const char *program_feature_type)
 
void FreeTrainingSamples (LIST CharList)
 
void FreeLabeledList (LABELEDLIST LabeledList)
 
void FreeLabeledClassList (LIST ClassListList)
 
CLUSTERER * SetUpForClustering (const FEATURE_DEFS_STRUCT &FeatureDefs, LABELEDLIST CharSample, const char *program_feature_type)
 
LIST RemoveInsignificantProtos (LIST ProtoList, bool KeepSigProtos, bool KeepInsigProtos, int N)
 
void CleanUpUnusedData (LIST ProtoList)
 
void MergeInsignificantProtos (LIST ProtoList, const char *label, CLUSTERER *Clusterer, CLUSTERCONFIG *Config)
 
MERGE_CLASS FindClass (LIST List, const char *Label)
 
MERGE_CLASS NewLabeledClass (const char *Label)
 
CLASS_STRUCT * SetUpForFloat2Int (const UNICHARSET &unicharset, LIST LabeledClassList)
 
void Normalize (float *Values)
 
void FreeNormProtoList (LIST CharList)
 
void AddToNormProtosList (LIST *NormProtoList, LIST ProtoList, char *CharName)
 
int NumberOfProtos (LIST ProtoList, bool CountSigProtos, bool CountInsigProtos)
 
void allocNormProtos ()
 

Variables

FEATURE_DEFS_STRUCT feature_defs
 
CLUSTERCONFIG Config
 

Typedef Documentation

◆ LABELEDLIST

typedef struct LABELEDLISTNODE * LABELEDLIST

◆ MERGE_CLASS

using MERGE_CLASS = MERGE_CLASS_NODE *

Definition at line 92 of file commontraining.h.

Function Documentation

◆ AddToNormProtosList()

void AddToNormProtosList ( LIST *  NormProtoList,
LIST  ProtoList,
char *  CharName 
)

Definition at line 836 of file commontraining.cpp.

840 {
841  PROTOTYPE* Proto;
842  LABELEDLIST LabeledProtoList;
843 
844  LabeledProtoList = NewLabeledList(CharName);
845  iterate(ProtoList)
846  {
847  Proto = (PROTOTYPE *) first_node (ProtoList);
848  LabeledProtoList->List = push(LabeledProtoList->List, Proto);
849  }
850  *NormProtoList = push(*NormProtoList, LabeledProtoList);
851 }
LABELEDLIST NewLabeledList(const char *Label)
LIST push(LIST list, void *element)
Definition: oldlist.cpp:283
#define first_node(l)
Definition: oldlist.h:141
#define iterate(l)
Definition: oldlist.h:161

◆ allocNormProtos()

void allocNormProtos ( )

◆ CleanUpUnusedData()

void CleanUpUnusedData ( LIST  ProtoList)

Definition at line 608 of file commontraining.cpp.

610 {
611  PROTOTYPE* Prototype;
612 
613  iterate(ProtoList)
614  {
615  Prototype = (PROTOTYPE *) first_node (ProtoList);
616  free(Prototype->Variance.Elliptical);
617  Prototype->Variance.Elliptical = nullptr;
618  free(Prototype->Magnitude.Elliptical);
619  Prototype->Magnitude.Elliptical = nullptr;
620  free(Prototype->Weight.Elliptical);
621  Prototype->Weight.Elliptical = nullptr;
622  }
623 }
float * Elliptical
Definition: cluster.h:64
FLOATUNION Weight
Definition: cluster.h:83
FLOATUNION Magnitude
Definition: cluster.h:82
#define first_node(l)
Definition: oldlist.h:141
#define iterate(l)
Definition: oldlist.h:161
FLOATUNION Variance
Definition: cluster.h:81

◆ FindClass()

MERGE_CLASS FindClass ( LIST  List,
const char *  Label 
)

Definition at line 691 of file commontraining.cpp.

691  {
692  MERGE_CLASS MergeClass;
693 
694  iterate (List)
695  {
696  MergeClass = (MERGE_CLASS) first_node (List);
697  if (strcmp (MergeClass->Label, Label) == 0)
698  return (MergeClass);
699  }
700  return (nullptr);
701 
702 } /* FindClass */
#define first_node(l)
Definition: oldlist.h:141
#define iterate(l)
Definition: oldlist.h:161
MERGE_CLASS_NODE * MERGE_CLASS

◆ FindList()

LABELEDLIST FindList ( LIST  List,
char *  Label 
)

This routine searches through a list of labeled lists to find a list with the specified label. If a matching labeled list cannot be found, nullptr is returned.

Parameters
List - list to search
Label - label to search for
Returns
Labeled list with the specified label or nullptr.
Note
Globals: none

Definition at line 348 of file commontraining.cpp.

348  {
349  LABELEDLIST LabeledList;
350 
351  iterate (List)
352  {
353  LabeledList = (LABELEDLIST) first_node (List);
354  if (strcmp (LabeledList->Label, Label) == 0)
355  return (LabeledList);
356  }
357  return (nullptr);
358 
359 } /* FindList */
struct LABELEDLISTNODE * LABELEDLIST
#define first_node(l)
Definition: oldlist.h:141
#define iterate(l)
Definition: oldlist.h:161

◆ FreeLabeledClassList()

void FreeLabeledClassList ( LIST  ClassList)

This routine deallocates all of the space allocated to the specified list of training samples.

Parameters
ClassList - list of all fonts in document
Returns
none
Note
Globals: none

Definition at line 724 of file commontraining.cpp.

724  {
725  MERGE_CLASS MergeClass;
726 
727  LIST nodes = ClassList;
728  iterate(ClassList) /* iterate through all of the fonts */
729  {
730  MergeClass = (MERGE_CLASS) first_node (ClassList);
731  free (MergeClass->Label);
732  FreeClass(MergeClass->Class);
733  delete MergeClass;
734  }
735  destroy(nodes);
736 
737 } /* FreeLabeledClassList */
LIST destroy(LIST list)
Definition: oldlist.cpp:170
void FreeClass(CLASS_TYPE Class)
Definition: protos.cpp:212
#define first_node(l)
Definition: oldlist.h:141
CLASS_TYPE Class
#define iterate(l)
Definition: oldlist.h:161
MERGE_CLASS_NODE * MERGE_CLASS

◆ FreeLabeledList()

void FreeLabeledList ( LABELEDLIST  LabeledList)

This routine deallocates all of the memory consumed by a labeled list. It does not free any memory which may be consumed by the items in the list.

Parameters
LabeledList - labeled list to be freed
Note
Globals: none
Returns
none

Definition at line 489 of file commontraining.cpp.

489  {
490  destroy(LabeledList->List);
491  free(LabeledList->Label);
492  free(LabeledList);
493 } /* FreeLabeledList */
LIST destroy(LIST list)
Definition: oldlist.cpp:170

◆ FreeNormProtoList()

void FreeNormProtoList ( LIST  CharList)

Definition at line 820 of file commontraining.cpp.

822 {
823  LABELEDLIST char_sample;
824 
825  LIST nodes = CharList;
826  iterate(CharList) /* iterate through all of the fonts */
827  {
828  char_sample = (LABELEDLIST) first_node (CharList);
829  FreeLabeledList (char_sample);
830  }
831  destroy(nodes);
832 
833 } // FreeNormProtoList
void FreeLabeledList(LABELEDLIST LabeledList)
LIST destroy(LIST list)
Definition: oldlist.cpp:170
struct LABELEDLISTNODE * LABELEDLIST
#define first_node(l)
Definition: oldlist.h:141
#define iterate(l)
Definition: oldlist.h:161

◆ FreeTrainingSamples()

void FreeTrainingSamples ( LIST  CharList)

This routine deallocates all of the space allocated to the specified list of training samples.

Parameters
CharList - list of all fonts in document
Returns
none
Note
Globals: none

Definition at line 462 of file commontraining.cpp.

462  {
463  LABELEDLIST char_sample;
464  FEATURE_SET FeatureSet;
465  LIST FeatureList;
466 
467  LIST nodes = CharList;
468  iterate(CharList) { /* iterate through all of the fonts */
469  char_sample = (LABELEDLIST) first_node(CharList);
470  FeatureList = char_sample->List;
471  iterate(FeatureList) { /* iterate through all of the classes */
472  FeatureSet = (FEATURE_SET) first_node(FeatureList);
473  FreeFeatureSet(FeatureSet);
474  }
475  FreeLabeledList(char_sample);
476  }
477  destroy(nodes);
478 } /* FreeTrainingSamples */
void FreeLabeledList(LABELEDLIST LabeledList)
LIST destroy(LIST list)
Definition: oldlist.cpp:170
struct LABELEDLISTNODE * LABELEDLIST
FEATURE_SET_STRUCT * FEATURE_SET
Definition: ocrfeatures.h:71
#define first_node(l)
Definition: oldlist.h:141
void FreeFeatureSet(FEATURE_SET FeatureSet)
Definition: ocrfeatures.cpp:65
#define iterate(l)
Definition: oldlist.h:161

◆ GetNextFilename()

const char* GetNextFilename ( int  argc,
const char *const *  argv 
)

This routine returns the next command line argument. If there are no remaining command line arguments, it returns nullptr. This routine should only be called after all option arguments have been parsed and removed with ParseArguments.

Globals:

  • tessoptind defined by tessopt sys call

Returns
Next command line argument or nullptr.

Definition at line 331 of file commontraining.cpp.

331  {
332  if (tessoptind < argc)
333  return argv[tessoptind++];
334  else
335  return nullptr;
336 } /* GetNextFilename */
int tessoptind
Definition: tessopt.cpp:24

◆ MergeInsignificantProtos()

void MergeInsignificantProtos ( LIST  ProtoList,
const char *  label,
CLUSTERER *  Clusterer,
CLUSTERCONFIG *  Config 
)

Definition at line 541 of file commontraining.cpp.

543  {
544  PROTOTYPE* Prototype;
545  bool debug = strcmp(FLAGS_test_ch.c_str(), label) == 0;
546 
547  LIST pProtoList = ProtoList;
548  iterate(pProtoList) {
549  Prototype = (PROTOTYPE *) first_node (pProtoList);
550  if (Prototype->Significant || Prototype->Merged)
551  continue;
552  float best_dist = 0.125;
553  PROTOTYPE* best_match = nullptr;
554  // Find the nearest alive prototype.
555  LIST list_it = ProtoList;
556  iterate(list_it) {
557  PROTOTYPE* test_p = (PROTOTYPE *) first_node (list_it);
558  if (test_p != Prototype && !test_p->Merged) {
559  float dist = ComputeDistance(Clusterer->SampleSize,
560  Clusterer->ParamDesc,
561  Prototype->Mean, test_p->Mean);
562  if (dist < best_dist) {
563  best_match = test_p;
564  best_dist = dist;
565  }
566  }
567  }
568  if (best_match != nullptr && !best_match->Significant) {
569  if (debug)
570  tprintf("Merging red clusters (%d+%d) at %g,%g and %g,%g\n",
571  best_match->NumSamples, Prototype->NumSamples,
572  best_match->Mean[0], best_match->Mean[1],
573  Prototype->Mean[0], Prototype->Mean[1]);
574  best_match->NumSamples = MergeClusters(Clusterer->SampleSize,
575  Clusterer->ParamDesc,
576  best_match->NumSamples,
577  Prototype->NumSamples,
578  best_match->Mean,
579  best_match->Mean, Prototype->Mean);
580  Prototype->NumSamples = 0;
581  Prototype->Merged = 1;
582  } else if (best_match != nullptr) {
583  if (debug)
584  tprintf("Red proto at %g,%g matched a green one at %g,%g\n",
585  Prototype->Mean[0], Prototype->Mean[1],
586  best_match->Mean[0], best_match->Mean[1]);
587  Prototype->Merged = 1;
588  }
589  }
590  // Mark significant those that now have enough samples.
591  int min_samples =
592  static_cast<int32_t>(clusterconfig->MinSamples * Clusterer->NumChar);
593  pProtoList = ProtoList;
594  iterate(pProtoList) {
595  Prototype = (PROTOTYPE *) first_node (pProtoList);
596  // Process insignificant protos that do not match a green one
597  if (!Prototype->Significant && Prototype->NumSamples >= min_samples &&
598  !Prototype->Merged) {
599  if (debug)
600  tprintf("Red proto at %g,%g becoming green\n",
601  Prototype->Mean[0], Prototype->Mean[1]);
602  Prototype->Significant = true;
603  }
604  }
605 } /* MergeInsignificantProtos */
float * Mean
Definition: cluster.h:78
PARAM_DESC * ParamDesc
Definition: cluster.h:88
unsigned Merged
Definition: cluster.h:69
unsigned Significant
Definition: cluster.h:68
int32_t MergeClusters(int16_t N, PARAM_DESC ParamDesc[], int32_t n1, int32_t n2, float m[], float m1[], float m2[])
Definition: cluster.cpp:852
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
int32_t NumChar
Definition: cluster.h:93
#define first_node(l)
Definition: oldlist.h:141
#define iterate(l)
Definition: oldlist.h:161
unsigned NumSamples
Definition: cluster.h:75
int16_t SampleSize
Definition: cluster.h:87
float ComputeDistance(int k, PARAM_DESC *dim, float p1[], float p2[])
Definition: kdtree.cpp:450

◆ NewLabeledClass()

MERGE_CLASS NewLabeledClass ( const char *  Label)

Definition at line 705 of file commontraining.cpp.

705  {
706  MERGE_CLASS MergeClass;
707 
708  MergeClass = new MERGE_CLASS_NODE;
709  MergeClass->Label = (char*)Emalloc (strlen (Label)+1);
710  strcpy (MergeClass->Label, Label);
711  MergeClass->Class = NewClass (MAX_NUM_PROTOS, MAX_NUM_CONFIGS);
712  return (MergeClass);
713 
714 } /* NewLabeledClass */
void * Emalloc(int Size)
Definition: emalloc.cpp:31
#define MAX_NUM_CONFIGS
Definition: intproto.h:47
#define MAX_NUM_PROTOS
Definition: intproto.h:48
CLASS_TYPE Class
CLASS_TYPE NewClass(int NumProtos, int NumConfigs)
Definition: protos.cpp:244

◆ NewLabeledList()

LABELEDLIST NewLabeledList ( const char *  Label)

This routine allocates a new, empty labeled list and gives it the specified label.

Parameters
Label - label for new list
Returns
New, empty labeled list.
Note
Globals: none

Definition at line 369 of file commontraining.cpp.

369  {
370  LABELEDLIST LabeledList;
371 
372  LabeledList = (LABELEDLIST) Emalloc (sizeof (LABELEDLISTNODE));
373  LabeledList->Label = (char*)Emalloc (strlen (Label)+1);
374  strcpy (LabeledList->Label, Label);
375  LabeledList->List = NIL_LIST;
376  LabeledList->SampleCount = 0;
377  LabeledList->font_sample_count = 0;
378  return (LabeledList);
379 
380 } /* NewLabeledList */
void * Emalloc(int Size)
Definition: emalloc.cpp:31
struct LABELEDLISTNODE * LABELEDLIST
#define NIL_LIST
Definition: oldlist.h:127

◆ Normalize()

void Normalize ( float *  Values)

Definition at line 803 of file commontraining.cpp.

805 {
806  float Slope;
807  float Intercept;
808  float Normalizer;
809 
810  Slope = tan(Values [2] * 2 * M_PI);
811  Intercept = Values [1] - Slope * Values [0];
812  Normalizer = 1 / sqrt (Slope * Slope + 1.0);
813 
814  Values [0] = Slope * Normalizer;
815  Values [1] = - Normalizer;
816  Values [2] = Intercept * Normalizer;
817 } // Normalize

◆ NumberOfProtos()

int NumberOfProtos ( LIST  ProtoList,
bool  CountSigProtos,
bool  CountInsigProtos 
)

Definition at line 854 of file commontraining.cpp.

855  {
856  int N = 0;
857  iterate(ProtoList)
858  {
859  PROTOTYPE* Proto = (PROTOTYPE*)first_node(ProtoList);
860  if ((Proto->Significant && CountSigProtos) ||
861  (!Proto->Significant && CountInsigProtos))
862  N++;
863  }
864  return(N);
865 }
unsigned Significant
Definition: cluster.h:68
#define first_node(l)
Definition: oldlist.h:141
#define iterate(l)
Definition: oldlist.h:161

◆ ParseArguments()

void ParseArguments ( int *  argc,
char ***  argv 
)

This routine parses the command line arguments that were passed to the program and uses them to set relevant training-related global parameters.

Globals:

  • Config current clustering parameters

Parameters
argc - number of command line arguments to parse
argv - command line arguments
Returns
none

Definition at line 130 of file commontraining.cpp.

130  {
131  STRING usage;
132  if (*argc) {
133  usage += (*argv)[0];
134  usage += " -v | --version | ";
135  usage += (*argv)[0];
136  }
137  usage += " [.tr files ...]";
138  tesseract::ParseCommandLineFlags(usage.c_str(), argc, argv, true);
139  // Record the index of the first non-flag argument to 1, since we set
140  // remove_flags to true when parsing the flags.
141  tessoptind = 1;
142  // Set some global values based on the flags.
143  Config.MinSamples =
144  std::max(0.0, std::min(1.0, double(FLAGS_clusterconfig_min_samples_fraction)));
145  Config.MaxIllegal =
146  std::max(0.0, std::min(1.0, double(FLAGS_clusterconfig_max_illegal)));
147  Config.Independence =
148  std::max(0.0, std::min(1.0, double(FLAGS_clusterconfig_independence)));
149  Config.Confidence =
150  std::max(0.0, std::min(1.0, double(FLAGS_clusterconfig_confidence)));
151  // Set additional parameters from config file if specified.
152  if (!FLAGS_configfile.empty()) {
154  FLAGS_configfile.c_str(),
156  ccutil.params());
157  }
158 }
CLUSTERCONFIG Config
float MinSamples
Definition: cluster.h:50
const char * c_str() const
Definition: strngs.cpp:207
float MaxIllegal
Definition: cluster.h:51
void ParseCommandLineFlags(const char *usage, int *argc, char ***argv, const bool remove_flags)
static bool ReadParamsFile(const char *file, SetParamConstraint constraint, ParamsVectors *member_params)
Definition: params.cpp:39
ParamsVectors * params()
Definition: ccutil.h:62
double Confidence
Definition: cluster.h:54
float Independence
Definition: cluster.h:53
Definition: strngs.h:45
int tessoptind
Definition: tessopt.cpp:24
CCUtil ccutil

◆ ReadTrainingSamples()

void ReadTrainingSamples ( const FEATURE_DEFS_STRUCT &  feature_definitions,
const char *  feature_name,
int  max_samples,
UNICHARSET *  unicharset,
FILE *  file,
LIST *  training_samples 
)

This routine reads training samples from a file and places them into a data structure which organizes the samples by FontName and CharName. The resulting data structure is returned to the caller through the training_samples parameter.

Parameters
file - open text file to read samples from
feature_definitions
feature_name
max_samples
unicharset
training_samples
Returns
none
Note
Globals: none

Definition at line 399 of file commontraining.cpp.

402  {
403  char buffer[2048];
404  char unichar[UNICHAR_LEN + 1];
405  LABELEDLIST char_sample;
406  FEATURE_SET feature_samples;
407  CHAR_DESC char_desc;
408  uint32_t feature_type =
409  ShortNameToFeatureType(feature_definitions, feature_name);
410 
411  // Zero out the font_sample_count for all the classes.
412  LIST it = *training_samples;
413  iterate(it) {
414  char_sample = reinterpret_cast<LABELEDLIST>(first_node(it));
415  char_sample->font_sample_count = 0;
416  }
417 
418  while (fgets(buffer, 2048, file) != nullptr) {
419  if (buffer[0] == '\n')
420  continue;
421 
422  sscanf(buffer, "%*s %s", unichar);
423  if (unicharset != nullptr && !unicharset->contains_unichar(unichar)) {
424  unicharset->unichar_insert(unichar);
425  if (unicharset->size() > MAX_NUM_CLASSES) {
426  tprintf("Error: Size of unicharset in training is "
427  "greater than MAX_NUM_CLASSES\n");
428  exit(1);
429  }
430  }
431  char_sample = FindList(*training_samples, unichar);
432  if (char_sample == nullptr) {
433  char_sample = NewLabeledList(unichar);
434  *training_samples = push(*training_samples, char_sample);
435  }
436  char_desc = ReadCharDescription(feature_definitions, file);
437  feature_samples = char_desc->FeatureSets[feature_type];
438  if (char_sample->font_sample_count < max_samples || max_samples <= 0) {
439  char_sample->List = push(char_sample->List, feature_samples);
440  char_sample->SampleCount++;
441  char_sample->font_sample_count++;
442  } else {
443  FreeFeatureSet(feature_samples);
444  }
445  for (size_t i = 0; i < char_desc->NumFeatureSets; i++) {
446  if (feature_type != i)
447  FreeFeatureSet(char_desc->FeatureSets[i]);
448  }
449  free(char_desc);
450  }
451 } // ReadTrainingSamples
CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, FILE *File)
Definition: featdefs.cpp:236
LABELEDLIST NewLabeledList(const char *Label)
LIST push(LIST list, void *element)
Definition: oldlist.cpp:283
#define UNICHAR_LEN
Definition: unichar.h:31
int size() const
Definition: unicharset.h:336
void unichar_insert(const char *const unichar_repr, OldUncleanUnichars old_style)
Definition: unicharset.cpp:625
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:670
FEATURE_SET FeatureSets[NUM_FEATURE_TYPES]
Definition: featdefs.h:42
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
#define first_node(l)
Definition: oldlist.h:141
LABELEDLIST FindList(LIST List, char *Label)
uint32_t NumFeatureSets
Definition: featdefs.h:41
void FreeFeatureSet(FEATURE_SET FeatureSet)
Definition: ocrfeatures.cpp:65
#define iterate(l)
Definition: oldlist.h:161
uint32_t ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs, const char *ShortName)
Definition: featdefs.cpp:270
#define MAX_NUM_CLASSES
Definition: matchdefs.h:32

◆ RemoveInsignificantProtos()

LIST RemoveInsignificantProtos ( LIST  ProtoList,
bool  KeepSigProtos,
bool  KeepInsigProtos,
int  N 
)

Definition at line 626 of file commontraining.cpp.

632 {
633  LIST NewProtoList = NIL_LIST;
634  LIST pProtoList;
635  PROTOTYPE* Proto;
636  PROTOTYPE* NewProto;
637  int i;
638 
639  pProtoList = ProtoList;
640  iterate(pProtoList)
641  {
642  Proto = (PROTOTYPE *) first_node (pProtoList);
643  if ((Proto->Significant && KeepSigProtos) ||
644  (!Proto->Significant && KeepInsigProtos))
645  {
646  NewProto = (PROTOTYPE *)Emalloc(sizeof(PROTOTYPE));
647 
648  NewProto->Mean = (float *)Emalloc(N * sizeof(float));
649  NewProto->Significant = Proto->Significant;
650  NewProto->Style = Proto->Style;
651  NewProto->NumSamples = Proto->NumSamples;
652  NewProto->Cluster = nullptr;
653  NewProto->Distrib = nullptr;
654 
655  for (i=0; i < N; i++)
656  NewProto->Mean[i] = Proto->Mean[i];
657  if (Proto->Variance.Elliptical != nullptr) {
658  NewProto->Variance.Elliptical = (float *)Emalloc(N * sizeof(float));
659  for (i=0; i < N; i++)
660  NewProto->Variance.Elliptical[i] = Proto->Variance.Elliptical[i];
661  }
662  else
663  NewProto->Variance.Elliptical = nullptr;
664  //---------------------------------------------
665  if (Proto->Magnitude.Elliptical != nullptr) {
666  NewProto->Magnitude.Elliptical = (float *)Emalloc(N * sizeof(float));
667  for (i=0; i < N; i++)
668  NewProto->Magnitude.Elliptical[i] = Proto->Magnitude.Elliptical[i];
669  }
670  else
671  NewProto->Magnitude.Elliptical = nullptr;
672  //------------------------------------------------
673  if (Proto->Weight.Elliptical != nullptr) {
674  NewProto->Weight.Elliptical = (float *)Emalloc(N * sizeof(float));
675  for (i=0; i < N; i++)
676  NewProto->Weight.Elliptical[i] = Proto->Weight.Elliptical[i];
677  }
678  else
679  NewProto->Weight.Elliptical = nullptr;
680 
681  NewProto->TotalMagnitude = Proto->TotalMagnitude;
682  NewProto->LogMagnitude = Proto->LogMagnitude;
683  NewProtoList = push_last(NewProtoList, NewProto);
684  }
685  }
686  FreeProtoList(&ProtoList);
687  return (NewProtoList);
688 } /* RemoveInsignificantProtos */
float * Mean
Definition: cluster.h:78
void * Emalloc(int Size)
Definition: emalloc.cpp:31
float TotalMagnitude
Definition: cluster.h:79
float * Elliptical
Definition: cluster.h:64
FLOATUNION Weight
Definition: cluster.h:83
DISTRIBUTION * Distrib
Definition: cluster.h:77
void FreeProtoList(LIST *ProtoList)
Definition: cluster.cpp:563
unsigned Style
Definition: cluster.h:74
unsigned Significant
Definition: cluster.h:68
LIST push_last(LIST list, void *item)
Definition: oldlist.cpp:297
FLOATUNION Magnitude
Definition: cluster.h:82
#define first_node(l)
Definition: oldlist.h:141
#define NIL_LIST
Definition: oldlist.h:127
CLUSTER * Cluster
Definition: cluster.h:76
#define iterate(l)
Definition: oldlist.h:161
unsigned NumSamples
Definition: cluster.h:75
float LogMagnitude
Definition: cluster.h:80
FLOATUNION Variance
Definition: cluster.h:81

◆ SetUpForClustering()

CLUSTERER* SetUpForClustering ( const FEATURE_DEFS_STRUCT &  FeatureDefs,
LABELEDLIST  char_sample,
const char *  program_feature_type 
)

This routine reads samples from a LABELEDLIST and enters those samples into a clusterer data structure. This data structure is then returned to the caller.

Parameters
char_sample - LABELEDLIST that holds all the feature information for a given character.
FeatureDefs
program_feature_type
Returns
Pointer to new clusterer data structure.
Note
Globals: None

Definition at line 507 of file commontraining.cpp.

509  {
510  uint16_t N;
511  int i, j;
512  float* Sample = nullptr;
513  CLUSTERER *Clusterer;
514  int32_t CharID;
515  LIST FeatureList = nullptr;
516  FEATURE_SET FeatureSet = nullptr;
517 
518  int32_t desc_index =
519  ShortNameToFeatureType(FeatureDefs, program_feature_type);
520  N = FeatureDefs.FeatureDesc[desc_index]->NumParams;
521  Clusterer = MakeClusterer(N, FeatureDefs.FeatureDesc[desc_index]->ParamDesc);
522 
523  FeatureList = char_sample->List;
524  CharID = 0;
525  iterate(FeatureList) {
526  FeatureSet = (FEATURE_SET) first_node(FeatureList);
527  for (i = 0; i < FeatureSet->MaxNumFeatures; i++) {
528  if (Sample == nullptr) Sample = (float*)Emalloc(N * sizeof(float));
529  for (j = 0; j < N; j++)
530  Sample[j] = FeatureSet->Features[i]->Params[j];
531  MakeSample (Clusterer, Sample, CharID);
532  }
533  CharID++;
534  }
535  free(Sample);
536  return Clusterer;
537 
538 } /* SetUpForClustering */
void * Emalloc(int Size)
Definition: emalloc.cpp:31
SAMPLE * MakeSample(CLUSTERER *Clusterer, const float *Feature, int32_t CharID)
Definition: cluster.cpp:452
float Params[1]
Definition: ocrfeatures.h:62
const PARAM_DESC * ParamDesc
Definition: ocrfeatures.h:56
uint16_t MaxNumFeatures
Definition: ocrfeatures.h:68
FEATURE Features[1]
Definition: ocrfeatures.h:69
FEATURE_SET_STRUCT * FEATURE_SET
Definition: ocrfeatures.h:71
#define first_node(l)
Definition: oldlist.h:141
#define iterate(l)
Definition: oldlist.h:161
const FEATURE_DESC_STRUCT * FeatureDesc[NUM_FEATURE_TYPES]
Definition: featdefs.h:48
uint32_t ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs, const char *ShortName)
Definition: featdefs.cpp:270
CLUSTERER * MakeClusterer(int16_t SampleSize, const PARAM_DESC ParamDesc[])
Definition: cluster.cpp:399

◆ SetUpForFloat2Int()

CLASS_STRUCT* SetUpForFloat2Int ( const UNICHARSET &  unicharset,
LIST  LabeledClassList 
)

Definition at line 740 of file commontraining.cpp.

741  {
742  MERGE_CLASS MergeClass;
743  CLASS_TYPE Class;
744  int NumProtos;
745  int NumConfigs;
746  int NumWords;
747  int i, j;
748  float Values[3];
749  PROTO NewProto;
750  PROTO OldProto;
751  BIT_VECTOR NewConfig;
752  BIT_VECTOR OldConfig;
753 
754  // printf("Float2Int ...\n");
755 
756  CLASS_STRUCT* float_classes = new CLASS_STRUCT[unicharset.size()];
757  iterate(LabeledClassList)
758  {
759  UnicityTableEqEq<int> font_set;
760  MergeClass = (MERGE_CLASS) first_node (LabeledClassList);
761  Class = &float_classes[unicharset.unichar_to_id(MergeClass->Label)];
762  NumProtos = MergeClass->Class->NumProtos;
763  NumConfigs = MergeClass->Class->NumConfigs;
764  font_set.move(&MergeClass->Class->font_set);
765  Class->NumProtos = NumProtos;
766  Class->MaxNumProtos = NumProtos;
767  Class->Prototypes = (PROTO) Emalloc (sizeof(PROTO_STRUCT) * NumProtos);
768  for(i=0; i < NumProtos; i++)
769  {
770  NewProto = ProtoIn(Class, i);
771  OldProto = ProtoIn(MergeClass->Class, i);
772  Values[0] = OldProto->X;
773  Values[1] = OldProto->Y;
774  Values[2] = OldProto->Angle;
775  Normalize(Values);
776  NewProto->X = OldProto->X;
777  NewProto->Y = OldProto->Y;
778  NewProto->Length = OldProto->Length;
779  NewProto->Angle = OldProto->Angle;
780  NewProto->A = Values[0];
781  NewProto->B = Values[1];
782  NewProto->C = Values[2];
783  }
784 
785  Class->NumConfigs = NumConfigs;
786  Class->MaxNumConfigs = NumConfigs;
787  Class->font_set.move(&font_set);
788  Class->Configurations = (BIT_VECTOR*) Emalloc (sizeof(BIT_VECTOR) * NumConfigs);
789  NumWords = WordsInVectorOfSize(NumProtos);
790  for(i=0; i < NumConfigs; i++)
791  {
792  NewConfig = NewBitVector(NumProtos);
793  OldConfig = MergeClass->Class->Configurations[i];
794  for(j=0; j < NumWords; j++)
795  NewConfig[j] = OldConfig[j];
796  Class->Configurations[i] = NewConfig;
797  }
798  }
799  return float_classes;
800 } // SetUpForFloat2Int
int16_t NumProtos
Definition: protos.h:61
void move(UnicityTable< T > *from)
float X
Definition: protos.h:46
#define WordsInVectorOfSize(NumBits)
Definition: bitvec.h:63
float B
Definition: protos.h:44
PROTO_STRUCT * PROTO
Definition: protos.h:51
void * Emalloc(int Size)
Definition: emalloc.cpp:31
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:209
uint32_t * BIT_VECTOR
Definition: bitvec.h:28
PROTO Prototypes
Definition: protos.h:63
int16_t MaxNumProtos
Definition: protos.h:62
UnicityTableEqEq< int > font_set
Definition: protos.h:67
#define ProtoIn(Class, Pid)
Definition: protos.h:121
int16_t NumConfigs
Definition: protos.h:64
int size() const
Definition: unicharset.h:336
float Y
Definition: protos.h:47
float Length
Definition: protos.h:49
BIT_VECTOR NewBitVector(int NumBits)
Definition: bitvec.cpp:82
int16_t MaxNumConfigs
Definition: protos.h:65
float C
Definition: protos.h:45
#define first_node(l)
Definition: oldlist.h:141
CLASS_TYPE Class
float Angle
Definition: protos.h:48
#define iterate(l)
Definition: oldlist.h:161
MERGE_CLASS_NODE * MERGE_CLASS
CONFIGS Configurations
Definition: protos.h:66
void Normalize(float *Values)
float A
Definition: protos.h:43

◆ WriteTrainingSamples()

void WriteTrainingSamples ( const FEATURE_DEFS_STRUCT &  FeatureDefs,
char *  Directory,
LIST  CharList,
const char *  program_feature_type 
)

Variable Documentation

◆ Config

CLUSTERCONFIG Config

Definition at line 95 of file commontraining.cpp.

◆ feature_defs

FEATURE_DEFS_STRUCT feature_defs

Definition at line 96 of file commontraining.cpp.