#include "commontraining.h"
#include "allheaders.h"
#include "ccutil.h"
#include "classify.h"
#include "cluster.h"
#include "clusttool.h"
#include "efio.h"
#include "emalloc.h"
#include "featdefs.h"
#include "fontinfo.h"
#include "freelist.h"
#include "globals.h"
#include "intfeaturespace.h"
#include "mastertrainer.h"
#include "mf.h"
#include "ndminx.h"
#include "oldlist.h"
#include "params.h"
#include "shapetable.h"
#include "tessdatamanager.h"
#include "tessopt.h"
#include "tprintf.h"
#include "unicity_table.h"
#include <math.h>
Go to the source code of this file.
Namespaces | |
tesseract | |
Functions | |
INT_PARAM_FLAG (debug_level, 0,"Level of Trainer debugging") | |
INT_PARAM_FLAG (load_images, 0,"Load images with tr files") | |
STRING_PARAM_FLAG (configfile,"","File to load more configs from") | |
STRING_PARAM_FLAG (D,"","Directory to write output files to") | |
STRING_PARAM_FLAG (F,"font_properties","File listing font properties") | |
STRING_PARAM_FLAG (X,"","File listing font xheights") | |
STRING_PARAM_FLAG (U,"unicharset","File to load unicharset from") | |
STRING_PARAM_FLAG (O,"","File to write unicharset to") | |
STRING_PARAM_FLAG (T,"","File to load trainer from") | |
STRING_PARAM_FLAG (output_trainer,"","File to write trainer to") | |
STRING_PARAM_FLAG (test_ch,"","UTF8 test character string") | |
DOUBLE_PARAM_FLAG (clusterconfig_min_samples_fraction, Config.MinSamples,"Min number of samples per proto as % of total") | |
DOUBLE_PARAM_FLAG (clusterconfig_max_illegal, Config.MaxIllegal,"Max percentage of samples in a cluster which have more"" than 1 feature in that cluster") | |
DOUBLE_PARAM_FLAG (clusterconfig_independence, Config.Independence,"Desired independence between dimensions") | |
DOUBLE_PARAM_FLAG (clusterconfig_confidence, Config.Confidence,"Desired confidence in prototypes created") | |
void | ParseArguments (int *argc, char ***argv) |
ShapeTable * | tesseract::LoadShapeTable (const STRING &file_prefix) |
void | tesseract::WriteShapeTable (const STRING &file_prefix, const ShapeTable &shape_table) |
MasterTrainer * | tesseract::LoadTrainingData (int argc, const char *const *argv, bool replication, ShapeTable **shape_table, STRING *file_prefix) |
const char * | GetNextFilename (int argc, const char *const *argv) |
LABELEDLIST | FindList (LIST List, char *Label) |
LABELEDLIST | NewLabeledList (const char *Label) |
void | ReadTrainingSamples (const FEATURE_DEFS_STRUCT &feature_defs, const char *feature_name, int max_samples, UNICHARSET *unicharset, FILE *file, LIST *training_samples) |
void | FreeTrainingSamples (LIST CharList) |
void | FreeLabeledList (LABELEDLIST LabeledList) |
CLUSTERER * | SetUpForClustering (const FEATURE_DEFS_STRUCT &FeatureDefs, LABELEDLIST char_sample, const char *program_feature_type) |
void | MergeInsignificantProtos (LIST ProtoList, const char *label, CLUSTERER *Clusterer, CLUSTERCONFIG *Config) |
void | CleanUpUnusedData (LIST ProtoList) |
LIST | RemoveInsignificantProtos (LIST ProtoList, BOOL8 KeepSigProtos, BOOL8 KeepInsigProtos, int N) |
MERGE_CLASS | FindClass (LIST List, const char *Label) |
MERGE_CLASS | NewLabeledClass (const char *Label) |
void | FreeLabeledClassList (LIST ClassList) |
CLASS_STRUCT * | SetUpForFloat2Int (const UNICHARSET &unicharset, LIST LabeledClassList) |
void | Normalize (float *Values) |
void | FreeNormProtoList (LIST CharList) |
void | AddToNormProtosList (LIST *NormProtoList, LIST ProtoList, char *CharName) |
int | NumberOfProtos (LIST ProtoList, BOOL8 CountSigProtos, BOOL8 CountInsigProtos) |
Variables | |
CLUSTERCONFIG | Config = { elliptical, 0.625, 0.05, 1.0, 1e-6, 0 } |
FEATURE_DEFS_STRUCT | feature_defs |
CCUtil | ccutil |
Definition at line 854 of file commontraining.cpp.
void CleanUpUnusedData | ( | LIST | ProtoList | ) |
Definition at line 606 of file commontraining.cpp.
DOUBLE_PARAM_FLAG | ( | clusterconfig_min_samples_fraction | , |
Config. | MinSamples, | ||
"Min number of samples per proto as % of total" | |||
) |
DOUBLE_PARAM_FLAG | ( | clusterconfig_max_illegal | , |
Config. | MaxIllegal, | ||
"Max percentage of samples in a cluster which have more"" than 1 feature in that cluster" | |||
) |
DOUBLE_PARAM_FLAG | ( | clusterconfig_independence | , |
Config. | Independence, | ||
"Desired independence between dimensions" | |||
) |
DOUBLE_PARAM_FLAG | ( | clusterconfig_confidence | , |
Config. | Confidence, | ||
"Desired confidence in prototypes created" | |||
) |
MERGE_CLASS FindClass | ( | LIST | List, |
const char * | Label | ||
) |
Definition at line 701 of file commontraining.cpp.
LABELEDLIST FindList | ( | LIST | List, |
char * | Label | ||
) |
This routine searches through a list of labeled lists to find a list with the specified label. If a matching labeled list cannot be found, NULL is returned.
List | list to search |
Label | label to search for |
Definition at line 331 of file commontraining.cpp.
void FreeLabeledClassList | ( | LIST | ClassList | ) |
This routine deallocates all of the space allocated to the specified list of training samples.
ClassList | list of all fonts in document |
Definition at line 741 of file commontraining.cpp.
void FreeLabeledList | ( | LABELEDLIST | LabeledList | ) |
This routine deallocates all of the memory consumed by a labeled list. It does not free any memory which may be consumed by the items in the list.
LabeledList | labeled list to be freed |
Definition at line 487 of file commontraining.cpp.
void FreeNormProtoList | ( | LIST | CharList | ) |
Definition at line 838 of file commontraining.cpp.
void FreeTrainingSamples | ( | LIST | CharList | ) |
This routine deallocates all of the space allocated to the specified list of training samples.
CharList | list of all fonts in document |
Definition at line 458 of file commontraining.cpp.
const char* GetNextFilename | ( | int | argc, |
const char *const * | argv | ||
) |
This routine returns the next command line argument. If there are no remaining command line arguments, it returns NULL. This routine should only be called after all option arguments have been parsed and removed with ParseArguments.
Globals:
Definition at line 310 of file commontraining.cpp.
INT_PARAM_FLAG | ( | debug_level | , |
0 | , | ||
"Level of Trainer debugging" | |||
) |
INT_PARAM_FLAG | ( | load_images | , |
0 | , | ||
"Load images with tr files" | |||
) |
void MergeInsignificantProtos | ( | LIST | ProtoList, |
const char * | label, | ||
CLUSTERER * | Clusterer, | ||
CLUSTERCONFIG * | Config | ||
) |
Definition at line 541 of file commontraining.cpp.
MERGE_CLASS NewLabeledClass | ( | const char * | Label | ) |
Definition at line 718 of file commontraining.cpp.
LABELEDLIST NewLabeledList | ( | const char * | Label | ) |
This routine allocates a new, empty labeled list and gives it the specified label.
Label | label for new list |
Definition at line 357 of file commontraining.cpp.
void Normalize | ( | float * | Values | ) |
Definition at line 821 of file commontraining.cpp.
Definition at line 872 of file commontraining.cpp.
void ParseArguments | ( | int * | argc, |
char *** | argv | ||
) |
This routine parses the command line arguments that were passed to the program and uses them to set relevant training-related global parameters.
Globals:
argc | number of command line arguments to parse |
argv | command line arguments |
Definition at line 88 of file commontraining.cpp.
void ReadTrainingSamples | ( | const FEATURE_DEFS_STRUCT & | feature_defs, |
const char * | feature_name, | ||
int | max_samples, | ||
UNICHARSET * | unicharset, | ||
FILE * | file, | ||
LIST * | training_samples | ||
) |
This routine reads training samples from a file and places them into a data structure which organizes the samples by FontName and CharName. The resulting data structure is passed back to the caller through the training_samples parameter (the routine itself returns void).
file | open text file to read samples from |
feature_defs | |
feature_name | |
max_samples | |
unicharset | |
training_samples |
Definition at line 394 of file commontraining.cpp.
LIST RemoveInsignificantProtos | ( | LIST | ProtoList, |
BOOL8 | KeepSigProtos, | ||
BOOL8 | KeepInsigProtos, | ||
int | N | ||
) |
Definition at line 633 of file commontraining.cpp.
CLUSTERER* SetUpForClustering | ( | const FEATURE_DEFS_STRUCT & | FeatureDefs, |
LABELEDLIST | char_sample, | ||
const char * | program_feature_type | ||
) |
This routine reads samples from a LABELEDLIST and enters those samples into a clusterer data structure. This data structure is then returned to the caller.
char_sample | LABELEDLIST that holds all the feature information for a given character. |
FeatureDefs | |
program_feature_type | |
Definition at line 507 of file commontraining.cpp.
CLASS_STRUCT* SetUpForFloat2Int | ( | const UNICHARSET & | unicharset, |
LIST | LabeledClassList | ||
) |
Definition at line 758 of file commontraining.cpp.
STRING_PARAM_FLAG | ( | configfile | , |
"" | , | ||
"File to load more configs from" | |||
) |
STRING_PARAM_FLAG | ( | D | , |
"" | , | ||
"Directory to write output files to" | |||
) |
STRING_PARAM_FLAG | ( | F | , |
"font_properties" | , | ||
"File listing font properties" | |||
) |
STRING_PARAM_FLAG | ( | X | , |
"" | , | ||
"File listing font xheights" | |||
) |
STRING_PARAM_FLAG | ( | U | , |
"unicharset" | , | ||
"File to load unicharset from" | |||
) |
STRING_PARAM_FLAG | ( | O | , |
"" | , | ||
"File to write unicharset to" | |||
) |
STRING_PARAM_FLAG | ( | T | , |
"" | , | ||
"File to load trainer from" | |||
) |
STRING_PARAM_FLAG | ( | output_trainer | , |
"" | , | ||
"File to write trainer to" | |||
) |
STRING_PARAM_FLAG | ( | test_ch | , |
"" | , | ||
"UTF8 test character string" | |||
) |
CCUtil ccutil |
Definition at line 53 of file commontraining.cpp.
CLUSTERCONFIG Config = { elliptical, 0.625, 0.05, 1.0, 1e-6, 0 } |
Definition at line 51 of file commontraining.cpp.
FEATURE_DEFS_STRUCT feature_defs |
Definition at line 52 of file commontraining.cpp.