tesseract
5.0.0-alpha-619-ge9db
|
#include "commontraining.h"
#include <algorithm>
#include <cmath>
#include "allheaders.h"
#include "ccutil.h"
#include "classify.h"
#include "cluster.h"
#include "clusttool.h"
#include "emalloc.h"
#include "featdefs.h"
#include "fontinfo.h"
#include "intfeaturespace.h"
#include "mastertrainer.h"
#include "mf.h"
#include "oldlist.h"
#include "params.h"
#include "shapetable.h"
#include "tessdatamanager.h"
#include "tessopt.h"
#include "tprintf.h"
#include "unicity_table.h"
Go to the source code of this file.
Namespaces | |
tesseract | |
Macros | |
#define | _USE_MATH_DEFINES |
Functions | |
INT_PARAM_FLAG (debug_level, 0, "Level of Trainer debugging") | |
STRING_PARAM_FLAG (D, "", "Directory to write output files to") | |
STRING_PARAM_FLAG (F, "font_properties", "File listing font properties") | |
STRING_PARAM_FLAG (X, "", "File listing font xheights") | |
STRING_PARAM_FLAG (U, "unicharset", "File to load unicharset from") | |
STRING_PARAM_FLAG (O, "", "File to write unicharset to") | |
STRING_PARAM_FLAG (output_trainer, "", "File to write trainer to") | |
STRING_PARAM_FLAG (test_ch, "", "UTF8 test character string") | |
void | ParseArguments (int *argc, char ***argv) |
ShapeTable * | tesseract::LoadShapeTable (const STRING &file_prefix) |
void | tesseract::WriteShapeTable (const STRING &file_prefix, const ShapeTable &shape_table) |
MasterTrainer * | tesseract::LoadTrainingData (int argc, const char *const *argv, bool replication, ShapeTable **shape_table, STRING *file_prefix) |
const char * | GetNextFilename (int argc, const char *const *argv) |
LABELEDLIST | FindList (LIST List, char *Label) |
LABELEDLIST | NewLabeledList (const char *Label) |
void | ReadTrainingSamples (const FEATURE_DEFS_STRUCT &feature_definitions, const char *feature_name, int max_samples, UNICHARSET *unicharset, FILE *file, LIST *training_samples) |
void | FreeTrainingSamples (LIST CharList) |
void | FreeLabeledList (LABELEDLIST LabeledList) |
CLUSTERER * | SetUpForClustering (const FEATURE_DEFS_STRUCT &FeatureDefs, LABELEDLIST char_sample, const char *program_feature_type) |
void | MergeInsignificantProtos (LIST ProtoList, const char *label, CLUSTERER *Clusterer, CLUSTERCONFIG *clusterconfig) |
void | CleanUpUnusedData (LIST ProtoList) |
LIST | RemoveInsignificantProtos (LIST ProtoList, bool KeepSigProtos, bool KeepInsigProtos, int N) |
MERGE_CLASS | FindClass (LIST List, const char *Label) |
MERGE_CLASS | NewLabeledClass (const char *Label) |
void | FreeLabeledClassList (LIST ClassList) |
CLASS_STRUCT * | SetUpForFloat2Int (const UNICHARSET &unicharset, LIST LabeledClassList) |
void | Normalize (float *Values) |
void | FreeNormProtoList (LIST CharList) |
void | AddToNormProtosList (LIST *NormProtoList, LIST ProtoList, char *CharName) |
int | NumberOfProtos (LIST ProtoList, bool CountSigProtos, bool CountInsigProtos) |
Variables | |
CLUSTERCONFIG | Config = { elliptical, 0.625, 0.05, 1.0, 1e-6, 0 } |
FEATURE_DEFS_STRUCT | feature_defs |
#define _USE_MATH_DEFINES |
Definition at line 14 of file commontraining.cpp.
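void AddToNormProtosList | ( | LIST * | NormProtoList, |
LIST | ProtoList, | ||
char * | CharName | ||
) |
Definition at line 821 of file commontraining.cpp.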
void CleanUpUnusedData | ( | LIST | ProtoList | ) |
Definition at line 595 of file commontraining.cpp.
MERGE_CLASS FindClass | ( | LIST | List, |
const char * | Label | ||
) |
Definition at line 678 of file commontraining.cpp.
LABELEDLIST FindList | ( | LIST | List, |
char * | Label | ||
) |
This routine searches through a list of labeled lists to find a list with the specified label. If a matching labeled list cannot be found, nullptr is returned.
List | list to search |
Label | label to search for |
Definition at line 340 of file commontraining.cpp.
void FreeLabeledClassList | ( | LIST | ClassList | ) |
This routine deallocates all of the space allocated to the specified list of training samples.
ClassList | list of all fonts in document |
Definition at line 709 of file commontraining.cpp.
void FreeLabeledList | ( | LABELEDLIST | LabeledList | ) |
This routine deallocates all of the memory consumed by a labeled list. It does not free any memory which may be consumed by the items in the list.
LabeledList | labeled list to be freed |
Definition at line 476 of file commontraining.cpp.
void FreeNormProtoList | ( | LIST | CharList | ) |
Definition at line 805 of file commontraining.cpp.
void FreeTrainingSamples | ( | LIST | CharList | ) |
This routine deallocates all of the space allocated to the specified list of training samples.
CharList | list of all fonts in document |
Definition at line 450 of file commontraining.cpp.
const char* GetNextFilename | ( | int | argc, |
const char *const * | argv | ||
) |
This routine returns the next command line argument. If there are no remaining command line arguments, it returns nullptr. This routine should only be called after all option arguments have been parsed and removed with ParseArguments.
Globals:
Definition at line 323 of file commontraining.cpp.
INT_PARAM_FLAG | ( | debug_level | , |
0 | , | ||
"Level of Trainer debugging" | |||
) |
void MergeInsignificantProtos | ( | LIST | ProtoList, |
const char * | label, | ||
CLUSTERER * | Clusterer, | ||
CLUSTERCONFIG * | clusterconfig | ||
) |
Definition at line 528 of file commontraining.cpp.
MERGE_CLASS NewLabeledClass | ( | const char * | Label | ) |
Definition at line 692 of file commontraining.cpp.
LABELEDLIST NewLabeledList | ( | const char * | Label | ) |
This routine allocates a new, empty labeled list and gives it the specified label.
Label | label for new list |
Definition at line 361 of file commontraining.cpp.
void Normalize | ( | float * | Values | ) |
Definition at line 788 of file commontraining.cpp.
int NumberOfProtos | ( | LIST | ProtoList, |
bool | CountSigProtos, | ||
bool | CountInsigProtos | ||
) |
Definition at line 839 of file commontraining.cpp.
void ParseArguments | ( | int * | argc, |
char *** | argv | ||
) |
This routine parses the command line arguments that were passed to the program and uses them to set relevant training-related global parameters.
Globals:
argc | number of command line arguments to parse |
argv | command line arguments |
Definition at line 122 of file commontraining.cpp.
void ReadTrainingSamples | ( | const FEATURE_DEFS_STRUCT & | feature_definitions, |
const char * | feature_name, | ||
int | max_samples, | ||
UNICHARSET * | unicharset, | ||
FILE * | file, | ||
LIST * | training_samples | ||
) |
This routine reads training samples from a file and places them into a data structure which organizes the samples by FontName and CharName. The routine itself returns void; the data structure is handed back to the caller through the training_samples parameter.
file | open text file to read samples from |
feature_definitions | |
feature_name | |
max_samples | |
unicharset | |
training_samples |
Definition at line 389 of file commontraining.cpp.
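LIST RemoveInsignificantProtos | ( | LIST | ProtoList, |
bool | KeepSigProtos, | ||
bool | KeepInsigProtos, | ||
int | N | ||
) |
Definition at line 613 of file commontraining.cpp.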
CLUSTERER* SetUpForClustering | ( | const FEATURE_DEFS_STRUCT & | FeatureDefs, |
LABELEDLIST | char_sample, | ||
const char * | program_feature_type | ||
) |
This routine reads samples from a LABELEDLIST and enters those samples into a clusterer data structure. This data structure is then returned to the caller.
char_sample | LABELEDLIST that holds all the feature information for a given character. |
FeatureDefs | |
program_feature_type | |
Definition at line 494 of file commontraining.cpp.
CLASS_STRUCT* SetUpForFloat2Int | ( | const UNICHARSET & | unicharset, |
LIST | LabeledClassList | ||
) |
Definition at line 725 of file commontraining.cpp.
STRING_PARAM_FLAG | ( | D | , |
"" | , | ||
"Directory to write output files to" | |||
) |
STRING_PARAM_FLAG | ( | F | , |
"font_properties" | , | ||
"File listing font properties" | |||
) |
STRING_PARAM_FLAG | ( | O | , |
"" | , | ||
"File to write unicharset to" | |||
) |
STRING_PARAM_FLAG | ( | output_trainer | , |
"" | , | ||
"File to write trainer to" | |||
) |
STRING_PARAM_FLAG | ( | U | , |
"unicharset" | , | ||
"File to load unicharset from" | |||
) |
STRING_PARAM_FLAG | ( | X | , |
"" | , | ||
"File listing font xheights" | |||
) |
CLUSTERCONFIG Config = { elliptical, 0.625, 0.05, 1.0, 1e-6, 0 } |
Definition at line 88 of file commontraining.cpp.
FEATURE_DEFS_STRUCT feature_defs |
Definition at line 89 of file commontraining.cpp.