tesseract
5.0.0-alpha-619-ge9db
|
Go to the source code of this file.
|
void | ParseArguments (int *argc, char ***argv) |
|
ShapeTable * | tesseract::LoadShapeTable (const STRING &file_prefix) |
|
void | tesseract::WriteShapeTable (const STRING &file_prefix, const ShapeTable &shape_table) |
|
MasterTrainer * | tesseract::LoadTrainingData (int argc, const char *const *argv, bool replication, ShapeTable **shape_table, STRING *file_prefix) |
|
const char * | GetNextFilename (int argc, const char *const *argv) |
|
LABELEDLIST | FindList (LIST List, char *Label) |
|
LABELEDLIST | NewLabeledList (const char *Label) |
|
void | ReadTrainingSamples (const FEATURE_DEFS_STRUCT &feature_defs, const char *feature_name, int max_samples, UNICHARSET *unicharset, FILE *file, LIST *training_samples) |
|
void | WriteTrainingSamples (const FEATURE_DEFS_STRUCT &FeatureDefs, char *Directory, LIST CharList, const char *program_feature_type) |
|
void | FreeTrainingSamples (LIST CharList) |
|
void | FreeLabeledList (LABELEDLIST LabeledList) |
|
void | FreeLabeledClassList (LIST ClassListList) |
|
CLUSTERER * | SetUpForClustering (const FEATURE_DEFS_STRUCT &FeatureDefs, LABELEDLIST CharSample, const char *program_feature_type) |
|
LIST | RemoveInsignificantProtos (LIST ProtoList, bool KeepSigProtos, bool KeepInsigProtos, int N) |
|
void | CleanUpUnusedData (LIST ProtoList) |
|
void | MergeInsignificantProtos (LIST ProtoList, const char *label, CLUSTERER *Clusterer, CLUSTERCONFIG *Config) |
|
MERGE_CLASS | FindClass (LIST List, const char *Label) |
|
MERGE_CLASS | NewLabeledClass (const char *Label) |
|
CLASS_STRUCT * | SetUpForFloat2Int (const UNICHARSET &unicharset, LIST LabeledClassList) |
|
void | Normalize (float *Values) |
|
void | FreeNormProtoList (LIST CharList) |
|
void | AddToNormProtosList (LIST *NormProtoList, LIST ProtoList, char *CharName) |
|
int | NumberOfProtos (LIST ProtoList, bool CountSigProtos, bool CountInsigProtos) |
|
void | allocNormProtos () |
|
◆ LABELEDLIST
◆ MERGE_CLASS
◆ AddToNormProtosList()
void AddToNormProtosList |
( |
LIST * |
NormProtoList, |
|
|
LIST |
ProtoList, |
|
|
char * |
CharName |
|
) |
| |
◆ allocNormProtos()
◆ CleanUpUnusedData()
void CleanUpUnusedData |
( |
LIST |
ProtoList | ) |
|
◆ FindClass()
◆ FindList()
This routine searches through a list of labeled lists to find a list with the specified label. If a matching labeled list cannot be found, nullptr is returned.
- Parameters
-
List | list to search |
Label | label to search for |
- Returns
- Labeled list with the specified label or nullptr.
- Note
- Globals: none
Definition at line 340 of file commontraining.cpp.
346 if (strcmp (LabeledList->
Label, Label) == 0)
347 return (LabeledList);
◆ FreeLabeledClassList()
void FreeLabeledClassList |
( |
LIST |
ClassList | ) |
|
This routine deallocates all of the space allocated to the specified list of labeled classes, including each entry's label.
- Parameters
-
ClassList | list of all fonts in document |
Definition at line 709 of file commontraining.cpp.
712 LIST nodes = ClassList;
716 free (MergeClass->
Label);
◆ FreeLabeledList()
This routine deallocates all of the memory consumed by a labeled list. It does not free any memory which may be consumed by the items in the list.
- Parameters
-
LabeledList | labeled list to be freed |
- Note
- Globals: none
Definition at line 476 of file commontraining.cpp.
478 free(LabeledList->
Label);
◆ FreeNormProtoList()
void FreeNormProtoList |
( |
LIST |
CharList | ) |
|
◆ FreeTrainingSamples()
void FreeTrainingSamples |
( |
LIST |
CharList | ) |
|
This routine deallocates all of the space allocated to the specified list of training samples.
- Parameters
-
CharList | list of all fonts in document |
Definition at line 450 of file commontraining.cpp.
455 LIST nodes = CharList;
458 FeatureList = char_sample->
List;
◆ GetNextFilename()
const char* GetNextFilename |
( |
int |
argc, |
|
|
const char *const * |
argv |
|
) |
| |
This routine returns the next command line argument. If there are no remaining command line arguments, it returns nullptr. This routine should only be called after all option arguments have been parsed and removed with ParseArguments.
Globals:
- tessoptind defined by tessopt sys call
- Returns
- Next command line argument or nullptr.
Definition at line 323 of file commontraining.cpp.
◆ MergeInsignificantProtos()
Definition at line 528 of file commontraining.cpp.
532 bool debug = strcmp(FLAGS_test_ch.c_str(), label) == 0;
534 LIST pProtoList = ProtoList;
539 float best_dist = 0.125;
542 LIST list_it = ProtoList;
545 if (test_p != Prototype && !test_p->
Merged) {
549 if (dist < best_dist) {
555 if (best_match !=
nullptr && !best_match->
Significant) {
557 tprintf(
"Merging red clusters (%d+%d) at %g,%g and %g,%g\n",
559 best_match->
Mean[0], best_match->
Mean[1],
560 Prototype->
Mean[0], Prototype->
Mean[1]);
569 }
else if (best_match !=
nullptr) {
571 tprintf(
"Red proto at %g,%g matched a green one at %g,%g\n",
572 Prototype->
Mean[0], Prototype->
Mean[1],
573 best_match->
Mean[0], best_match->
Mean[1]);
579 static_cast<int32_t>(clusterconfig->MinSamples * Clusterer->
NumChar);
580 pProtoList = ProtoList;
587 tprintf(
"Red proto at %g,%g becoming green\n",
588 Prototype->
Mean[0], Prototype->
Mean[1]);
◆ NewLabeledClass()
◆ NewLabeledList()
This routine allocates a new, empty labeled list and gives it the specified label.
- Parameters
-
Label | label to give the new list |
- Returns
- New, empty labeled list.
- Note
- Globals: none
Definition at line 361 of file commontraining.cpp.
365 LabeledList->
Label = static_cast<char*>(
Emalloc (strlen (Label)+1));
366 strcpy (LabeledList->
Label, Label);
370 return (LabeledList);
◆ Normalize()
void Normalize |
( |
float * |
Values | ) |
|
Definition at line 788 of file commontraining.cpp.
795 Slope = tan(Values [2] * 2 * M_PI);
796 Intercept = Values [1] - Slope * Values [0];
797 Normalizer = 1 / sqrt (Slope * Slope + 1.0);
799 Values [0] = Slope * Normalizer;
800 Values [1] = - Normalizer;
801 Values [2] = Intercept * Normalizer;
◆ NumberOfProtos()
int NumberOfProtos |
( |
LIST |
ProtoList, |
|
|
bool |
CountSigProtos, |
|
|
bool |
CountInsigProtos |
|
) |
| |
◆ ParseArguments()
void ParseArguments |
( |
int * |
argc, |
|
|
char *** |
argv |
|
) |
| |
This routine parses the command line arguments that were passed to the program and uses them to set relevant training-related global parameters.
Globals:
- Config current clustering parameters
- Parameters
-
argc | number of command line arguments to parse |
argv | command line arguments |
Definition at line 122 of file commontraining.cpp.
126 usage +=
" -v | --version | ";
129 usage +=
" [.tr files ...]";
136 std::max(0.0, std::min(1.0,
double(FLAGS_clusterconfig_min_samples_fraction)));
138 std::max(0.0, std::min(1.0,
double(FLAGS_clusterconfig_max_illegal)));
140 std::max(0.0, std::min(1.0,
double(FLAGS_clusterconfig_independence)));
142 std::max(0.0, std::min(1.0,
double(FLAGS_clusterconfig_confidence)));
144 if (!FLAGS_configfile.empty()) {
146 FLAGS_configfile.c_str(),
◆ ReadTrainingSamples()
void ReadTrainingSamples |
( |
const FEATURE_DEFS_STRUCT & |
feature_definitions, |
|
|
const char * |
feature_name, |
|
|
int |
max_samples, |
|
|
UNICHARSET * |
unicharset, |
|
|
FILE * |
file, |
|
|
LIST * |
training_samples |
|
) |
| |
This routine reads training samples from a file and places them into a data structure which organizes the samples by FontName and CharName. The function itself returns nothing; the resulting data structure is stored in the list pointed to by training_samples.
- Parameters
-
file | open text file to read samples from |
feature_definitions | |
feature_name | |
max_samples | |
unicharset | |
training_samples | |
Definition at line 389 of file commontraining.cpp.
398 uint32_t feature_type =
402 LIST it = *training_samples;
404 char_sample = reinterpret_cast<LABELEDLIST>(
first_node(it));
408 while (fgets(buffer, 2048,
file) !=
nullptr) {
409 if (buffer[0] ==
'\n')
412 sscanf(buffer,
"%*s %s", unichar);
416 tprintf(
"Error: Size of unicharset in training is "
417 "greater than MAX_NUM_CLASSES\n");
421 char_sample =
FindList(*training_samples, unichar);
422 if (char_sample ==
nullptr) {
424 *training_samples =
push(*training_samples, char_sample);
427 feature_samples = char_desc->
FeatureSets[feature_type];
429 char_sample->
List =
push(char_sample->
List, feature_samples);
436 if (feature_type != i)
◆ RemoveInsignificantProtos()
LIST RemoveInsignificantProtos |
( |
LIST |
ProtoList, |
|
|
bool |
KeepSigProtos, |
|
|
bool |
KeepInsigProtos, |
|
|
int |
N |
|
) |
| |
Definition at line 613 of file commontraining.cpp.
626 pProtoList = ProtoList;
635 NewProto->
Mean = static_cast<float *>(
Emalloc(N *
sizeof(
float)));
642 for (i=0; i < N; i++)
646 for (i=0; i < N; i++)
654 for (i=0; i < N; i++)
662 for (i=0; i < N; i++)
670 NewProtoList =
push_last(NewProtoList, NewProto);
674 return (NewProtoList);
◆ SetUpForClustering()
This routine reads samples from a LABELEDLIST and enters those samples into a clusterer data structure. This data structure is then returned to the caller.
- Parameters
-
char_sample | LABELEDLIST that holds all the feature information for a given character. |
FeatureDefs | |
program_feature_type | |
- Returns
- Pointer to new clusterer data structure.
- Note
- Globals: None
Definition at line 494 of file commontraining.cpp.
499 float* Sample =
nullptr;
502 LIST FeatureList =
nullptr;
510 FeatureList = char_sample->
List;
515 if (Sample ==
nullptr) Sample = static_cast<float*>(
Emalloc(N *
sizeof(
float)));
516 for (j = 0; j < N; j++)
◆ SetUpForFloat2Int()
Definition at line 725 of file commontraining.cpp.
753 for(i=0; i < NumProtos; i++)
757 Values[0] = OldProto->
X;
758 Values[1] = OldProto->
Y;
759 Values[2] = OldProto->
Angle;
761 NewProto->
X = OldProto->
X;
762 NewProto->
Y = OldProto->
Y;
765 NewProto->
A = Values[0];
766 NewProto->
B = Values[1];
767 NewProto->
C = Values[2];
774 NumWords = WordsInVectorOfSize(NumProtos);
775 for(i=0; i < NumConfigs; i++)
777 NewConfig = NewBitVector(NumProtos);
779 for(j=0; j < NumWords; j++)
780 NewConfig[j] = OldConfig[j];
784 return float_classes;
◆ WriteTrainingSamples()
void WriteTrainingSamples |
( |
const FEATURE_DEFS_STRUCT & |
FeatureDefs, |
|
|
char * |
Directory, |
|
|
LIST |
CharList, |
|
|
const char * |
program_feature_type |
|
) |
| |
◆ Config
◆ feature_defs
LABELEDLIST FindList(LIST List, char *Label)
static bool ReadParamsFile(const char *file, SetParamConstraint constraint, ParamsVectors *member_params)
void Normalize(float *Values)
uint32_t ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs, const char *ShortName)
void FreeProtoList(LIST *ProtoList)
FEATURE_SET FeatureSets[NUM_FEATURE_TYPES]
float ComputeDistance(int k, PARAM_DESC *dim, float p1[], float p2[])
void ParseCommandLineFlags(const char *usage, int *argc, char ***argv, const bool remove_flags)
LABELEDLIST NewLabeledList(const char *Label)
#define ProtoIn(Class, Pid)
const FEATURE_DESC_STRUCT * FeatureDesc[NUM_FEATURE_TYPES]
CLASS_TYPE NewClass(int NumProtos, int NumConfigs)
const char * c_str() const
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
int32_t MergeClusters(int16_t N, PARAM_DESC ParamDesc[], int32_t n1, int32_t n2, float m[], float m1[], float m2[])
UnicityTableEqEq< int > font_set
CLUSTERER * MakeClusterer(int16_t SampleSize, const PARAM_DESC ParamDesc[])
LIST push(LIST list, void *element)
const PARAM_DESC * ParamDesc
bool contains_unichar(const char *const unichar_repr) const
void FreeFeatureSet(FEATURE_SET FeatureSet)
void FreeClass(CLASS_TYPE Class)
DLLSYM void tprintf(const char *format,...)
void FreeLabeledList(LABELEDLIST LabeledList)
LIST push_last(LIST list, void *item)
SAMPLE * MakeSample(CLUSTERER *Clusterer, const float *Feature, int32_t CharID)
void unichar_insert(const char *const unichar_repr, OldUncleanUnichars old_style)
CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, FILE *File)
void move(UnicityTable< T > *from)