tesseract
5.0.0-alpha-619-ge9db
|
Go to the documentation of this file.
16 #ifndef TESSERACT_TRAINING_TRAININGSAMPLESET_H_
17 #define TESSERACT_TRAINING_TRAININGSAMPLESET_H_
33 class IntFeatureSpace;
35 struct UnicharAndFonts;
56 return samples_.size();
59 return num_raw_samples_;
68 return unicharset_size_;
71 return fontinfo_table_;
125 int font_id2,
int class_id2,
130 int font_id2,
int class_id2,
144 int font_id2,
int class_id2,
146 bool thorough)
const;
162 return samples_[index];
167 samples_[index] =
nullptr;
225 struct FontClassDistance {
231 struct FontClassInfo {
243 int32_t canonical_sample;
245 float canonical_dist;
253 BitVector cloud_features;
266 PointerVector<TrainingSample> samples_;
268 int num_raw_samples_;
272 int unicharset_size_;
276 IndexMapBiDi font_id_map_;
283 const FontInfoTable& fontinfo_table_;
289 #endif // TESSERACT_TRAINING_TRAININGSAMPLESET_H_
TrainingSample * extract_sample(int index)
void AddAllFontsForClass(int class_id, Shape *shape) const
float ClusterDistance(int font_id1, int class_id1, int font_id2, int class_id2, const IntFeatureMap &feature_map)
const GenericVector< int > & GetCanonicalFeatures(int font_id, int class_id) const
TrainingSample * MutableSample(int font_id, int class_id, int index)
TrainingSample * mutable_sample(int index)
float GetCanonicalDist(int font_id, int class_id) const
void LoadUnicharset(const char *filename)
int NumClassSamples(int font_id, int class_id, bool randomize) const
bool Serialize(FILE *fp) const
int GlobalSampleIndex(int font_id, int class_id, int index) const
const BitVector & GetCloudFeatures(int font_id, int class_id) const
TrainingSampleSet(const FontInfoTable &fontinfo_table)
const FontInfoTable & fontinfo_table() const
void ComputeCanonicalFeatures()
float UnicharDistance(const UnicharAndFonts &uf1, const UnicharAndFonts &uf2, bool matched_fonts, const IntFeatureMap &feature_map)
const TrainingSample * GetSample(int index) const
int num_raw_samples() const
void OrganizeByFontAndClass()
UnicodeText::const_iterator::difference_type distance(const UnicodeText::const_iterator &first, const UnicodeText::const_iterator &last)
void ComputeCloudFeatures(int feature_space_size)
bool DeleteableSample(const TrainingSample *sample)
STRING SampleToString(const TrainingSample &sample) const
int AddSample(const char *unichar, TrainingSample *sample)
int SparseSize() const override
void IndexFeatures(const IntFeatureSpace &feature_space)
void DisplaySamplesWithFeature(int f_index, const Shape &shape, const IntFeatureSpace &feature_space, ScrollView::Color color, ScrollView *window) const
void KillSample(TrainingSample *sample)
void ReplicateAndRandomizeSamples()
const UNICHARSET & unicharset() const
bool DeSerialize(bool swap, FILE *fp)
int ReliablySeparable(int font_id1, int class_id1, int font_id2, int class_id2, const IntFeatureMap &feature_map, bool thorough) const
float ComputeClusterDistance(int font_id1, int class_id1, int font_id2, int class_id2, const IntFeatureMap &feature_map) const
const TrainingSample * GetCanonicalSample(int font_id, int class_id) const
void ComputeCanonicalSamples(const IntFeatureMap &map, bool debug)