tesseract
5.0.0-alpha-619-ge9db
|
#include <trainingsampleset.h>
|
| TrainingSampleSet (const FontInfoTable &fontinfo_table) |
|
| ~TrainingSampleSet () |
|
bool | Serialize (FILE *fp) const |
|
bool | DeSerialize (bool swap, FILE *fp) |
|
int | num_samples () const |
|
int | num_raw_samples () const |
|
int | NumFonts () const |
|
const UNICHARSET & | unicharset () const |
|
int | charsetsize () const |
|
const FontInfoTable & | fontinfo_table () const |
|
void | LoadUnicharset (const char *filename) |
|
int | AddSample (const char *unichar, TrainingSample *sample) |
|
void | AddSample (int unichar_id, TrainingSample *sample) |
|
int | NumClassSamples (int font_id, int class_id, bool randomize) const |
|
const TrainingSample * | GetSample (int index) const |
|
const TrainingSample * | GetSample (int font_id, int class_id, int index) const |
|
TrainingSample * | MutableSample (int font_id, int class_id, int index) |
|
STRING | SampleToString (const TrainingSample &sample) const |
|
const BitVector & | GetCloudFeatures (int font_id, int class_id) const |
|
const GenericVector< int > & | GetCanonicalFeatures (int font_id, int class_id) const |
|
float | UnicharDistance (const UnicharAndFonts &uf1, const UnicharAndFonts &uf2, bool matched_fonts, const IntFeatureMap &feature_map) |
|
float | ClusterDistance (int font_id1, int class_id1, int font_id2, int class_id2, const IntFeatureMap &feature_map) |
|
float | ComputeClusterDistance (int font_id1, int class_id1, int font_id2, int class_id2, const IntFeatureMap &feature_map) const |
|
int | ReliablySeparable (int font_id1, int class_id1, int font_id2, int class_id2, const IntFeatureMap &feature_map, bool thorough) const |
|
int | GlobalSampleIndex (int font_id, int class_id, int index) const |
|
const TrainingSample * | GetCanonicalSample (int font_id, int class_id) const |
|
float | GetCanonicalDist (int font_id, int class_id) const |
|
TrainingSample * | mutable_sample (int index) |
|
TrainingSample * | extract_sample (int index) |
|
void | IndexFeatures (const IntFeatureSpace &feature_space) |
|
void | KillSample (TrainingSample *sample) |
|
void | DeleteDeadSamples () |
|
bool | DeleteableSample (const TrainingSample *sample) |
|
void | OrganizeByFontAndClass () |
|
void | SetupFontIdMap () |
|
void | ComputeCanonicalSamples (const IntFeatureMap &map, bool debug) |
|
void | ReplicateAndRandomizeSamples () |
|
void | ComputeCanonicalFeatures () |
|
void | ComputeCloudFeatures (int feature_space_size) |
|
void | AddAllFontsForClass (int class_id, Shape *shape) const |
|
void | DisplaySamplesWithFeature (int f_index, const Shape &shape, const IntFeatureSpace &feature_space, ScrollView::Color color, ScrollView *window) const |
|
Definition at line 43 of file trainingsampleset.h.
◆ TrainingSampleSet()
tesseract::TrainingSampleSet::TrainingSampleSet |
( |
const FontInfoTable & |
fontinfo_table | ) |
|
|
explicit |
Definition at line 70 of file trainingsampleset.cpp.
71 : num_raw_samples_(0), unicharset_size_(0),
72 font_class_array_(
nullptr), fontinfo_table_(font_table) {
◆ ~TrainingSampleSet()
tesseract::TrainingSampleSet::~TrainingSampleSet |
( |
| ) |
|
◆ AddAllFontsForClass()
void tesseract::TrainingSampleSet::AddAllFontsForClass |
( |
int |
class_id, |
|
|
Shape * |
shape |
|
) |
| const |
◆ AddSample() [1/2]
int tesseract::TrainingSampleSet::AddSample |
( |
const char * |
unichar, |
|
|
TrainingSample * |
sample |
|
) |
| |
Definition at line 129 of file trainingsampleset.cpp.
133 tprintf(
"Error: Size of unicharset in TrainingSampleSet::AddSample is "
134 "greater than MAX_NUM_CLASSES\n");
◆ AddSample() [2/2]
void tesseract::TrainingSampleSet::AddSample |
( |
int |
unichar_id, |
|
|
TrainingSample * |
sample |
|
) |
| |
Definition at line 145 of file trainingsampleset.cpp.
146 sample->set_class_id(unichar_id);
147 samples_.push_back(
sample);
148 num_raw_samples_ = samples_.size();
149 unicharset_size_ = unicharset_.
size();
◆ charsetsize()
int tesseract::TrainingSampleSet::charsetsize |
( |
| ) |
const |
|
inline |
◆ ClusterDistance()
float tesseract::TrainingSampleSet::ClusterDistance |
( |
int |
font_id1, |
|
|
int |
class_id1, |
|
|
int |
font_id2, |
|
|
int |
class_id2, |
|
|
const IntFeatureMap & |
feature_map |
|
) |
| |
Definition at line 296 of file trainingsampleset.cpp.
302 if (font_index1 < 0 || font_index2 < 0)
304 FontClassInfo& fc_info = (*font_class_array_)(font_index1, class_id1);
305 if (font_id1 == font_id2) {
307 if (fc_info.unichar_distance_cache.size() == 0)
308 fc_info.unichar_distance_cache.init_to_size(unicharset_size_, -1.0f);
309 if (fc_info.unichar_distance_cache[class_id2] < 0) {
314 fc_info.unichar_distance_cache[class_id2] = result;
316 FontClassInfo& fc_info2 = (*font_class_array_)(font_index2, class_id2);
317 if (fc_info2.unichar_distance_cache.size() == 0)
318 fc_info2.unichar_distance_cache.init_to_size(unicharset_size_, -1.0f);
319 fc_info2.unichar_distance_cache[class_id1] = result;
321 return fc_info.unichar_distance_cache[class_id2];
322 }
else if (class_id1 == class_id2) {
324 if (fc_info.font_distance_cache.size() == 0)
325 fc_info.font_distance_cache.init_to_size(font_id_map_.
CompactSize(),
327 if (fc_info.font_distance_cache[font_index2] < 0) {
332 fc_info.font_distance_cache[font_index2] = result;
334 FontClassInfo& fc_info2 = (*font_class_array_)(font_index2, class_id2);
335 if (fc_info2.font_distance_cache.size() == 0)
336 fc_info2.font_distance_cache.init_to_size(font_id_map_.
CompactSize(),
338 fc_info2.font_distance_cache[font_index1] = result;
340 return fc_info.font_distance_cache[font_index2];
345 while (cache_index < fc_info.distance_cache.size() &&
346 (fc_info.distance_cache[cache_index].unichar_id != class_id2 ||
347 fc_info.distance_cache[cache_index].font_id != font_id2))
349 if (cache_index == fc_info.distance_cache.size()) {
354 FontClassDistance fc_dist = { class_id2, font_id2, result };
355 fc_info.distance_cache.push_back(fc_dist);
358 FontClassInfo& fc_info2 = (*font_class_array_)(font_index2, class_id2);
359 fc_dist.unichar_id = class_id1;
360 fc_dist.font_id = font_id1;
361 fc_info2.distance_cache.push_back(fc_dist);
363 return fc_info.distance_cache[cache_index].distance;
◆ ComputeCanonicalFeatures()
void tesseract::TrainingSampleSet::ComputeCanonicalFeatures |
( |
| ) |
|
Definition at line 694 of file trainingsampleset.cpp.
697 for (
int font_index = 0; font_index < font_size; ++font_index) {
699 for (
int c = 0; c < unicharset_size_; ++c) {
704 FontClassInfo& fcinfo = (*font_class_array_)(font_index, c);
705 fcinfo.canonical_features =
sample->indexed_features();
◆ ComputeCanonicalSamples()
void tesseract::TrainingSampleSet::ComputeCanonicalSamples |
( |
const IntFeatureMap & |
map, |
|
|
bool |
debug |
|
) |
| |
Definition at line 568 of file trainingsampleset.cpp.
571 IntFeatureDist f_table;
572 if (debug)
tprintf(
"feature table size %d\n", map.sparse_size());
576 double global_worst_dist = 0.0;
579 for (
int font_index = 0; font_index < font_size; ++font_index) {
581 for (
int c = 0; c < unicharset_size_; ++c) {
582 int samples_found = 0;
583 FontClassInfo& fcinfo = (*font_class_array_)(font_index, c);
584 if (fcinfo.samples.size() == 0 ||
586 fcinfo.canonical_sample = -1;
587 fcinfo.canonical_dist = 0.0f;
588 if (debug)
tprintf(
"Skipping class %d\n", c);
593 double min_max_dist = 2.0;
596 double max_max_dist = 0.0;
599 fcinfo.canonical_sample = fcinfo.samples[0];
600 fcinfo.canonical_dist = 0.0f;
601 for (
int i = 0; i < fcinfo.samples.size(); ++i) {
602 int s1 = fcinfo.samples[i];
604 f_table.Set(features1, features1.
size(),
true);
605 double max_dist = 0.0;
610 for (
int j = 0; j < fcinfo.samples.size(); ++j) {
611 int s2 = fcinfo.samples[j];
612 if (samples_[s2]->class_id() != c ||
613 samples_[s2]->font_id() != font_id ||
617 double dist = f_table.FeatureDistance(features2);
618 if (dist > max_dist) {
620 if (dist > max_max_dist) {
629 f_table.Set(features1, features1.
size(),
false);
630 samples_[s1]->set_max_dist(max_dist);
632 if (max_dist < min_max_dist) {
633 fcinfo.canonical_sample = s1;
634 fcinfo.canonical_dist = max_dist;
636 UpdateRange(max_dist, &min_max_dist, &max_max_dist);
638 if (max_max_dist > global_worst_dist) {
640 global_worst_dist = max_max_dist;
645 tprintf(
"Found %d samples of class %d=%s, font %d, "
646 "dist range [%g, %g], worst pair= %s, %s\n",
648 font_index, min_max_dist, max_max_dist,
655 tprintf(
"Global worst dist = %g, between sample %d and %d\n",
656 global_worst_dist, worst_s1, worst_s2);
◆ ComputeCloudFeatures()
void tesseract::TrainingSampleSet::ComputeCloudFeatures |
( |
int |
feature_space_size | ) |
|
Definition at line 712 of file trainingsampleset.cpp.
715 for (
int font_index = 0; font_index < font_size; ++font_index) {
717 for (
int c = 0; c < unicharset_size_; ++c) {
721 FontClassInfo& fcinfo = (*font_class_array_)(font_index, c);
722 fcinfo.cloud_features.Init(feature_space_size);
726 for (
int i = 0; i < sample_features.
size(); ++i)
727 fcinfo.cloud_features.SetBit(sample_features[i]);
◆ ComputeClusterDistance()
float tesseract::TrainingSampleSet::ComputeClusterDistance |
( |
int |
font_id1, |
|
|
int |
class_id1, |
|
|
int |
font_id2, |
|
|
int |
class_id2, |
|
|
const IntFeatureMap & |
feature_map |
|
) |
| const |
◆ DeleteableSample()
bool tesseract::TrainingSampleSet::DeleteableSample |
( |
const TrainingSample * |
sample | ) |
|
◆ DeleteDeadSamples()
void tesseract::TrainingSampleSet::DeleteDeadSamples |
( |
| ) |
|
Definition at line 497 of file trainingsampleset.cpp.
498 using namespace std::placeholders;
500 num_raw_samples_ = samples_.size();
◆ DeSerialize()
bool tesseract::TrainingSampleSet::DeSerialize |
( |
bool |
swap, |
|
|
FILE * |
fp |
|
) |
| |
Definition at line 94 of file trainingsampleset.cpp.
95 if (!samples_.DeSerialize(swap, fp))
return false;
96 num_raw_samples_ = samples_.size();
98 if (!font_id_map_.
DeSerialize(swap, fp))
return false;
99 delete font_class_array_;
100 font_class_array_ =
nullptr;
102 if (fread(&not_null,
sizeof(not_null), 1, fp) != 1)
return false;
108 unicharset_size_ = unicharset_.
size();
◆ DisplaySamplesWithFeature()
Definition at line 743 of file trainingsampleset.cpp.
750 if (shape.ContainsUnichar(
sample->class_id())) {
752 space.IndexAndSortFeatures(
sample->features(),
sample->num_features(),
754 for (
int f = 0; f < indexed_features.
size(); ++f) {
755 if (indexed_features[f] == f_index) {
756 sample->DisplayFeatures(color, window);
◆ extract_sample()
TrainingSample* tesseract::TrainingSampleSet::extract_sample |
( |
int |
index | ) |
|
|
inline |
◆ fontinfo_table()
const FontInfoTable& tesseract::TrainingSampleSet::fontinfo_table |
( |
| ) |
const |
|
inline |
◆ GetCanonicalDist()
float tesseract::TrainingSampleSet::GetCanonicalDist |
( |
int |
font_id, |
|
|
int |
class_id |
|
) |
| const |
Definition at line 474 of file trainingsampleset.cpp.
477 if (font_index < 0)
return 0.0f;
478 if ((*font_class_array_)(font_index, class_id).canonical_sample >= 0)
479 return (*font_class_array_)(font_index, class_id).canonical_dist;
◆ GetCanonicalFeatures()
const GenericVector< int > & tesseract::TrainingSampleSet::GetCanonicalFeatures |
( |
int |
font_id, |
|
|
int |
class_id |
|
) |
| const |
Definition at line 219 of file trainingsampleset.cpp.
223 return (*font_class_array_)(font_index, class_id).canonical_features;
◆ GetCanonicalSample()
const TrainingSample * tesseract::TrainingSampleSet::GetCanonicalSample |
( |
int |
font_id, |
|
|
int |
class_id |
|
) |
| const |
Definition at line 462 of file trainingsampleset.cpp.
466 if (font_index < 0)
return nullptr;
467 const int sample_index = (*font_class_array_)(font_index,
468 class_id).canonical_sample;
469 return sample_index >= 0 ? samples_[sample_index] :
nullptr;
◆ GetCloudFeatures()
const BitVector & tesseract::TrainingSampleSet::GetCloudFeatures |
( |
int |
font_id, |
|
|
int |
class_id |
|
) |
| const |
Definition at line 211 of file trainingsampleset.cpp.
215 return (*font_class_array_)(font_index, class_id).cloud_features;
◆ GetSample() [1/2]
const TrainingSample * tesseract::TrainingSampleSet::GetSample |
( |
int |
font_id, |
|
|
int |
class_id, |
|
|
int |
index |
|
) |
| const |
Definition at line 180 of file trainingsampleset.cpp.
184 if (font_index < 0)
return nullptr;
185 int sample_index = (*font_class_array_)(font_index, class_id).samples[index];
186 return samples_[sample_index];
◆ GetSample() [2/2]
const TrainingSample * tesseract::TrainingSampleSet::GetSample |
( |
int |
index | ) |
const |
◆ GlobalSampleIndex()
int tesseract::TrainingSampleSet::GlobalSampleIndex |
( |
int |
font_id, |
|
|
int |
class_id, |
|
|
int |
index |
|
) |
| const |
Definition at line 452 of file trainingsampleset.cpp.
456 if (font_index < 0)
return -1;
457 return (*font_class_array_)(font_index, class_id).samples[index];
◆ IndexFeatures()
void tesseract::TrainingSampleSet::IndexFeatures |
( |
const IntFeatureSpace & |
feature_space | ) |
|
◆ KillSample()
void tesseract::TrainingSampleSet::KillSample |
( |
TrainingSample * |
sample | ) |
|
◆ LoadUnicharset()
void tesseract::TrainingSampleSet::LoadUnicharset |
( |
const char * |
filename | ) |
|
Definition at line 113 of file trainingsampleset.cpp.
115 tprintf(
"Failed to load unicharset from file %s\n"
116 "Building unicharset from scratch...\n",
123 unicharset_size_ = unicharset_.
size();
◆ mutable_sample()
TrainingSample* tesseract::TrainingSampleSet::mutable_sample |
( |
int |
index | ) |
|
|
inline |
◆ MutableSample()
TrainingSample * tesseract::TrainingSampleSet::MutableSample |
( |
int |
font_id, |
|
|
int |
class_id, |
|
|
int |
index |
|
) |
| |
Definition at line 191 of file trainingsampleset.cpp.
195 if (font_index < 0)
return nullptr;
196 int sample_index = (*font_class_array_)(font_index, class_id).samples[index];
197 return samples_[sample_index];
◆ num_raw_samples()
int tesseract::TrainingSampleSet::num_raw_samples |
( |
| ) |
const |
|
inline |
◆ num_samples()
int tesseract::TrainingSampleSet::num_samples |
( |
| ) |
const |
|
inline |
◆ NumClassSamples()
int tesseract::TrainingSampleSet::NumClassSamples |
( |
int |
font_id, |
|
|
int |
class_id, |
|
|
bool |
randomize |
|
) |
| const |
Definition at line 156 of file trainingsampleset.cpp.
159 if (font_id < 0 || class_id < 0 ||
160 font_id >= font_id_map_.
SparseSize() || class_id >= unicharset_size_) {
168 return (*font_class_array_)(font_index, class_id).samples.size();
◆ NumFonts()
int tesseract::TrainingSampleSet::NumFonts |
( |
| ) |
const |
|
inline |
◆ OrganizeByFontAndClass()
void tesseract::TrainingSampleSet::OrganizeByFontAndClass |
( |
| ) |
|
Definition at line 511 of file trainingsampleset.cpp.
515 int compact_font_size = font_id_map_.
CompactSize();
517 delete font_class_array_;
520 compact_font_size, unicharset_size_, empty);
521 for (
int s = 0; s < samples_.size(); ++s) {
522 int font_id = samples_[s]->font_id();
523 int class_id = samples_[s]->class_id();
524 if (font_id < 0 || font_id >= font_id_map_.
SparseSize()) {
525 tprintf(
"Font id = %d/%d, class id = %d/%d on sample %d\n",
526 font_id, font_id_map_.
SparseSize(), class_id, unicharset_size_,
530 ASSERT_HOST(class_id >= 0 && class_id < unicharset_size_);
532 (*font_class_array_)(font_index, class_id).samples.push_back(s);
536 for (
int f = 0; f < compact_font_size; ++f) {
537 for (
int c = 0; c < unicharset_size_; ++c)
539 (*font_class_array_)(f, c).samples.size();
543 num_raw_samples_ = samples_.size();
◆ ReliablySeparable()
int tesseract::TrainingSampleSet::ReliablySeparable |
( |
int |
font_id1, |
|
|
int |
class_id1, |
|
|
int |
font_id2, |
|
|
int |
class_id2, |
|
|
const IntFeatureMap & |
feature_map, |
|
|
bool |
thorough |
|
) |
| const |
Definition at line 413 of file trainingsampleset.cpp.
419 if (sample2 ==
nullptr)
424 if (cloud1.size() == 0)
425 return canonical2.
size();
428 for (
int f = 0; f < canonical2.
size(); ++f) {
429 const int feature = canonical2[f];
434 AddNearFeatures(feature_map, feature, 1, &good_features);
437 for (i = 0; i < good_features.
size(); ++i) {
438 int good_f = good_features[i];
439 if (cloud1[good_f]) {
443 if (i < good_features.
size())
◆ ReplicateAndRandomizeSamples()
void tesseract::TrainingSampleSet::ReplicateAndRandomizeSamples |
( |
| ) |
|
Definition at line 665 of file trainingsampleset.cpp.
668 for (
int font_index = 0; font_index < font_size; ++font_index) {
669 for (
int c = 0; c < unicharset_size_; ++c) {
670 FontClassInfo& fcinfo = (*font_class_array_)(font_index, c);
671 int sample_count = fcinfo.samples.size();
672 int min_samples = 2 * std::max(kSampleRandomSize, sample_count);
673 if (sample_count > 0 && sample_count < min_samples) {
674 int base_count = sample_count;
675 for (
int base_index = 0; sample_count < min_samples; ++sample_count) {
676 int src_index = fcinfo.samples[base_index++];
677 if (base_index >= base_count) base_index = 0;
678 TrainingSample*
sample = samples_[src_index]->RandomizedCopy(
679 sample_count % kSampleRandomSize);
680 int sample_index = samples_.size();
681 sample->set_sample_index(sample_index);
682 samples_.push_back(
sample);
683 fcinfo.samples.push_back(sample_index);
◆ SampleToString()
◆ Serialize()
bool tesseract::TrainingSampleSet::Serialize |
( |
FILE * |
fp | ) |
const |
Definition at line 80 of file trainingsampleset.cpp.
81 if (!samples_.Serialize(fp))
return false;
83 if (!font_id_map_.
Serialize(fp))
return false;
84 int8_t not_null = font_class_array_ !=
nullptr;
85 if (fwrite(&not_null,
sizeof(not_null), 1, fp) != 1)
return false;
◆ SetupFontIdMap()
void tesseract::TrainingSampleSet::SetupFontIdMap |
( |
| ) |
|
Definition at line 548 of file trainingsampleset.cpp.
551 for (
int s = 0; s < samples_.size(); ++s) {
552 const int font_id = samples_[s]->font_id();
553 while (font_id >= font_counts.
size())
555 ++font_counts[font_id];
557 font_id_map_.
Init(font_counts.
size(),
false);
558 for (
int f = 0; f < font_counts.
size(); ++f) {
559 font_id_map_.
SetMap(f, font_counts[f] > 0);
561 font_id_map_.
Setup();
◆ UnicharDistance()
Definition at line 230 of file trainingsampleset.cpp.
234 int num_fonts1 = uf1.font_ids.size();
235 int c1 = uf1.unichar_id;
236 int num_fonts2 = uf2.font_ids.size();
237 int c2 = uf2.unichar_id;
238 double dist_sum = 0.0;
240 const bool debug =
false;
243 for (
int i = 0; i < num_fonts1; ++i) {
244 int f1 = uf1.font_ids[i];
245 for (
int j = 0; j < num_fonts2; ++j) {
246 int f2 = uf2.font_ids[j];
255 for (
int i = 0; i < num_fonts1; ++i) {
256 int f1 = uf1.font_ids[i];
257 for (
int j = 0; j < num_fonts2; ++j) {
258 int f2 = uf2.font_ids[j];
261 tprintf(
"Cluster dist %d %d %d %d = %g\n",
273 int num_samples = std::max(num_fonts1, num_fonts2);
274 for (
int i = 0; i <
num_samples; ++i, index += increment) {
275 int f1 = uf1.font_ids[i % num_fonts1];
276 int f2 = uf2.font_ids[index % num_fonts2];
278 tprintf(
"Cluster dist %d %d %d %d = %g\n",
285 if (dist_count == 0) {
290 return dist_sum / dist_count;
◆ unicharset()
const UNICHARSET& tesseract::TrainingSampleSet::unicharset |
( |
| ) |
const |
|
inline |
The documentation for this class was generated from the following files:
bool load_from_file(const char *const filename, bool skip_fragments)
float ClusterDistance(int font_id1, int class_id1, int font_id2, int class_id2, const IntFeatureMap &feature_map)
void AppendOtherUnicharset(const UNICHARSET &src)
const GenericVector< int > & GetCanonicalFeatures(int font_id, int class_id) const
void MakeBoxFileStr(const char *unichar_str, const TBOX &box, int page_num, STRING *box_str)
int NumClassSamples(int font_id, int class_id, bool randomize) const
int SparseToCompact(int sparse_index) const override
int CompactToSparse(int compact_index) const
bool save_to_file(const char *const filename) const
const BitVector & GetCloudFeatures(int font_id, int class_id) const
bool Serialize(FILE *fp) const
bool DeSerialize(bool swap, FILE *fp)
STRING debug_str(UNICHAR_ID id) const
const char * c_str() const
float UnicharDistance(const UnicharAndFonts &uf1, const UnicharAndFonts &uf2, bool matched_fonts, const IntFeatureMap &feature_map)
const TrainingSample * GetSample(int index) const
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
bool DeSerializeClasses(bool swap, FILE *fp)
int num_raw_samples() const
bool DeleteableSample(const TrainingSample *sample)
STRING SampleToString(const TrainingSample &sample) const
int AddSample(const char *unichar, TrainingSample *sample)
int SparseSize() const override
bool contains_unichar(const char *const unichar_repr) const
void Init(int size, bool all_mapped)
bool SerializeClasses(FILE *fp) const
DLLSYM void tprintf(const char *format,...)
void IndexFeatures(const IntFeatureSpace &feature_space)
void SetMap(int sparse_index, bool mapped)
const char * id_to_unichar(UNICHAR_ID id) const
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
int ReliablySeparable(int font_id1, int class_id1, int font_id2, int class_id2, const IntFeatureMap &feature_map, bool thorough) const
void unichar_insert(const char *const unichar_repr, OldUncleanUnichars old_style)
float ComputeClusterDistance(int font_id1, int class_id1, int font_id2, int class_id2, const IntFeatureMap &feature_map) const
const TrainingSample * GetCanonicalSample(int font_id, int class_id) const