|
| ClassPruner (int max_classes) |
|
| ~ClassPruner () |
|
void | ComputeScores (const INT_TEMPLATES_STRUCT *int_templates, int num_features, const INT_FEATURE_STRUCT *features) |
|
void | AdjustForExpectedNumFeatures (const uint16_t *expected_num_features, int cutoff_strength) |
|
void | DisableDisabledClasses (const UNICHARSET &unicharset) |
|
void | DisableFragments (const UNICHARSET &unicharset) |
|
void | NormalizeForXheight (int norm_multiplier, const uint8_t *normalization_factors) |
|
void | NoNormalization () |
|
void | PruneAndSort (int pruning_factor, int keep_this, bool max_of_non_fragments, const UNICHARSET &unicharset) |
|
void | DebugMatch (const Classify &classify, const INT_TEMPLATES_STRUCT *int_templates, const INT_FEATURE_STRUCT *features) const |
|
void | SummarizeResult (const Classify &classify, const INT_TEMPLATES_STRUCT *int_templates, const uint16_t *expected_num_features, int norm_multiplier, const uint8_t *normalization_factors) const |
|
int | SetupResults (GenericVector< CP_RESULT_STRUCT > *results) const |
|
Definition at line 146 of file intmatcher.cpp.
◆ ClassPruner()
tesseract::ClassPruner::ClassPruner |
( |
int |
max_classes | ) |
|
|
inline |
Definition at line 148 of file intmatcher.cpp.
156 max_classes_ = max_classes;
159 class_count_ =
new int[rounded_classes_];
160 norm_count_ =
new int[rounded_classes_];
161 sort_key_ =
new int[rounded_classes_ + 1];
162 sort_index_ =
new int[rounded_classes_ + 1];
163 for (
int i = 0; i < rounded_classes_; i++) {
166 pruning_threshold_ = 0;
◆ ~ClassPruner()
tesseract::ClassPruner::~ClassPruner |
( |
| ) |
|
|
inline |
Definition at line 170 of file intmatcher.cpp.
172 delete []class_count_;
173 delete []norm_count_;
175 delete []sort_index_;
◆ AdjustForExpectedNumFeatures()
void tesseract::ClassPruner::AdjustForExpectedNumFeatures |
( |
const uint16_t * |
expected_num_features, |
|
|
int |
cutoff_strength |
|
) |
| |
|
inline |
Adjusts the scores according to the number of expected features. Used in lieu of a constant bias, this penalizes classes that expect more features than are actually present. Thus an actual 'c' will score higher for class 'c' than for class 'e', even though almost all of its features match 'e' as well as 'c', because 'e' expects more features to be present.
Definition at line 250 of file intmatcher.cpp.
253 for (
int class_id = 0; class_id < max_classes_; ++class_id) {
254 if (num_features_ < expected_num_features[class_id]) {
255 int deficit = expected_num_features[class_id] - num_features_;
256 class_count_[class_id] -= class_count_[class_id] * deficit /
257 (num_features_ * cutoff_strength + deficit);
◆ ComputeScores()
Computes the scores for every class in the character set by summing the weights for each feature, and stores the sums internally in class_count_.
Definition at line 179 of file intmatcher.cpp.
182 num_features_ = num_features;
184 for (
int f = 0; f < num_features; ++f) {
193 for (
int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
196 const uint32_t* pruner_word_ptr =
199 uint32_t pruner_word = *pruner_word_ptr++;
◆ DebugMatch()
Prints debug info on the class pruner matches for the pruned classes only.
Definition at line 339 of file intmatcher.cpp.
344 int max_num_classes = int_templates->
NumClasses;
345 for (
int f = 0; f < num_features_; ++f) {
347 tprintf(
"F=%3d(%d,%d,%d),", f, feature->
X, feature->
Y, feature->
Theta);
353 for (
int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
356 const uint32_t* pruner_word_ptr =
359 uint32_t pruner_word = *pruner_word_ptr++;
360 for (
int word_class = 0; word_class < 16 &&
361 class_id < max_num_classes; ++word_class, ++class_id) {
362 if (norm_count_[class_id] >= pruning_threshold_) {
364 classify.ClassIDToDebugStr(int_templates,
365 class_id, 0).c_str(),
◆ DisableDisabledClasses()
void tesseract::ClassPruner::DisableDisabledClasses |
( |
const UNICHARSET & |
unicharset | ) |
|
|
inline |
Zeros the scores for classes disabled in the unicharset. Implements the black-list to recognize a subset of the character set.
Definition at line 263 of file intmatcher.cpp.
265 for (
int class_id = 0; class_id < max_classes_; ++class_id) {
267 class_count_[class_id] = 0;
◆ DisableFragments()
void tesseract::ClassPruner::DisableFragments |
( |
const UNICHARSET & |
unicharset | ) |
|
|
inline |
Zeros the scores of fragments.
Definition at line 271 of file intmatcher.cpp.
273 for (
int class_id = 0; class_id < max_classes_; ++class_id) {
277 class_count_[class_id] = 0;
◆ NoNormalization()
void tesseract::ClassPruner::NoNormalization |
( |
| ) |
|
|
inline |
The no-op normalization copies the class_count_ array to norm_count_.
Definition at line 294 of file intmatcher.cpp.
296 for (
int class_id = 0; class_id < max_classes_; class_id++) {
297 norm_count_[class_id] = class_count_[class_id];
◆ NormalizeForXheight()
void tesseract::ClassPruner::NormalizeForXheight |
( |
int |
norm_multiplier, |
|
|
const uint8_t * |
normalization_factors |
|
) |
| |
|
inline |
Normalizes the counts for xheight, putting the normalized result in norm_count_. Applies a simple subtractive penalty for incorrect vertical position provided by the normalization_factors array, indexed by character class, and scaled by the norm_multiplier.
Definition at line 285 of file intmatcher.cpp.
288 for (
int class_id = 0; class_id < max_classes_; class_id++) {
289 norm_count_[class_id] = class_count_[class_id] -
290 ((norm_multiplier * normalization_factors[class_id]) >> 8);
◆ PruneAndSort()
void tesseract::ClassPruner::PruneAndSort |
( |
int |
pruning_factor, |
|
|
int |
keep_this, |
|
|
bool |
max_of_non_fragments, |
|
|
const UNICHARSET & |
unicharset |
|
) |
| |
|
inline |
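Prunes the classes using (maximum count) * pruning_factor / 256 as the threshold for keeping classes; the threshold is never allowed to fall below 1. If max_of_non_fragments is true, fragments are ignored when computing the maximum count. The class keep_this is retained regardless of its count.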
Definition at line 303 of file intmatcher.cpp.
307 for (
int c = 0; c < max_classes_; ++c) {
308 if (norm_count_[c] > max_count &&
314 (!max_of_non_fragments || !unicharset.
get_fragment(c))) {
315 max_count = norm_count_[c];
319 pruning_threshold_ = (max_count * pruning_factor) >> 8;
321 if (pruning_threshold_ < 1)
322 pruning_threshold_ = 1;
324 for (
int class_id = 0; class_id < max_classes_; class_id++) {
325 if (norm_count_[class_id] >= pruning_threshold_ ||
326 class_id == keep_this) {
328 sort_index_[num_classes_] = class_id;
329 sort_key_[num_classes_] = norm_count_[class_id];
334 if (num_classes_ > 1)
335 HeapSort(num_classes_, sort_key_, sort_index_);
◆ SetupResults()
Copies the pruned, sorted classes into the output results and returns the number of classes.
Definition at line 399 of file intmatcher.cpp.
403 for (
int c = 0; c < num_classes_; ++c) {
404 (*results)[c].Class = sort_index_[num_classes_ - c];
405 (*results)[c].Rating = 1.0f - sort_key_[num_classes_ - c] /
◆ SummarizeResult()
void tesseract::ClassPruner::SummarizeResult |
( |
const Classify & |
classify, |
|
|
const INT_TEMPLATES_STRUCT * |
int_templates, |
|
|
const uint16_t * |
expected_num_features, |
|
|
int |
norm_multiplier, |
|
|
const uint8_t * |
normalization_factors |
|
) |
| const |
|
inline |
Prints a summary of the pruner result.
Definition at line 376 of file intmatcher.cpp.
382 tprintf(
"CP:%d classes, %d features:\n", num_classes_, num_features_);
383 for (
int i = 0; i < num_classes_; ++i) {
384 int class_id = sort_index_[num_classes_ - i];
385 STRING class_string = classify.ClassIDToDebugStr(int_templates,
387 tprintf(
"%s:Initial=%d, E=%d, Xht-adj=%d, N=%d, Rat=%.2f\n",
388 class_string.
c_str(),
389 class_count_[class_id],
390 expected_num_features[class_id],
391 (norm_multiplier * normalization_factors[class_id]) >> 8,
392 sort_key_[num_classes_ - i],
393 100.0 - 100.0 * sort_key_[num_classes_ - i] /
The documentation for this class was generated from the following file: