|
| ClassPruner (int max_classes) |
|
| ~ClassPruner () |
|
void | ComputeScores (const INT_TEMPLATES_STRUCT *int_templates, int num_features, const INT_FEATURE_STRUCT *features) |
|
void | AdjustForExpectedNumFeatures (const uint16_t *expected_num_features, int cutoff_strength) |
|
void | DisableDisabledClasses (const UNICHARSET &unicharset) |
|
void | DisableFragments (const UNICHARSET &unicharset) |
|
void | NormalizeForXheight (int norm_multiplier, const uint8_t *normalization_factors) |
|
void | NoNormalization () |
|
void | PruneAndSort (int pruning_factor, int keep_this, bool max_of_non_fragments, const UNICHARSET &unicharset) |
|
void | DebugMatch (const Classify &classify, const INT_TEMPLATES_STRUCT *int_templates, const INT_FEATURE_STRUCT *features) const |
|
void | SummarizeResult (const Classify &classify, const INT_TEMPLATES_STRUCT *int_templates, const uint16_t *expected_num_features, int norm_multiplier, const uint8_t *normalization_factors) const |
|
int | SetupResults (GenericVector< CP_RESULT_STRUCT > *results) const |
|
Definition at line 104 of file intmatcher.cpp.
◆ ClassPruner()
tesseract::ClassPruner::ClassPruner |
( |
int |
max_classes | ) |
|
|
inline |
Definition at line 106 of file intmatcher.cpp.
113 max_classes_ = max_classes;
116 class_count_ =
new int[rounded_classes_];
117 norm_count_ =
new int[rounded_classes_];
118 sort_key_ =
new int[rounded_classes_ + 1];
119 sort_index_ =
new int[rounded_classes_ + 1];
120 for (
int i = 0; i < rounded_classes_; i++) {
123 pruning_threshold_ = 0;
#define NUM_BITS_PER_CLASS
int RoundUp(int n, int block_size)
#define WERDS_PER_CP_VECTOR
◆ ~ClassPruner()
tesseract::ClassPruner::~ClassPruner |
( |
| ) |
|
|
inline |
Definition at line 128 of file intmatcher.cpp.
129 delete []class_count_;
130 delete []norm_count_;
132 delete []sort_index_;
◆ AdjustForExpectedNumFeatures()
void tesseract::ClassPruner::AdjustForExpectedNumFeatures |
( |
const uint16_t * |
expected_num_features, |
|
|
int |
cutoff_strength |
|
) |
| |
|
inline |
Adjusts the scores according to the number of expected features. Used in lieu of a constant bias, this penalizes classes that expect more features than are actually present. Thus an actual 'c' will score higher for class 'c' than for class 'e', even though almost all the features match 'e' as well as 'c', because 'e' expects more features to be present.
Definition at line 208 of file intmatcher.cpp.
210 for (
int class_id = 0; class_id < max_classes_; ++class_id) {
211 if (num_features_ < expected_num_features[class_id]) {
212 int deficit = expected_num_features[class_id] - num_features_;
213 class_count_[class_id] -= class_count_[class_id] * deficit /
214 (num_features_ * cutoff_strength + deficit);
◆ ComputeScores()
Computes the scores for every class in the character set by summing the weights for each feature, and stores the sums internally in class_count_.
Definition at line 137 of file intmatcher.cpp.
139 num_features_ = num_features;
141 for (
int f = 0; f < num_features; ++f) {
150 for (
int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
153 const uint32_t* pruner_word_ptr =
156 uint32_t pruner_word = *pruner_word_ptr++;
CLASS_PRUNER_STRUCT * ClassPruners[MAX_NUM_CLASS_PRUNERS]
#define NUM_BITS_PER_CLASS
#define CLASS_PRUNER_CLASS_MASK
uint32_t p[NUM_CP_BUCKETS][NUM_CP_BUCKETS][NUM_CP_BUCKETS][WERDS_PER_CP_VECTOR]
#define WERDS_PER_CP_VECTOR
◆ DebugMatch()
Prints debug info on the class pruner matches for the pruned classes only.
Definition at line 297 of file intmatcher.cpp.
301 int max_num_classes = int_templates->
NumClasses;
302 for (
int f = 0; f < num_features_; ++f) {
304 tprintf(
"F=%3d(%d,%d,%d),", f, feature->
X, feature->
Y, feature->
Theta);
310 for (
int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
313 const uint32_t* pruner_word_ptr =
316 uint32_t pruner_word = *pruner_word_ptr++;
317 for (
int word_class = 0; word_class < 16 &&
318 class_id < max_num_classes; ++word_class, ++class_id) {
319 if (norm_count_[class_id] >= pruning_threshold_) {
321 classify.ClassIDToDebugStr(int_templates,
322 class_id, 0).string(),
CLASS_PRUNER_STRUCT * ClassPruners[MAX_NUM_CLASS_PRUNERS]
#define NUM_BITS_PER_CLASS
#define CLASS_PRUNER_CLASS_MASK
uint32_t p[NUM_CP_BUCKETS][NUM_CP_BUCKETS][NUM_CP_BUCKETS][WERDS_PER_CP_VECTOR]
DLLSYM void tprintf(const char *format,...)
#define WERDS_PER_CP_VECTOR
◆ DisableDisabledClasses()
void tesseract::ClassPruner::DisableDisabledClasses |
( |
const UNICHARSET & |
unicharset | ) |
|
|
inline |
Zeros the scores for classes disabled in the unicharset. Implements the black-list to recognize a subset of the character set.
Definition at line 221 of file intmatcher.cpp.
222 for (
int class_id = 0; class_id < max_classes_; ++class_id) {
224 class_count_[class_id] = 0;
bool get_enabled(UNICHAR_ID unichar_id) const
◆ DisableFragments()
void tesseract::ClassPruner::DisableFragments |
( |
const UNICHARSET & |
unicharset | ) |
|
|
inline |
Zeros the scores of fragments.
Definition at line 229 of file intmatcher.cpp.
230 for (
int class_id = 0; class_id < max_classes_; ++class_id) {
234 class_count_[class_id] = 0;
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
◆ NoNormalization()
void tesseract::ClassPruner::NoNormalization |
( |
| ) |
|
|
inline |
The no-op normalization: copies the class_count_ array to norm_count_ unchanged.
Definition at line 252 of file intmatcher.cpp.
253 for (
int class_id = 0; class_id < max_classes_; class_id++) {
254 norm_count_[class_id] = class_count_[class_id];
◆ NormalizeForXheight()
void tesseract::ClassPruner::NormalizeForXheight |
( |
int |
norm_multiplier, |
|
|
const uint8_t * |
normalization_factors |
|
) |
| |
|
inline |
Normalizes the counts for xheight, putting the normalized result in norm_count_. Applies a simple subtractive penalty for incorrect vertical position provided by the normalization_factors array, indexed by character class, and scaled by the norm_multiplier.
Definition at line 243 of file intmatcher.cpp.
245 for (
int class_id = 0; class_id < max_classes_; class_id++) {
246 norm_count_[class_id] = class_count_[class_id] -
247 ((norm_multiplier * normalization_factors[class_id]) >> 8);
◆ PruneAndSort()
void tesseract::ClassPruner::PruneAndSort |
( |
int |
pruning_factor, |
|
|
int |
keep_this, |
|
|
bool |
max_of_non_fragments, |
|
|
const UNICHARSET & |
unicharset |
|
) |
| |
|
inline |
Prunes the classes using (maximum count * pruning_factor) / 256 as the threshold for keeping classes. If max_of_non_fragments is true, fragments are ignored when computing the maximum count.
Definition at line 261 of file intmatcher.cpp.
264 for (
int c = 0; c < max_classes_; ++c) {
265 if (norm_count_[c] > max_count &&
271 (!max_of_non_fragments || !unicharset.
get_fragment(c))) {
272 max_count = norm_count_[c];
276 pruning_threshold_ = (max_count * pruning_factor) >> 8;
278 if (pruning_threshold_ < 1)
279 pruning_threshold_ = 1;
281 for (
int class_id = 0; class_id < max_classes_; class_id++) {
282 if (norm_count_[class_id] >= pruning_threshold_ ||
283 class_id == keep_this) {
285 sort_index_[num_classes_] = class_id;
286 sort_key_[num_classes_] = norm_count_[class_id];
291 if (num_classes_ > 1)
292 HeapSort(num_classes_, sort_key_, sort_index_);
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
void HeapSort(int n, int ra[], int rb[])
◆ SetupResults()
Copies the pruned, sorted classes into the output results and returns the number of classes.
Definition at line 357 of file intmatcher.cpp.
360 for (
int c = 0; c < num_classes_; ++c) {
361 (*results)[c].Class = sort_index_[num_classes_ - c];
362 (*results)[c].Rating = 1.0 - sort_key_[num_classes_ - c] /
#define CLASS_PRUNER_CLASS_MASK
void init_to_size(int size, const T &t)
◆ SummarizeResult()
void tesseract::ClassPruner::SummarizeResult |
( |
const Classify & |
classify, |
|
|
const INT_TEMPLATES_STRUCT * |
int_templates, |
|
|
const uint16_t * |
expected_num_features, |
|
|
int |
norm_multiplier, |
|
|
const uint8_t * |
normalization_factors |
|
) |
| const |
|
inline |
Prints a summary of the pruner result.
Definition at line 334 of file intmatcher.cpp.
339 tprintf(
"CP:%d classes, %d features:\n", num_classes_, num_features_);
340 for (
int i = 0; i < num_classes_; ++i) {
341 int class_id = sort_index_[num_classes_ - i];
342 STRING class_string = classify.ClassIDToDebugStr(int_templates,
344 tprintf(
"%s:Initial=%d, E=%d, Xht-adj=%d, N=%d, Rat=%.2f\n",
346 class_count_[class_id],
347 expected_num_features[class_id],
348 (norm_multiplier * normalization_factors[class_id]) >> 8,
349 sort_key_[num_classes_ - i],
350 100.0 - 100.0 * sort_key_[num_classes_ - i] /
const char * string() const
#define CLASS_PRUNER_CLASS_MASK
DLLSYM void tprintf(const char *format,...)
The documentation for this class was generated from the following file: