19 #include "config_auto.h"
// offset_table: 256-entry lookup, indexed by a byte value, giving the bit
// position (0-7) of the lowest set bit in that byte. For example entry 1 is 0,
// entry 2 is 1, entry 8 is 3 and entry 128 is 7. Entry 0 holds 255 as a
// "no bit set" marker, since a zero byte has no lowest set bit.
// Used together with next_table to walk the set bits of a proto/config byte
// from least to most significant: read the bit position here, then clear
// that bit via next_table.
50 static const uint8_t offset_table[] = {
51 255, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3,
52 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,
53 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3,
54 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5,
55 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3,
56 0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,
57 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3,
58 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6,
59 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3,
60 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,
61 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
// next_table: 256-entry lookup, indexed by a byte value, giving that byte
// with its lowest set bit cleared (i.e. b & (b - 1)). For example entry 3 is
// 0x2, entry 6 is 0x4 and entry 0xff is 0xfe; entries for 0 and for any
// single-bit value are 0.
// Companion to offset_table: after the position of the lowest set bit has
// been read from offset_table, replacing the byte with next_table[byte]
// advances to the next set bit, and the walk ends when the byte reaches 0.
64 static const uint8_t next_table[] = {
65 0, 0, 0, 0x2, 0, 0x4, 0x4, 0x6, 0, 0x8, 0x8, 0x0a, 0x08, 0x0c, 0x0c, 0x0e,
66 0, 0x10, 0x10, 0x12, 0x10, 0x14, 0x14, 0x16, 0x10, 0x18, 0x18, 0x1a,
67 0x18, 0x1c, 0x1c, 0x1e, 0, 0x20, 0x20, 0x22, 0x20, 0x24, 0x24, 0x26,
68 0x20, 0x28, 0x28, 0x2a, 0x28, 0x2c, 0x2c, 0x2e, 0x20, 0x30, 0x30, 0x32,
69 0x30, 0x34, 0x34, 0x36, 0x30, 0x38, 0x38, 0x3a, 0x38, 0x3c, 0x3c, 0x3e,
70 0, 0x40, 0x40, 0x42, 0x40, 0x44, 0x44, 0x46, 0x40, 0x48, 0x48, 0x4a,
71 0x48, 0x4c, 0x4c, 0x4e, 0x40, 0x50, 0x50, 0x52, 0x50, 0x54, 0x54, 0x56,
72 0x50, 0x58, 0x58, 0x5a, 0x58, 0x5c, 0x5c, 0x5e, 0x40, 0x60, 0x60, 0x62,
73 0x60, 0x64, 0x64, 0x66, 0x60, 0x68, 0x68, 0x6a, 0x68, 0x6c, 0x6c, 0x6e,
74 0x60, 0x70, 0x70, 0x72, 0x70, 0x74, 0x74, 0x76, 0x70, 0x78, 0x78, 0x7a,
75 0x78, 0x7c, 0x7c, 0x7e, 0, 0x80, 0x80, 0x82, 0x80, 0x84, 0x84, 0x86,
76 0x80, 0x88, 0x88, 0x8a, 0x88, 0x8c, 0x8c, 0x8e, 0x80, 0x90, 0x90, 0x92,
77 0x90, 0x94, 0x94, 0x96, 0x90, 0x98, 0x98, 0x9a, 0x98, 0x9c, 0x9c, 0x9e,
78 0x80, 0xa0, 0xa0, 0xa2, 0xa0, 0xa4, 0xa4, 0xa6, 0xa0, 0xa8, 0xa8, 0xaa,
79 0xa8, 0xac, 0xac, 0xae, 0xa0, 0xb0, 0xb0, 0xb2, 0xb0, 0xb4, 0xb4, 0xb6,
80 0xb0, 0xb8, 0xb8, 0xba, 0xb8, 0xbc, 0xbc, 0xbe, 0x80, 0xc0, 0xc0, 0xc2,
81 0xc0, 0xc4, 0xc4, 0xc6, 0xc0, 0xc8, 0xc8, 0xca, 0xc8, 0xcc, 0xcc, 0xce,
82 0xc0, 0xd0, 0xd0, 0xd2, 0xd0, 0xd4, 0xd4, 0xd6, 0xd0, 0xd8, 0xd8, 0xda,
83 0xd8, 0xdc, 0xdc, 0xde, 0xc0, 0xe0, 0xe0, 0xe2, 0xe0, 0xe4, 0xe4, 0xe6,
84 0xe0, 0xe8, 0xe8, 0xea, 0xe8, 0xec, 0xec, 0xee, 0xe0, 0xf0, 0xf0, 0xf2,
85 0xf0, 0xf4, 0xf4, 0xf6, 0xf0, 0xf8, 0xf8, 0xfa, 0xf8, 0xfc, 0xfc, 0xfe
101 HeapSort (
int n,
int ra[],
int rb[]) {
126 if (j < ir && ra[j] < ra[j + 1])
156 max_classes_ = max_classes;
159 class_count_ =
new int[rounded_classes_];
160 norm_count_ =
new int[rounded_classes_];
161 sort_key_ =
new int[rounded_classes_ + 1];
162 sort_index_ =
new int[rounded_classes_ + 1];
163 for (
int i = 0; i < rounded_classes_; i++) {
166 pruning_threshold_ = 0;
172 delete []class_count_;
173 delete []norm_count_;
175 delete []sort_index_;
182 num_features_ = num_features;
184 for (
int f = 0; f < num_features; ++f) {
193 for (
int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
196 const uint32_t* pruner_word_ptr =
199 uint32_t pruner_word = *pruner_word_ptr++;
252 int cutoff_strength) {
253 for (
int class_id = 0; class_id < max_classes_; ++class_id) {
254 if (num_features_ < expected_num_features[class_id]) {
255 int deficit = expected_num_features[class_id] - num_features_;
256 class_count_[class_id] -= class_count_[class_id] * deficit /
257 (num_features_ * cutoff_strength + deficit);
265 for (
int class_id = 0; class_id < max_classes_; ++class_id) {
267 class_count_[class_id] = 0;
273 for (
int class_id = 0; class_id < max_classes_; ++class_id) {
277 class_count_[class_id] = 0;
287 const uint8_t* normalization_factors) {
288 for (
int class_id = 0; class_id < max_classes_; class_id++) {
289 norm_count_[class_id] = class_count_[class_id] -
290 ((norm_multiplier * normalization_factors[class_id]) >> 8);
296 for (
int class_id = 0; class_id < max_classes_; class_id++) {
297 norm_count_[class_id] = class_count_[class_id];
305 bool max_of_non_fragments,
const UNICHARSET& unicharset) {
307 for (
int c = 0; c < max_classes_; ++c) {
308 if (norm_count_[c] > max_count &&
314 (!max_of_non_fragments || !unicharset.
get_fragment(c))) {
315 max_count = norm_count_[c];
319 pruning_threshold_ = (max_count * pruning_factor) >> 8;
321 if (pruning_threshold_ < 1)
322 pruning_threshold_ = 1;
324 for (
int class_id = 0; class_id < max_classes_; class_id++) {
325 if (norm_count_[class_id] >= pruning_threshold_ ||
326 class_id == keep_this) {
328 sort_index_[num_classes_] = class_id;
329 sort_key_[num_classes_] = norm_count_[class_id];
334 if (num_classes_ > 1)
335 HeapSort(num_classes_, sort_key_, sort_index_);
344 int max_num_classes = int_templates->
NumClasses;
345 for (
int f = 0; f < num_features_; ++f) {
347 tprintf(
"F=%3d(%d,%d,%d),", f, feature->
X, feature->
Y, feature->
Theta);
353 for (
int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
356 const uint32_t* pruner_word_ptr =
359 uint32_t pruner_word = *pruner_word_ptr++;
360 for (
int word_class = 0; word_class < 16 &&
361 class_id < max_num_classes; ++word_class, ++class_id) {
362 if (norm_count_[class_id] >= pruning_threshold_) {
364 classify.ClassIDToDebugStr(int_templates,
365 class_id, 0).c_str(),
379 const uint16_t* expected_num_features,
381 const uint8_t* normalization_factors)
const {
382 tprintf(
"CP:%d classes, %d features:\n", num_classes_, num_features_);
383 for (
int i = 0; i < num_classes_; ++i) {
384 int class_id = sort_index_[num_classes_ - i];
385 STRING class_string = classify.ClassIDToDebugStr(int_templates,
387 tprintf(
"%s:Initial=%d, E=%d, Xht-adj=%d, N=%d, Rat=%.2f\n",
388 class_string.
c_str(),
389 class_count_[class_id],
390 expected_num_features[class_id],
391 (norm_multiplier * normalization_factors[class_id]) >> 8,
392 sort_key_[num_classes_ - i],
393 100.0 - 100.0 * sort_key_[num_classes_ - i] /
403 for (
int c = 0; c < num_classes_; ++c) {
404 (*results)[c].Class = sort_index_[num_classes_ - c];
405 (*results)[c].Rating = 1.0f - sort_key_[num_classes_ - c] /
425 int rounded_classes_;
427 int pruning_threshold_;
453 int num_features,
int keep_this,
455 const uint8_t* normalization_factors,
456 const uint16_t* expected_num_features,
458 ClassPruner pruner(int_templates->
NumClasses);
460 pruner.ComputeScores(int_templates, num_features, features);
462 pruner.AdjustForExpectedNumFeatures(expected_num_features,
472 if (normalization_factors !=
nullptr) {
474 normalization_factors);
476 pruner.NoNormalization();
483 pruner.DebugMatch(*
this, int_templates, features);
486 pruner.SummarizeResult(*
this, int_templates, expected_num_features,
488 normalization_factors);
491 return pruner.SetupResults(results);
517 int AdaptFeatureThreshold,
519 bool SeparateDebugWindows) {
524 cprintf (
"Integer Matcher -------------------------------------------\n");
526 tables->Clear(ClassTemplate);
529 for (Feature = 0; Feature < NumFeatures; Feature++) {
530 int csum = UpdateTablesForFeature(ClassTemplate, ProtoMask, ConfigMask,
531 Feature, &Features[Feature],
538 #ifndef GRAPHICS_DISABLED
540 DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
545 DisplayProtoDebugInfo(ClassTemplate, ConfigMask,
546 *tables, SeparateDebugWindows);
550 DisplayFeatureDebugInfo(ClassTemplate, ProtoMask, ConfigMask, NumFeatures,
551 Features, AdaptFeatureThreshold, Debug,
552 SeparateDebugWindows);
556 tables->UpdateSumOfProtoEvidences(ClassTemplate, ConfigMask);
557 tables->NormalizeSums(ClassTemplate, NumFeatures);
559 FindBestMatch(ClassTemplate, *tables, Result);
561 #ifndef GRAPHICS_DISABLED
566 cprintf(
"Match Complete --------------------------------------------\n");
596 int AdaptProtoThreshold,
599 int NumGoodProtos = 0;
604 (
"Find Good Protos -------------------------------------------\n");
606 tables->Clear(ClassTemplate);
608 for (
int Feature = 0; Feature < NumFeatures; Feature++)
609 UpdateTablesForFeature(
610 ClassTemplate, ProtoMask, ConfigMask, Feature, &(Features[Feature]),
613 #ifndef GRAPHICS_DISABLED
615 DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
620 for (
int proto = 0; proto < ClassTemplate->
NumProtos; proto++) {
625 Temp += tables->proto_evidence_[proto][i];
630 if (Temp >= AdaptProtoThreshold) {
638 cprintf (
"Match Complete --------------------------------------------\n");
641 return NumGoodProtos;
664 int AdaptFeatureThreshold,
667 int NumBadFeatures = 0;
671 cprintf(
"Find Bad Features -------------------------------------------\n");
673 tables->Clear(ClassTemplate);
675 for (
int Feature = 0; Feature < NumFeatures; Feature++) {
676 UpdateTablesForFeature(
677 ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature],
684 if (tables->feature_evidence_[i] > best)
685 best = tables->feature_evidence_[i];
688 if (best < AdaptFeatureThreshold) {
689 *FeatureArray = Feature;
695 #ifndef GRAPHICS_DISABLED
697 DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
702 cprintf(
"Match Complete --------------------------------------------\n");
705 return NumBadFeatures;
710 : classify_debug_level_(classify_debug_level)
715 double Similarity = (static_cast<double>(IntSimilarity)) / 65536.0 / 65536.0;
717 evidence = 255.0 / (evidence * evidence + 1.0);
725 similarity_evidence_table_[i] = static_cast<uint8_t>(evidence + 0.5);
729 evidence_table_mask_ =
732 table_trunc_shift_bits_ = (27 -
SE_TABLE_BITS - (mult_trunc_shift_bits_ << 1));
754 static void IMDebugConfiguration(
int FeatureNum, uint16_t ActualProtoNum,
755 uint8_t Evidence, uint32_t ConfigWord) {
756 cprintf (
"F = %3d, P = %3d, E = %3d, Configs = ",
757 FeatureNum, static_cast<int>(ActualProtoNum), static_cast<int>(Evidence));
771 static void IMDebugConfigurationSum(
int FeatureNum, uint8_t *FeatureEvidence,
772 int32_t ConfigCount) {
773 cprintf(
"F=%3d, C=", FeatureNum);
774 for (
int ConfigNum = 0; ConfigNum < ConfigCount; ConfigNum++) {
775 cprintf(
"%4d", FeatureEvidence[ConfigNum]);
791 int IntegerMatcher::UpdateTablesForFeature(
802 uint32_t ActualProtoNum;
804 int32_t proto_word_offset;
805 int32_t proto_offset;
807 uint32_t *ProtoPrunerPtr;
811 uint32_t XFeatureAddress;
812 uint32_t YFeatureAddress;
813 uint32_t ThetaFeatureAddress;
818 XFeatureAddress = ((Feature->
X >> 2) << 1);
822 for (ProtoSetIndex = 0, ActualProtoNum = 0;
823 ProtoSetIndex < ClassTemplate->
NumProtoSets; ProtoSetIndex++) {
824 ProtoSet = ClassTemplate->
ProtoSets[ProtoSetIndex];
825 ProtoPrunerPtr = reinterpret_cast<uint32_t *>((*ProtoSet).ProtoPruner);
830 ProtoWord = *(ProtoPrunerPtr + XFeatureAddress);
831 ProtoWord &= *(ProtoPrunerPtr + YFeatureAddress);
832 ProtoWord &= *(ProtoPrunerPtr + ThetaFeatureAddress);
833 ProtoWord &= *ProtoMask;
835 if (ProtoWord != 0) {
836 proto_byte = ProtoWord & 0xff;
838 proto_word_offset = 0;
839 while (ProtoWord != 0 || proto_byte != 0) {
840 while (proto_byte == 0) {
841 proto_byte = ProtoWord & 0xff;
843 proto_word_offset += 8;
845 proto_offset = offset_table[proto_byte] + proto_word_offset;
846 proto_byte = next_table[proto_byte];
847 Proto = &(ProtoSet->
Protos[ProtoNum + proto_offset]);
848 ConfigWord = Proto->
Configs[0];
849 int32_t A3 = (((Proto->
A * (Feature->
X - 128)) * 2)
850 - (Proto->
B * (Feature->
Y - 128)) + (Proto->
C * 512));
851 int32_t M3 = ((static_cast<int8_t>(Feature->
Theta - Proto->
Angle)) *
858 A3 >>= mult_trunc_shift_bits_;
859 M3 >>= mult_trunc_shift_bits_;
860 if (static_cast<uint32_t>(A3) > evidence_mult_mask_)
861 A3 = evidence_mult_mask_;
862 if (static_cast<uint32_t>(M3) > evidence_mult_mask_)
863 M3 = evidence_mult_mask_;
865 uint32_t A4 = (A3 * A3) + (M3 * M3);
866 A4 >>= table_trunc_shift_bits_;
867 if (A4 > evidence_table_mask_)
870 Evidence = similarity_evidence_table_[A4];
873 IMDebugConfiguration (FeatureNum,
874 ActualProtoNum + proto_offset,
875 Evidence, ConfigWord);
877 ConfigWord &= *ConfigMask;
879 uint8_t feature_evidence_index = 0;
880 uint8_t config_byte = 0;
881 while (ConfigWord != 0 || config_byte != 0) {
882 while (config_byte == 0) {
883 config_byte = ConfigWord & 0xff;
885 feature_evidence_index += 8;
887 const uint8_t config_offset =
888 offset_table[config_byte] + feature_evidence_index - 8;
889 config_byte = next_table[config_byte];
894 uint8_t* UINT8Pointer =
896 for (uint8_t ProtoIndex =
897 ClassTemplate->
ProtoLengths[ActualProtoNum + proto_offset];
898 ProtoIndex > 0; ProtoIndex--, UINT8Pointer++) {
899 if (Evidence > *UINT8Pointer) {
900 uint8_t Temp = *UINT8Pointer;
901 *UINT8Pointer = Evidence;
904 else if (Evidence == 0)
919 int SumOverConfigs = 0;
920 for (
int ConfigNum = ClassTemplate->
NumConfigs; ConfigNum > 0; ConfigNum--) {
921 int evidence = *UINT8Pointer++;
922 SumOverConfigs += evidence;
923 *IntPointer++ += evidence;
925 return SumOverConfigs;
931 #ifndef GRAPHICS_DISABLED
932 void IntegerMatcher::DebugFeatureProtoError(
944 uint8_t ProtoWordNum;
946 uint16_t ActualProtoNum;
949 cprintf(
"Configuration Mask:\n");
950 for (ConfigNum = 0; ConfigNum < ClassTemplate->
NumConfigs; ConfigNum++)
951 cprintf(
"%1d", (((*ConfigMask) >> ConfigNum) & 1));
954 cprintf(
"Feature Error for Configurations:\n");
955 for (ConfigNum = 0; ConfigNum < ClassTemplate->
NumConfigs; ConfigNum++) {
959 / NumFeatures / 256.0));
966 for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->
NumProtoSets;
969 for (ProtoWordNum = 0; ProtoWordNum < 2;
970 ProtoWordNum++, ProtoMask++) {
974 && (ActualProtoNum < ClassTemplate->
NumProtos));
975 ProtoNum++, ActualProtoNum++)
976 cprintf (
"%1d", (((*ProtoMask) >> ProtoNum) & 1));
983 for (
int i = 0; i < ClassTemplate->
NumConfigs; i++)
988 for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->
NumProtoSets;
990 ProtoSet = ClassTemplate->
ProtoSets[ProtoSetIndex];
994 (ActualProtoNum < ClassTemplate->NumProtos));
995 ProtoNum++, ActualProtoNum++) {
996 cprintf (
"P %3d =", ActualProtoNum);
998 for (uint8_t j = 0; j < ClassTemplate->
ProtoLengths[ActualProtoNum]; j++) {
1005 temp / 256.0 / ClassTemplate->
ProtoLengths[ActualProtoNum]);
1009 while (ConfigWord) {
1010 cprintf (
"%5d", ConfigWord & 1 ? temp : 0);
1012 ProtoConfigs[ConfigNum] += temp;
1022 cprintf (
"Proto Error for Configurations:\n");
1023 for (ConfigNum = 0; ConfigNum < ClassTemplate->
NumConfigs; ConfigNum++)
1026 ProtoConfigs[ConfigNum] /
1032 cprintf (
"Proto Sum for Configurations:\n");
1033 for (ConfigNum = 0; ConfigNum < ClassTemplate->
NumConfigs; ConfigNum++)
1034 cprintf (
" %4.1f", ProtoConfigs[ConfigNum] / 256.0);
1037 cprintf (
"Proto Length for Configurations:\n");
1038 for (ConfigNum = 0; ConfigNum < ClassTemplate->
NumConfigs; ConfigNum++)
1040 static_cast<float>(ClassTemplate->
ConfigLengths[ConfigNum]));
1046 void IntegerMatcher::DisplayProtoDebugInfo(
1050 bool SeparateDebugWindows) {
1052 uint16_t ActualProtoNum;
1057 if (SeparateDebugWindows) {
1062 for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->
NumProtoSets;
1064 ProtoSet = ClassTemplate->
ProtoSets[ProtoSetIndex];
1068 (ActualProtoNum < ClassTemplate->NumProtos));
1069 ProtoNum++, ActualProtoNum++) {
1072 for (uint8_t i = 0; i < ClassTemplate->
ProtoLengths[ActualProtoNum]; i++)
1077 if ((ProtoSet->
Protos[ProtoNum]).Configs[0] & (*ConfigMask)) {
1085 void IntegerMatcher::DisplayFeatureDebugInfo(
1089 int16_t NumFeatures,
1091 int AdaptFeatureThreshold,
1093 bool SeparateDebugWindows) {
1096 tables->
Clear(ClassTemplate);
1099 if (SeparateDebugWindows) {
1104 for (
int Feature = 0; Feature < NumFeatures; Feature++) {
1105 UpdateTablesForFeature(
1106 ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature],
1118 if (best < AdaptFeatureThreshold)
1138 uint32_t ConfigWord;
1143 uint16_t ActualProtoNum;
1147 for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->
NumProtoSets;
1149 ProtoSet = ClassTemplate->
ProtoSets[ProtoSetIndex];
1153 ProtoNum++, ActualProtoNum++) {
1161 ConfigWord &= *ConfigMask;
1163 while (ConfigWord) {
1165 *IntPointer += temp;
1178 INT_CLASS ClassTemplate, int16_t NumFeatures) {
1192 int IntegerMatcher::FindBestMatch(
1198 result->
fonts.truncate(0);
1205 if (*classify_debug_level_ > 2)
1206 tprintf(
"Config %d, rating=%d\n", c, rating);
1207 if (rating > best_match) {
1209 best_match = rating;
1215 result->
rating = best_match / 65536.0f;
1225 int normalization_factor,
1226 int matcher_multiplier) {
1227 int divisor = blob_length + matcher_multiplier;
1228 return divisor == 0 ? 1.0f : (rating * blob_length +
1229 matcher_multiplier * normalization_factor / 256.0f) / divisor;