tesseract
5.0.0-alpha-619-ge9db
|
#include <blamer.h>
|
| BlamerBundle () |
|
| BlamerBundle (const BlamerBundle &other) |
|
| ~BlamerBundle () |
|
STRING | TruthString () const |
|
IncorrectResultReason | incorrect_result_reason () const |
|
bool | NoTruth () const |
|
bool | HasDebugInfo () const |
|
const STRING & | debug () const |
|
const STRING & | misadaption_debug () const |
|
void | UpdateBestRating (float rating) |
|
int | correct_segmentation_length () const |
|
bool | MatrixPositionCorrect (int index, const MATRIX_COORD &coord) |
|
void | set_best_choice_is_dict_and_top_choice (bool value) |
|
const char * | lattice_data () const |
|
int | lattice_size () const |
|
void | set_lattice_data (const char *data, int size) |
|
const tesseract::ParamsTrainingBundle & | params_training_bundle () const |
|
void | AddHypothesis (const tesseract::ParamsTrainingHypothesis &hypo) |
|
void | SetWordTruth (const UNICHARSET &unicharset, const char *truth_str, const TBOX &word_box) |
|
void | SetSymbolTruth (const UNICHARSET &unicharset, const char *char_str, const TBOX &char_box) |
|
void | SetRejectedTruth () |
|
bool | ChoiceIsCorrect (const WERD_CHOICE *word_choice) const |
|
void | ClearResults () |
|
void | CopyTruth (const BlamerBundle &other) |
|
void | CopyResults (const BlamerBundle &other) |
|
const char * | IncorrectReason () const |
|
void | FillDebugString (const STRING &msg, const WERD_CHOICE *choice, STRING *debug) |
|
void | SetupNormTruthWord (const DENORM &denorm) |
|
void | SplitBundle (int word1_right, int word2_left, bool debug, BlamerBundle *bundle1, BlamerBundle *bundle2) const |
|
void | JoinBlames (const BlamerBundle &bundle1, const BlamerBundle &bundle2, bool debug) |
|
void | BlameClassifier (const UNICHARSET &unicharset, const TBOX &blob_box, const BLOB_CHOICE_LIST &choices, bool debug) |
|
void | SetChopperBlame (const WERD_RES *word, bool debug) |
|
void | BlameClassifierOrLangModel (const WERD_RES *word, const UNICHARSET &unicharset, bool valid_permuter, bool debug) |
|
void | SetupCorrectSegmentation (const TWERD *word, bool debug) |
|
bool | GuidedSegsearchNeeded (const WERD_CHOICE *best_choice) const |
|
void | InitForSegSearch (const WERD_CHOICE *best_choice, MATRIX *ratings, UNICHAR_ID wildcard_id, bool debug, STRING *debug_str, tesseract::LMPainPoints *pain_points, double max_char_wh_ratio, WERD_RES *word_res) |
|
bool | GuidedSegsearchStillGoing () const |
|
void | FinishSegSearch (const WERD_CHOICE *best_choice, bool debug, STRING *debug_str) |
|
void | SetMisAdaptionDebug (const WERD_CHOICE *best_choice, bool debug) |
|
Definition at line 103 of file blamer.h.
◆ BlamerBundle() [1/2]
BlamerBundle::BlamerBundle() [inline]
Definition at line 105 of file blamer.h.
105 : truth_has_char_boxes_(
false),
◆ BlamerBundle() [2/2]
◆ ~BlamerBundle()
BlamerBundle::~BlamerBundle() [inline]
Definition at line 112 of file blamer.h.
112 {
delete[] lattice_data_; }
◆ AddHypothesis()
◆ BlameClassifier()
void BlamerBundle::BlameClassifier(const UNICHARSET &unicharset, const TBOX &blob_box, const BLOB_CHOICE_LIST &choices, bool debug)
Definition at line 263 of file blamer.cpp.
267 if (!truth_has_char_boxes_ ||
271 for (
int b = 0; b < norm_truth_word_.
length(); ++b) {
272 const TBOX &truth_box = norm_truth_word_.
BlobBox(b);
278 bool incorrect_adapted =
false;
279 UNICHAR_ID incorrect_adapted_id = INVALID_UNICHAR_ID;
280 const char *truth_str = truth_text_[b].c_str();
283 BLOB_CHOICE_IT choices_it(const_cast<BLOB_CHOICE_LIST*>(&choices));
284 for (choices_it.mark_cycle_pt(); !choices_it.cycled_list();
285 choices_it.forward()) {
292 incorrect_adapted =
true;
297 STRING debug_str =
"unichar ";
298 debug_str += truth_str;
299 debug_str +=
" not found in classification list";
301 }
else if (incorrect_adapted) {
302 STRING debug_str =
"better rating for adapted ";
304 debug_str +=
" than for correct ";
305 debug_str += truth_str;
◆ BlameClassifierOrLangModel()
void BlamerBundle::BlameClassifierOrLangModel(const WERD_RES *word, const UNICHARSET &unicharset, bool valid_permuter, bool debug)
Definition at line 375 of file blamer.cpp.
378 if (valid_permuter) {
380 best_choice_is_dict_and_top_choice_ =
true;
385 for (blob_choice_it.mark_cycle_pt(); !blob_choice_it.cycled_list();
386 blob_choice_it.forward()) {
387 if (!(unicharset.
get_fragment(blob_choice_it.data()->unichar_id()))) {
388 first_choice = blob_choice_it.data();
394 best_choice_is_dict_and_top_choice_ =
false;
400 if (best_choice_is_dict_and_top_choice_) {
401 debug_str =
"Best choice is: incorrect, top choice, dictionary word";
402 debug_str +=
" with permuter ";
405 debug_str =
"Classifier/Old LM tradeoff is to blame";
◆ ChoiceIsCorrect()
bool BlamerBundle::ChoiceIsCorrect(const WERD_CHOICE *word_choice) const
Definition at line 117 of file blamer.cpp.
118 if (word_choice ==
nullptr)
return false;
121 for (
int i = 0; i < word_choice->
length(); ++i) {
126 return truth_str == normed_choice_str;
◆ ClearResults()
void BlamerBundle::ClearResults() [inline]
Definition at line 190 of file blamer.h.
192 norm_box_tolerance_ = 0;
195 segsearch_is_looking_for_blame_ =
false;
197 correct_segmentation_cols_.
clear();
198 correct_segmentation_rows_.
clear();
199 best_choice_is_dict_and_top_choice_ =
false;
200 delete[] lattice_data_;
201 lattice_data_ =
nullptr;
◆ CopyResults()
void BlamerBundle::CopyResults(const BlamerBundle &other) [inline]
Definition at line 211 of file blamer.h.
212 norm_truth_word_ = other.norm_truth_word_;
213 norm_box_tolerance_ = other.norm_box_tolerance_;
214 incorrect_result_reason_ = other.incorrect_result_reason_;
215 segsearch_is_looking_for_blame_ = other.segsearch_is_looking_for_blame_;
216 best_correctly_segmented_rating_ = other.best_correctly_segmented_rating_;
217 correct_segmentation_cols_ = other.correct_segmentation_cols_;
218 correct_segmentation_rows_ = other.correct_segmentation_rows_;
219 best_choice_is_dict_and_top_choice_ =
220 other.best_choice_is_dict_and_top_choice_;
221 if (other.lattice_data_ !=
nullptr) {
222 lattice_data_ =
new char[other.lattice_size_];
223 memcpy(lattice_data_, other.lattice_data_, other.lattice_size_);
224 lattice_size_ = other.lattice_size_;
226 lattice_data_ =
nullptr;
◆ CopyTruth()
Definition at line 204 of file blamer.h.
205 truth_has_char_boxes_ = other.truth_has_char_boxes_;
206 truth_word_ = other.truth_word_;
207 truth_text_ = other.truth_text_;
208 incorrect_result_reason_ =
◆ correct_segmentation_length()
int BlamerBundle::correct_segmentation_length() const [inline]
Definition at line 141 of file blamer.h.
142 return correct_segmentation_cols_.
size();
◆ debug()
const STRING& BlamerBundle::debug() const [inline]
◆ FillDebugString()
Definition at line 129 of file blamer.cpp.
132 (*debug) +=
"Truth ";
133 for (
int i = 0; i < this->truth_text_.
size(); ++i) {
134 (*debug) += this->truth_text_[i];
136 if (!this->truth_has_char_boxes_) (*debug) +=
" (no char boxes)";
137 if (choice !=
nullptr) {
138 (*debug) +=
" Choice ";
141 (*debug) += choice_str;
◆ FinishSegSearch()
void BlamerBundle::FinishSegSearch(const WERD_CHOICE *best_choice, bool debug, STRING *debug_str)
Definition at line 517 of file blamer.cpp.
530 if (segsearch_is_looking_for_blame_) {
531 segsearch_is_looking_for_blame_ =
false;
532 if (best_choice_is_dict_and_top_choice_) {
533 *debug_str =
"Best choice is: incorrect, top choice, dictionary word";
534 *debug_str +=
" with permuter ";
537 }
else if (best_correctly_segmented_rating_ <
539 *debug_str +=
"Correct segmentation state was not explored";
542 if (best_correctly_segmented_rating_ >=
544 *debug_str +=
"Correct segmentation paths were pruned by LM\n";
547 best_correctly_segmented_rating_);
◆ GuidedSegsearchNeeded()
bool BlamerBundle::GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const
Definition at line 469 of file blamer.cpp.
471 !segsearch_is_looking_for_blame_ &&
472 truth_has_char_boxes_ &&
◆ GuidedSegsearchStillGoing()
bool BlamerBundle::GuidedSegsearchStillGoing() const
Definition at line 512 of file blamer.cpp.
513 return segsearch_is_looking_for_blame_;
◆ HasDebugInfo()
bool BlamerBundle::HasDebugInfo() const [inline]
◆ incorrect_result_reason()
Definition at line 121 of file blamer.h.
122 return incorrect_result_reason_;
◆ IncorrectReason()
const char * BlamerBundle::IncorrectReason() const
◆ IncorrectReasonName()
◆ InitForSegSearch()
Definition at line 478 of file blamer.cpp.
484 segsearch_is_looking_for_blame_ =
true;
486 tprintf(
"segsearch starting to look for blame\n");
490 *debug_str +=
"Correct segmentation:\n";
491 for (
int idx = 0; idx < correct_segmentation_cols_.
size(); ++idx) {
492 debug_str->
add_str_int(
"col=", correct_segmentation_cols_[idx]);
493 debug_str->
add_str_int(
" row=", correct_segmentation_rows_[idx]);
495 if (!ratings->
Classified(correct_segmentation_cols_[idx],
496 correct_segmentation_rows_[idx],
499 correct_segmentation_rows_[idx],
501 false, max_char_wh_ratio, word_res)) {
502 segsearch_is_looking_for_blame_ =
false;
503 *debug_str +=
"\nFailed to insert pain point\n";
◆ JoinBlames()
Definition at line 231 of file blamer.cpp.
236 if (bundle1.incorrect_result_reason_ !=
IRR_CORRECT &&
239 debug_str +=
"Blame from part 1: ";
240 debug_str += bundle1.debug_;
241 irr = bundle1.incorrect_result_reason_;
243 if (bundle2.incorrect_result_reason_ !=
IRR_CORRECT &&
246 debug_str +=
"Blame from part 2: ";
247 debug_str += bundle2.debug_;
249 irr = bundle2.incorrect_result_reason_;
250 }
else if (irr != bundle2.incorrect_result_reason_) {
254 incorrect_result_reason_ = irr;
256 SetBlame(irr, debug_str,
nullptr,
debug);
◆ LastChanceBlame()
void BlamerBundle::LastChanceBlame(bool debug, WERD_RES *word) [static]
Definition at line 558 of file blamer.cpp.
570 STRING debug_str =
"Choice is incorrect after recognition";
◆ lattice_data()
const char* BlamerBundle::lattice_data() const [inline]
Definition at line 153 of file blamer.h.
154 return lattice_data_;
◆ lattice_size()
int BlamerBundle::lattice_size() const [inline]
Definition at line 156 of file blamer.h.
157 return lattice_size_;
◆ MatrixPositionCorrect()
bool BlamerBundle::MatrixPositionCorrect(int index, const MATRIX_COORD &coord) [inline]
Definition at line 146 of file blamer.h.
147 return correct_segmentation_cols_[index] == coord.
col &&
148 correct_segmentation_rows_[index] == coord.
row;
◆ misadaption_debug()
const STRING& BlamerBundle::misadaption_debug() const [inline]
Definition at line 134 of file blamer.h.
135 return misadaption_debug_;
◆ NoTruth()
bool BlamerBundle::NoTruth() const [inline]
◆ params_training_bundle()
Definition at line 166 of file blamer.h.
167 return params_training_bundle_;
◆ set_best_choice_is_dict_and_top_choice()
void BlamerBundle::set_best_choice_is_dict_and_top_choice(bool value) [inline]
Definition at line 150 of file blamer.h.
151 best_choice_is_dict_and_top_choice_ = value;
◆ set_lattice_data()
void BlamerBundle::set_lattice_data(const char *data, int size) [inline]
Definition at line 159 of file blamer.h.
160 lattice_size_ = size;
161 delete [] lattice_data_;
162 lattice_data_ =
new char[lattice_size_];
163 memcpy(lattice_data_, data, lattice_size_);
◆ SetChopperBlame()
void BlamerBundle::SetChopperBlame(const WERD_RES *word, bool debug)
Definition at line 316 of file blamer.cpp.
317 if (
NoTruth() || !truth_has_char_boxes_ ||
322 bool missing_chop =
false;
326 int16_t truth_x = -1;
327 while (box_index < truth_word_.
length() && blob_index < num_blobs) {
334 truth_x + norm_box_tolerance_) {
341 if (missing_chop || box_index < norm_truth_word_.
length()) {
344 debug_str.
add_str_int(
"Detected missing chop (tolerance=",
345 norm_box_tolerance_);
346 debug_str +=
") at Bounding Box=";
349 debug_str.
add_str_int(
"\nNo chop for truth at x=", truth_x);
352 norm_truth_word_.
length() - box_index);
353 debug_str +=
" truth box(es)";
355 debug_str +=
"\nMaximally chopped word boxes:\n";
356 for (blob_index = 0; blob_index < num_blobs; ++blob_index) {
361 debug_str +=
"Truth bounding boxes:\n";
362 for (box_index = 0; box_index < norm_truth_word_.
length(); ++box_index) {
◆ SetMisAdaptionDebug()
void BlamerBundle::SetMisAdaptionDebug(const WERD_CHOICE *best_choice, bool debug)
Definition at line 585 of file blamer.cpp.
589 misadaption_debug_ =
"misadapt to word (";
591 misadaption_debug_ +=
"): ";
◆ SetRejectedTruth()
void BlamerBundle::SetRejectedTruth()
Definition at line 111 of file blamer.cpp.
113 truth_has_char_boxes_ =
false;
◆ SetSymbolTruth()
void BlamerBundle::SetSymbolTruth(const UNICHARSET &unicharset, const char *char_str, const TBOX &char_box)
Definition at line 92 of file blamer.cpp.
94 STRING symbol_str(char_str);
96 if (
id != INVALID_UNICHAR_ID) {
98 if (normed_uch.length() > 0) symbol_str = normed_uch;
100 int length = truth_word_.
length();
104 truth_has_char_boxes_ =
true;
105 else if (truth_word_.
BlobBox(length - 1) == char_box)
106 truth_has_char_boxes_ =
false;
◆ SetupCorrectSegmentation()
void BlamerBundle::SetupCorrectSegmentation(const TWERD *word, bool debug)
Definition at line 413 of file blamer.cpp.
414 #ifndef DISABLED_LEGACY_ENGINE
416 #endif // ndef DISABLED_LEGACY_ENGINE
417 if (incorrect_result_reason_ !=
IRR_CORRECT || !truth_has_char_boxes_)
421 debug_str +=
"Blamer computing correct_segmentation_cols\n";
422 int curr_box_col = 0;
423 int next_box_col = 0;
425 if (num_blobs == 0)
return;
427 int16_t next_box_x = word->
blobs[blob_index]->bounding_box().right();
428 for (
int truth_idx = 0; blob_index < num_blobs &&
429 truth_idx < norm_truth_word_.
length();
432 int16_t curr_box_x = next_box_x;
433 if (blob_index + 1 < num_blobs)
434 next_box_x = word->
blobs[blob_index + 1]->bounding_box().right();
435 int16_t truth_x = norm_truth_word_.
BlobBox(truth_idx).
right();
436 debug_str.
add_str_int(
"Box x coord vs. truth: ", curr_box_x);
439 if (curr_box_x > (truth_x + norm_box_tolerance_)) {
441 }
else if (curr_box_x >= truth_x - norm_box_tolerance_ &&
442 (blob_index + 1 >= num_blobs ||
443 next_box_x > truth_x + norm_box_tolerance_)) {
444 correct_segmentation_cols_.
push_back(curr_box_col);
445 correct_segmentation_rows_.
push_back(next_box_col-1);
450 curr_box_col = next_box_col;
453 if (blob_index < num_blobs ||
454 correct_segmentation_cols_.
size() != norm_truth_word_.
length()) {
455 debug_str.
add_str_int(
"Blamer failed to find correct segmentation"
456 " (tolerance=", norm_box_tolerance_);
457 if (blob_index >= num_blobs) debug_str +=
" blob == nullptr";
459 debug_str.
add_str_int(
" path length ", correct_segmentation_cols_.
size());
463 correct_segmentation_cols_.
clear();
464 correct_segmentation_rows_.
clear();
◆ SetupNormTruthWord()
void BlamerBundle::SetupNormTruthWord(const DENORM &denorm)
Definition at line 151 of file blamer.cpp.
153 norm_box_tolerance_ = kBlamerBoxTolerance * denorm.
x_scale();
158 for (
int b = 0; b < truth_word_.
length(); ++b) {
160 topleft.
x = box.
left();
161 topleft.
y = box.
top();
166 TBOX norm_box(norm_topleft.
x, norm_botright.
y,
167 norm_botright.
x, norm_topleft.
y);
◆ SetWordTruth()
void BlamerBundle::SetWordTruth(const UNICHARSET &unicharset, const char *truth_str, const TBOX &word_box)
Definition at line 72 of file blamer.cpp.
75 truth_has_char_boxes_ =
false;
79 unicharset.
encode_string(truth_str,
false, &encoding, &lengths,
nullptr);
81 for (
int i = 0; i < encoding.
size(); total_length += lengths[i++]) {
82 STRING uch(truth_str + total_length);
83 uch.truncate_at(lengths[i] - total_length);
◆ SplitBundle()
void BlamerBundle::SplitBundle(int word1_right, int word2_left, bool debug, BlamerBundle *bundle1, BlamerBundle *bundle2) const
Definition at line 175 of file blamer.cpp.
181 int begin2_truth_index = -1;
183 truth_has_char_boxes_) {
184 debug_str =
"Looking for truth split at";
187 debug_str +=
"\nnorm_truth_word boxes:\n";
188 if (norm_truth_word_.
length() > 1) {
190 for (b = 1; b < norm_truth_word_.
length(); ++b) {
192 if ((abs(word1_right - norm_truth_word_.
BlobBox(b - 1).
right()) <
193 norm_box_tolerance_) &&
194 (abs(word2_left - norm_truth_word_.
BlobBox(b).
left()) <
195 norm_box_tolerance_)) {
196 begin2_truth_index = b;
197 debug_str +=
"Split found";
206 if (begin2_truth_index > 0) {
207 bundle1->truth_has_char_boxes_ =
true;
208 bundle1->norm_box_tolerance_ = norm_box_tolerance_;
209 bundle2->truth_has_char_boxes_ =
true;
210 bundle2->norm_box_tolerance_ = norm_box_tolerance_;
212 for (b = 0; b < norm_truth_word_.
length(); ++b) {
213 if (b == begin2_truth_index) curr_bb = bundle2;
216 curr_bb->truth_text_.
push_back(truth_text_[b]);
222 debug_str +=
"Truth split not found";
223 debug_str += truth_has_char_boxes_ ?
224 "\n" :
" (no truth char boxes)\n";
◆ TruthString()
STRING BlamerBundle::TruthString() const [inline]
Definition at line 115 of file blamer.h.
117 for (
int i = 0; i < truth_text_.
size(); ++i)
118 truth_str += truth_text_[i];
◆ UpdateBestRating()
void BlamerBundle::UpdateBestRating(float rating) [inline]
Definition at line 137 of file blamer.h.
138 if (rating < best_correctly_segmented_rating_)
139 best_correctly_segmented_rating_ = rating;
The documentation for this struct was generated from the following files:
- blamer.h
- blamer.cpp
static const char * permuter_name(uint8_t permuter)
bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const
STRING TruthString() const
void add_str_int(const char *str, int number)
bool encode_string(const char *str, bool give_up_on_failure, GenericVector< UNICHAR_ID > *encoding, GenericVector< char > *lengths, int *encoded_length) const
UNICHAR_ID unichar_id(int index) const
void NormTransform(const DENORM *first_norm, const TPOINT &pt, TPOINT *transformed) const
BLOB_CHOICE_LIST * GetBlobChoices(int index) const
bool x_almost_equal(const TBOX &box, int tolerance) const
const UNICHARSET * unicharset() const
static const float kBadRating
UNICHAR_ID unichar_id() const
const char * get_normed_unichar(UNICHAR_ID unichar_id) const
void FillDebugString(const STRING &msg, const WERD_CHOICE *choice, STRING *debug)
ParamsTrainingHypothesis & AddHypothesis(const ParamsTrainingHypothesis &other)
void CopyTruth(const BlamerBundle &other)
BlamerBundle * blamer_bundle
void string_and_lengths(STRING *word_str, STRING *word_lengths_str) const
const TBOX & BlobBox(int index) const
WERD_CHOICE * best_choice
const char * c_str() const
void StartHypothesisList()
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
GenericVector< TBLOB * > blobs
const char *const kIncorrectResultReasonNames[]
bool GeneratePainPoint(int col, int row, LMPainPointsType pp_type, float special_priority, bool ok_to_extend, float max_char_wh_ratio, WERD_RES *word_res)
void print_to_str(STRING *str) const
TBOX bounding_box() const
void InsertBox(int index, const TBOX &box)
const STRING & debug() const
void add_str_double(const char *str, double number)
void CopyResults(const BlamerBundle &other)
DLLSYM void tprintf(const char *format,...)
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
const char * id_to_unichar(UNICHAR_ID id) const
bool Classified(int col, int row, int wildcard_id) const