tesseract  5.0.0-alpha-619-ge9db
blamer.cpp
Go to the documentation of this file.
1 // File: blamer.cpp
3 // Description: Module allowing precise error causes to be allocated.
4 // Author: Rike Antonova
5 // Refactored: Ray Smith
6 //
7 // (C) Copyright 2013, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #include "blamer.h"
21 #include <cmath> // for abs
22 #include <cstdlib> // for abs
23 #include "blobs.h" // for TPOINT, TWERD, TBLOB
24 #include "errcode.h" // for ASSERT_HOST
25 #if !defined(DISABLED_LEGACY_ENGINE)
26 #include "lm_pain_points.h" // for LMPainPoints
27 #endif
28 #include "matrix.h" // for MATRIX
29 #include "normalis.h" // for DENORM
30 #include "pageres.h" // for WERD_RES
31 #include "unicharset.h" // for UNICHARSET
32 
// Short printable names for each value of the IncorrectResultReason enum.
// kIncorrectResultReasonNames is indexed directly by the enum value, so its
// entries must stay in the same order as the enum. Keep in sync.
const char kBlameCorrect[] = "corr";
const char kBlameClassifier[] = "cl";
const char kBlameChopper[] = "chop";
const char kBlameClassLMTradeoff[] = "cl/LM";
const char kBlamePageLayout[] = "pglt";
const char kBlameSegsearchHeur[] = "ss_heur";
const char kBlameSegsearchPP[] = "ss_pp";
const char kBlameClassOldLMTradeoff[] = "cl/old_LM";
const char kBlameAdaption[] = "adapt";
const char kBlameNoTruthSplit[] = "no_tr_spl";
const char kBlameNoTruth[] = "no_tr";
const char kBlameUnknown[] = "unkn";

// Lookup table from IncorrectResultReason value to its printable name.
const char * const kIncorrectResultReasonNames[] = {
    kBlameCorrect,
    kBlameClassifier,
    kBlameChopper,
    kBlameClassLMTradeoff,
    kBlamePageLayout,
    kBlameSegsearchHeur,
    kBlameSegsearchPP,
    kBlameClassOldLMTradeoff,
    kBlameAdaption,
    kBlameNoTruthSplit,
    kBlameNoTruth,
    kBlameUnknown
};
61 
63  return kIncorrectResultReasonNames[irr];
64 }
65 
66 const char *BlamerBundle::IncorrectReason() const {
67  return kIncorrectResultReasonNames[incorrect_result_reason_];
68 }
69 
70 // Functions to setup the blamer.
71 // Whole word string, whole word bounding box.
72 void BlamerBundle::SetWordTruth(const UNICHARSET& unicharset,
73  const char* truth_str, const TBOX& word_box) {
74  truth_word_.InsertBox(0, word_box);
75  truth_has_char_boxes_ = false;
76  // Encode the string as UNICHAR_IDs.
78  GenericVector<char> lengths;
79  unicharset.encode_string(truth_str, false, &encoding, &lengths, nullptr);
80  int total_length = 0;
81  for (int i = 0; i < encoding.size(); total_length += lengths[i++]) {
82  STRING uch(truth_str + total_length);
83  uch.truncate_at(lengths[i] - total_length);
84  UNICHAR_ID id = encoding[i];
85  if (id != INVALID_UNICHAR_ID) uch = unicharset.get_normed_unichar(id);
86  truth_text_.push_back(uch);
87  }
88 }
89 
90 // Single "character" string, "character" bounding box.
91 // May be called multiple times to indicate the characters in a word.
93  const char* char_str, const TBOX& char_box) {
94  STRING symbol_str(char_str);
95  UNICHAR_ID id = unicharset.unichar_to_id(char_str);
96  if (id != INVALID_UNICHAR_ID) {
97  STRING normed_uch(unicharset.get_normed_unichar(id));
98  if (normed_uch.length() > 0) symbol_str = normed_uch;
99  }
100  int length = truth_word_.length();
101  truth_text_.push_back(symbol_str);
102  truth_word_.InsertBox(length, char_box);
103  if (length == 0)
104  truth_has_char_boxes_ = true;
105  else if (truth_word_.BlobBox(length - 1) == char_box)
106  truth_has_char_boxes_ = false;
107 }
108 
109 // Marks that there is something wrong with the truth text, like it contains
110 // reject characters.
112  incorrect_result_reason_ = IRR_NO_TRUTH;
113  truth_has_char_boxes_ = false;
114 }
115 
116 // Returns true if the provided word_choice is correct.
117 bool BlamerBundle::ChoiceIsCorrect(const WERD_CHOICE* word_choice) const {
118  if (word_choice == nullptr) return false;
119  const UNICHARSET* uni_set = word_choice->unicharset();
120  STRING normed_choice_str;
121  for (int i = 0; i < word_choice->length(); ++i) {
122  normed_choice_str +=
123  uni_set->get_normed_unichar(word_choice->unichar_id(i));
124  }
125  STRING truth_str = TruthString();
126  return truth_str == normed_choice_str;
127 }
128 
130  const WERD_CHOICE *choice,
131  STRING *debug) {
132  (*debug) += "Truth ";
133  for (int i = 0; i < this->truth_text_.size(); ++i) {
134  (*debug) += this->truth_text_[i];
135  }
136  if (!this->truth_has_char_boxes_) (*debug) += " (no char boxes)";
137  if (choice != nullptr) {
138  (*debug) += " Choice ";
139  STRING choice_str;
140  choice->string_and_lengths(&choice_str, nullptr);
141  (*debug) += choice_str;
142  }
143  if (msg.length() > 0) {
144  (*debug) += "\n";
145  (*debug) += msg;
146  }
147  (*debug) += "\n";
148 }
149 
150 // Sets up the norm_truth_word from truth_word using the given DENORM.
152  // TODO(rays) Is this the last use of denorm in WERD_RES and can it go?
153  norm_box_tolerance_ = kBlamerBoxTolerance * denorm.x_scale();
154  TPOINT topleft;
155  TPOINT botright;
156  TPOINT norm_topleft;
157  TPOINT norm_botright;
158  for (int b = 0; b < truth_word_.length(); ++b) {
159  const TBOX &box = truth_word_.BlobBox(b);
160  topleft.x = box.left();
161  topleft.y = box.top();
162  botright.x = box.right();
163  botright.y = box.bottom();
164  denorm.NormTransform(nullptr, topleft, &norm_topleft);
165  denorm.NormTransform(nullptr, botright, &norm_botright);
166  TBOX norm_box(norm_topleft.x, norm_botright.y,
167  norm_botright.x, norm_topleft.y);
168  norm_truth_word_.InsertBox(b, norm_box);
169  }
170 }
171 
172 // Splits *this into two pieces in bundle1 and bundle2 (preallocated, empty
173 // bundles) where the right edge/ of the left-hand word is word1_right,
174 // and the left edge of the right-hand word is word2_left.
175 void BlamerBundle::SplitBundle(int word1_right, int word2_left, bool debug,
176  BlamerBundle* bundle1,
177  BlamerBundle* bundle2) const {
178  STRING debug_str;
179  // Find truth boxes that correspond to the split in the blobs.
180  int b;
181  int begin2_truth_index = -1;
182  if (incorrect_result_reason_ != IRR_NO_TRUTH &&
183  truth_has_char_boxes_) {
184  debug_str = "Looking for truth split at";
185  debug_str.add_str_int(" end1_x ", word1_right);
186  debug_str.add_str_int(" begin2_x ", word2_left);
187  debug_str += "\nnorm_truth_word boxes:\n";
188  if (norm_truth_word_.length() > 1) {
189  norm_truth_word_.BlobBox(0).print_to_str(&debug_str);
190  for (b = 1; b < norm_truth_word_.length(); ++b) {
191  norm_truth_word_.BlobBox(b).print_to_str(&debug_str);
192  if ((abs(word1_right - norm_truth_word_.BlobBox(b - 1).right()) <
193  norm_box_tolerance_) &&
194  (abs(word2_left - norm_truth_word_.BlobBox(b).left()) <
195  norm_box_tolerance_)) {
196  begin2_truth_index = b;
197  debug_str += "Split found";
198  break;
199  }
200  }
201  debug_str += '\n';
202  }
203  }
204  // Populate truth information in word and word2 with the first and second
205  // part of the original truth.
206  if (begin2_truth_index > 0) {
207  bundle1->truth_has_char_boxes_ = true;
208  bundle1->norm_box_tolerance_ = norm_box_tolerance_;
209  bundle2->truth_has_char_boxes_ = true;
210  bundle2->norm_box_tolerance_ = norm_box_tolerance_;
211  BlamerBundle *curr_bb = bundle1;
212  for (b = 0; b < norm_truth_word_.length(); ++b) {
213  if (b == begin2_truth_index) curr_bb = bundle2;
214  curr_bb->norm_truth_word_.InsertBox(b, norm_truth_word_.BlobBox(b));
215  curr_bb->truth_word_.InsertBox(b, truth_word_.BlobBox(b));
216  curr_bb->truth_text_.push_back(truth_text_[b]);
217  }
218  } else if (incorrect_result_reason_ == IRR_NO_TRUTH) {
219  bundle1->incorrect_result_reason_ = IRR_NO_TRUTH;
220  bundle2->incorrect_result_reason_ = IRR_NO_TRUTH;
221  } else {
222  debug_str += "Truth split not found";
223  debug_str += truth_has_char_boxes_ ?
224  "\n" : " (no truth char boxes)\n";
225  bundle1->SetBlame(IRR_NO_TRUTH_SPLIT, debug_str, nullptr, debug);
226  bundle2->SetBlame(IRR_NO_TRUTH_SPLIT, debug_str, nullptr, debug);
227  }
228 }
229 
230 // "Joins" the blames from bundle1 and bundle2 into *this.
232  const BlamerBundle& bundle2, bool debug) {
233  STRING debug_str;
234  IncorrectResultReason irr = incorrect_result_reason_;
235  if (irr != IRR_NO_TRUTH_SPLIT) debug_str = "";
236  if (bundle1.incorrect_result_reason_ != IRR_CORRECT &&
237  bundle1.incorrect_result_reason_ != IRR_NO_TRUTH &&
238  bundle1.incorrect_result_reason_ != IRR_NO_TRUTH_SPLIT) {
239  debug_str += "Blame from part 1: ";
240  debug_str += bundle1.debug_;
241  irr = bundle1.incorrect_result_reason_;
242  }
243  if (bundle2.incorrect_result_reason_ != IRR_CORRECT &&
244  bundle2.incorrect_result_reason_ != IRR_NO_TRUTH &&
245  bundle2.incorrect_result_reason_ != IRR_NO_TRUTH_SPLIT) {
246  debug_str += "Blame from part 2: ";
247  debug_str += bundle2.debug_;
248  if (irr == IRR_CORRECT) {
249  irr = bundle2.incorrect_result_reason_;
250  } else if (irr != bundle2.incorrect_result_reason_) {
251  irr = IRR_UNKNOWN;
252  }
253  }
254  incorrect_result_reason_ = irr;
255  if (irr != IRR_CORRECT && irr != IRR_NO_TRUTH) {
256  SetBlame(irr, debug_str, nullptr, debug);
257  }
258 }
259 
260 // If a blob with the same bounding box as one of the truth character
261 // bounding boxes is not classified as the corresponding truth character
262 // blames character classifier for incorrect answer.
264  const TBOX& blob_box,
265  const BLOB_CHOICE_LIST& choices,
266  bool debug) {
267  if (!truth_has_char_boxes_ ||
268  incorrect_result_reason_ != IRR_CORRECT)
269  return; // Nothing to do here.
270 
271  for (int b = 0; b < norm_truth_word_.length(); ++b) {
272  const TBOX &truth_box = norm_truth_word_.BlobBox(b);
273  // Note that we are more strict on the bounding box boundaries here
274  // than in other places (chopper, segmentation search), since we do
275  // not have the ability to check the previous and next bounding box.
276  if (blob_box.x_almost_equal(truth_box, norm_box_tolerance_/2)) {
277  bool found = false;
278  bool incorrect_adapted = false;
279  UNICHAR_ID incorrect_adapted_id = INVALID_UNICHAR_ID;
280  const char *truth_str = truth_text_[b].c_str();
281  // We promise not to modify the list or its contents, using a
282  // const BLOB_CHOICE* below.
283  BLOB_CHOICE_IT choices_it(const_cast<BLOB_CHOICE_LIST*>(&choices));
284  for (choices_it.mark_cycle_pt(); !choices_it.cycled_list();
285  choices_it.forward()) {
286  const BLOB_CHOICE* choice = choices_it.data();
287  if (strcmp(truth_str, unicharset.get_normed_unichar(
288  choice->unichar_id())) == 0) {
289  found = true;
290  break;
291  } else if (choice->IsAdapted()) {
292  incorrect_adapted = true;
293  incorrect_adapted_id = choice->unichar_id();
294  }
295  } // end choices_it for loop
296  if (!found) {
297  STRING debug_str = "unichar ";
298  debug_str += truth_str;
299  debug_str += " not found in classification list";
300  SetBlame(IRR_CLASSIFIER, debug_str, nullptr, debug);
301  } else if (incorrect_adapted) {
302  STRING debug_str = "better rating for adapted ";
303  debug_str += unicharset.id_to_unichar(incorrect_adapted_id);
304  debug_str += " than for correct ";
305  debug_str += truth_str;
306  SetBlame(IRR_ADAPTION, debug_str, nullptr, debug);
307  }
308  break;
309  }
310  } // end iterating over blamer_bundle->norm_truth_word
311 }
312 
313 // Checks whether chops were made at all the character bounding box
314 // boundaries in word->truth_word. If not - blames the chopper for an
315 // incorrect answer.
316 void BlamerBundle::SetChopperBlame(const WERD_RES* word, bool debug) {
317  if (NoTruth() || !truth_has_char_boxes_ ||
319  return;
320  }
321  STRING debug_str;
322  bool missing_chop = false;
323  int num_blobs = word->chopped_word->blobs.size();
324  int box_index = 0;
325  int blob_index = 0;
326  int16_t truth_x = -1;
327  while (box_index < truth_word_.length() && blob_index < num_blobs) {
328  truth_x = norm_truth_word_.BlobBox(box_index).right();
329  TBLOB * curr_blob = word->chopped_word->blobs[blob_index];
330  if (curr_blob->bounding_box().right() < truth_x - norm_box_tolerance_) {
331  ++blob_index;
332  continue; // encountered an extra chop, keep looking
333  } else if (curr_blob->bounding_box().right() >
334  truth_x + norm_box_tolerance_) {
335  missing_chop = true;
336  break;
337  } else {
338  ++blob_index;
339  }
340  }
341  if (missing_chop || box_index < norm_truth_word_.length()) {
342  STRING debug_str;
343  if (missing_chop) {
344  debug_str.add_str_int("Detected missing chop (tolerance=",
345  norm_box_tolerance_);
346  debug_str += ") at Bounding Box=";
347  TBLOB * curr_blob = word->chopped_word->blobs[blob_index];
348  curr_blob->bounding_box().print_to_str(&debug_str);
349  debug_str.add_str_int("\nNo chop for truth at x=", truth_x);
350  } else {
351  debug_str.add_str_int("Missing chops for last ",
352  norm_truth_word_.length() - box_index);
353  debug_str += " truth box(es)";
354  }
355  debug_str += "\nMaximally chopped word boxes:\n";
356  for (blob_index = 0; blob_index < num_blobs; ++blob_index) {
357  TBLOB * curr_blob = word->chopped_word->blobs[blob_index];
358  curr_blob->bounding_box().print_to_str(&debug_str);
359  debug_str += '\n';
360  }
361  debug_str += "Truth bounding boxes:\n";
362  for (box_index = 0; box_index < norm_truth_word_.length(); ++box_index) {
363  norm_truth_word_.BlobBox(box_index).print_to_str(&debug_str);
364  debug_str += '\n';
365  }
366  SetBlame(IRR_CHOPPER, debug_str, word->best_choice, debug);
367  }
368 }
369 
370 // Blames the classifier or the language model if, after running only the
371 // chopper, best_choice is incorrect and no blame has been yet set.
372 // Blames the classifier if best_choice is classifier's top choice and is a
373 // dictionary word (i.e. language model could not have helped).
374 // Otherwise, blames the language model (formerly permuter word adjustment).
376  const WERD_RES* word,
377  const UNICHARSET& unicharset, bool valid_permuter, bool debug) {
378  if (valid_permuter) {
379  // Find out whether best choice is a top choice.
380  best_choice_is_dict_and_top_choice_ = true;
381  for (int i = 0; i < word->best_choice->length(); ++i) {
382  BLOB_CHOICE_IT blob_choice_it(word->GetBlobChoices(i));
383  ASSERT_HOST(!blob_choice_it.empty());
384  BLOB_CHOICE *first_choice = nullptr;
385  for (blob_choice_it.mark_cycle_pt(); !blob_choice_it.cycled_list();
386  blob_choice_it.forward()) { // find first non-fragment choice
387  if (!(unicharset.get_fragment(blob_choice_it.data()->unichar_id()))) {
388  first_choice = blob_choice_it.data();
389  break;
390  }
391  }
392  ASSERT_HOST(first_choice != nullptr);
393  if (first_choice->unichar_id() != word->best_choice->unichar_id(i)) {
394  best_choice_is_dict_and_top_choice_ = false;
395  break;
396  }
397  }
398  }
399  STRING debug_str;
400  if (best_choice_is_dict_and_top_choice_) {
401  debug_str = "Best choice is: incorrect, top choice, dictionary word";
402  debug_str += " with permuter ";
403  debug_str += word->best_choice->permuter_name();
404  } else {
405  debug_str = "Classifier/Old LM tradeoff is to blame";
406  }
407  SetBlame(best_choice_is_dict_and_top_choice_ ? IRR_CLASSIFIER
409  debug_str, word->best_choice, debug);
410 }
411 
412 // Sets up the correct_segmentation_* to mark the correct bounding boxes.
414 #ifndef DISABLED_LEGACY_ENGINE
415  params_training_bundle_.StartHypothesisList();
416 #endif // ndef DISABLED_LEGACY_ENGINE
417  if (incorrect_result_reason_ != IRR_CORRECT || !truth_has_char_boxes_)
418  return; // Nothing to do here.
419 
420  STRING debug_str;
421  debug_str += "Blamer computing correct_segmentation_cols\n";
422  int curr_box_col = 0;
423  int next_box_col = 0;
424  int num_blobs = word->NumBlobs();
425  if (num_blobs == 0) return; // No blobs to play with.
426  int blob_index = 0;
427  int16_t next_box_x = word->blobs[blob_index]->bounding_box().right();
428  for (int truth_idx = 0; blob_index < num_blobs &&
429  truth_idx < norm_truth_word_.length();
430  ++blob_index) {
431  ++next_box_col;
432  int16_t curr_box_x = next_box_x;
433  if (blob_index + 1 < num_blobs)
434  next_box_x = word->blobs[blob_index + 1]->bounding_box().right();
435  int16_t truth_x = norm_truth_word_.BlobBox(truth_idx).right();
436  debug_str.add_str_int("Box x coord vs. truth: ", curr_box_x);
437  debug_str.add_str_int(" ", truth_x);
438  debug_str += "\n";
439  if (curr_box_x > (truth_x + norm_box_tolerance_)) {
440  break; // failed to find a matching box
441  } else if (curr_box_x >= truth_x - norm_box_tolerance_ && // matched
442  (blob_index + 1 >= num_blobs || // next box can't be included
443  next_box_x > truth_x + norm_box_tolerance_)) {
444  correct_segmentation_cols_.push_back(curr_box_col);
445  correct_segmentation_rows_.push_back(next_box_col-1);
446  ++truth_idx;
447  debug_str.add_str_int("col=", curr_box_col);
448  debug_str.add_str_int(" row=", next_box_col-1);
449  debug_str += "\n";
450  curr_box_col = next_box_col;
451  }
452  }
453  if (blob_index < num_blobs || // trailing blobs
454  correct_segmentation_cols_.size() != norm_truth_word_.length()) {
455  debug_str.add_str_int("Blamer failed to find correct segmentation"
456  " (tolerance=", norm_box_tolerance_);
457  if (blob_index >= num_blobs) debug_str += " blob == nullptr";
458  debug_str += ")\n";
459  debug_str.add_str_int(" path length ", correct_segmentation_cols_.size());
460  debug_str.add_str_int(" vs. truth ", norm_truth_word_.length());
461  debug_str += "\n";
462  SetBlame(IRR_UNKNOWN, debug_str, nullptr, debug);
463  correct_segmentation_cols_.clear();
464  correct_segmentation_rows_.clear();
465  }
466 }
467 
468 // Returns true if a guided segmentation search is needed.
469 bool BlamerBundle::GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const {
470  return incorrect_result_reason_ == IRR_CORRECT &&
471  !segsearch_is_looking_for_blame_ &&
472  truth_has_char_boxes_ &&
473  !ChoiceIsCorrect(best_choice);
474 }
475 
476 #if !defined(DISABLED_LEGACY_ENGINE)
477 // Setup ready to guide the segmentation search to the correct segmentation.
479  MATRIX* ratings, UNICHAR_ID wildcard_id,
480  bool debug, STRING* debug_str,
481  tesseract::LMPainPoints* pain_points,
482  double max_char_wh_ratio,
483  WERD_RES* word_res) {
484  segsearch_is_looking_for_blame_ = true;
485  if (debug) {
486  tprintf("segsearch starting to look for blame\n");
487  }
488  // Fill pain points for any unclassifed blob corresponding to the
489  // correct segmentation state.
490  *debug_str += "Correct segmentation:\n";
491  for (int idx = 0; idx < correct_segmentation_cols_.size(); ++idx) {
492  debug_str->add_str_int("col=", correct_segmentation_cols_[idx]);
493  debug_str->add_str_int(" row=", correct_segmentation_rows_[idx]);
494  *debug_str += "\n";
495  if (!ratings->Classified(correct_segmentation_cols_[idx],
496  correct_segmentation_rows_[idx],
497  wildcard_id) &&
498  !pain_points->GeneratePainPoint(correct_segmentation_cols_[idx],
499  correct_segmentation_rows_[idx],
501  false, max_char_wh_ratio, word_res)) {
502  segsearch_is_looking_for_blame_ = false;
503  *debug_str += "\nFailed to insert pain point\n";
504  SetBlame(IRR_SEGSEARCH_HEUR, *debug_str, best_choice, debug);
505  break;
506  }
507  } // end for blamer_bundle->correct_segmentation_cols/rows
508 }
509 #endif // !defined(DISABLED_LEGACY_ENGINE)
510 
511 // Returns true if the guided segsearch is in progress.
513  return segsearch_is_looking_for_blame_;
514 }
515 
516 // The segmentation search has ended. Sets the blame appropriately.
518  bool debug, STRING *debug_str) {
519  // If we are still looking for blame (i.e. best_choice is incorrect, but a
520  // path representing the correct segmentation could be constructed), we can
521  // blame segmentation search pain point prioritization if the rating of the
522  // path corresponding to the correct segmentation is better than that of
523  // best_choice (i.e. language model would have done the correct thing, but
524  // because of poor pain point prioritization the correct segmentation was
525  // never explored). Otherwise we blame the tradeoff between the language model
526  // and the classifier, since even after exploring the path corresponding to
527  // the correct segmentation incorrect best_choice would have been chosen.
528  // One special case when we blame the classifier instead is when best choice
529  // is incorrect, but it is a dictionary word and it classifier's top choice.
530  if (segsearch_is_looking_for_blame_) {
531  segsearch_is_looking_for_blame_ = false;
532  if (best_choice_is_dict_and_top_choice_) {
533  *debug_str = "Best choice is: incorrect, top choice, dictionary word";
534  *debug_str += " with permuter ";
535  *debug_str += best_choice->permuter_name();
536  SetBlame(IRR_CLASSIFIER, *debug_str, best_choice, debug);
537  } else if (best_correctly_segmented_rating_ <
538  best_choice->rating()) {
539  *debug_str += "Correct segmentation state was not explored";
540  SetBlame(IRR_SEGSEARCH_PP, *debug_str, best_choice, debug);
541  } else {
542  if (best_correctly_segmented_rating_ >=
544  *debug_str += "Correct segmentation paths were pruned by LM\n";
545  } else {
546  debug_str->add_str_double("Best correct segmentation rating ",
547  best_correctly_segmented_rating_);
548  debug_str->add_str_double(" vs. best choice rating ",
549  best_choice->rating());
550  }
551  SetBlame(IRR_CLASS_LM_TRADEOFF, *debug_str, best_choice, debug);
552  }
553  }
554 }
555 
556 // If the bundle is null or still does not indicate the correct result,
557 // fix it and use some backup reason for the blame.
559  if (word->blamer_bundle == nullptr) {
561  word->blamer_bundle->SetBlame(IRR_PAGE_LAYOUT, "LastChanceBlame",
563  } else if (word->blamer_bundle->incorrect_result_reason_ == IRR_NO_TRUTH) {
564  word->blamer_bundle->SetBlame(IRR_NO_TRUTH, "Rejected truth",
566  } else {
568  IncorrectResultReason irr = word->blamer_bundle->incorrect_result_reason_;
569  if (irr == IRR_CORRECT && !correct) {
570  STRING debug_str = "Choice is incorrect after recognition";
571  word->blamer_bundle->SetBlame(IRR_UNKNOWN, debug_str, word->best_choice,
572  debug);
573  } else if (irr != IRR_CORRECT && correct) {
574  if (debug) {
575  tprintf("Corrected %s\n", word->blamer_bundle->debug_.c_str());
576  }
577  word->blamer_bundle->incorrect_result_reason_ = IRR_CORRECT;
578  word->blamer_bundle->debug_ = "";
579  }
580  }
581 }
582 
583 // Sets the misadaption debug if this word is incorrect, as this word is
584 // being adapted to.
586  bool debug) {
587  if (incorrect_result_reason_ != IRR_NO_TRUTH &&
588  !ChoiceIsCorrect(best_choice)) {
589  misadaption_debug_ ="misadapt to word (";
590  misadaption_debug_ += best_choice->permuter_name();
591  misadaption_debug_ += "): ";
592  FillDebugString("", best_choice, &misadaption_debug_);
593  if (debug) {
594  tprintf("%s\n", misadaption_debug_.c_str());
595  }
596  }
597 }
BlamerBundle::BlamerBundle
BlamerBundle()
Definition: blamer.h:105
WERD_CHOICE::permuter_name
static const char * permuter_name(uint8_t permuter)
Definition: ratngs.cpp:196
IRR_SEGSEARCH_PP
Definition: blamer.h:82
BlamerBundle::BlameClassifierOrLangModel
void BlameClassifierOrLangModel(const WERD_RES *word, const UNICHARSET &unicharset, bool valid_permuter, bool debug)
Definition: blamer.cpp:375
IRR_NO_TRUTH_SPLIT
Definition: blamer.h:91
BlamerBundle::ChoiceIsCorrect
bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const
Definition: blamer.cpp:117
normalis.h
BlamerBundle::TruthString
STRING TruthString() const
Definition: blamer.h:115
pageres.h
STRING::add_str_int
void add_str_int(const char *str, int number)
Definition: strngs.cpp:370
BlamerBundle::SetSymbolTruth
void SetSymbolTruth(const UNICHARSET &unicharset, const char *char_str, const TBOX &char_box)
Definition: blamer.cpp:92
TPOINT
Definition: blobs.h:49
UNICHARSET::encode_string
bool encode_string(const char *str, bool give_up_on_failure, GenericVector< UNICHAR_ID > *encoding, GenericVector< char > *lengths, int *encoded_length) const
Definition: unicharset.cpp:258
BlamerBundle::BlameClassifier
void BlameClassifier(const UNICHARSET &unicharset, const TBOX &blob_box, const BLOB_CHOICE_LIST &choices, bool debug)
Definition: blamer.cpp:263
WERD_CHOICE::unichar_id
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:303
WERD_CHOICE
Definition: ratngs.h:261
DENORM::NormTransform
void NormTransform(const DENORM *first_norm, const TPOINT &pt, TPOINT *transformed) const
Definition: normalis.cpp:334
TWERD
Definition: blobs.h:416
IRR_UNKNOWN
Definition: blamer.h:97
ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:87
WERD_RES::GetBlobChoices
BLOB_CHOICE_LIST * GetBlobChoices(int index) const
Definition: pageres.cpp:755
BlamerBundle::SetupNormTruthWord
void SetupNormTruthWord(const DENORM &denorm)
Definition: blamer.cpp:151
MATRIX
Definition: matrix.h:574
TBOX::top
int16_t top() const
Definition: rect.h:57
STRING
Definition: strngs.h:45
STRING::truncate_at
void truncate_at(int32_t index)
Definition: strngs.cpp:258
WERD_RES
Definition: pageres.h:160
TBOX::x_almost_equal
bool x_almost_equal(const TBOX &box, int tolerance) const
Definition: rect.cpp:245
IncorrectResultReason
IncorrectResultReason
Definition: blamer.h:52
BlamerBundle::SetChopperBlame
void SetChopperBlame(const WERD_RES *word, bool debug)
Definition: blamer.cpp:316
kBlameSegsearchPP
const char kBlameSegsearchPP[]
Definition: blamer.cpp:40
WERD_CHOICE::unicharset
const UNICHARSET * unicharset() const
Definition: ratngs.h:288
WERD_CHOICE::kBadRating
static const float kBadRating
Definition: ratngs.h:263
blobs.h
BLOB_CHOICE::unichar_id
UNICHAR_ID unichar_id() const
Definition: ratngs.h:75
UNICHARSET::get_normed_unichar
const char * get_normed_unichar(UNICHAR_ID unichar_id) const
Definition: unicharset.h:818
BlamerBundle::JoinBlames
void JoinBlames(const BlamerBundle &bundle1, const BlamerBundle &bundle2, bool debug)
Definition: blamer.cpp:231
BlamerBundle::FillDebugString
void FillDebugString(const STRING &msg, const WERD_CHOICE *choice, STRING *debug)
Definition: blamer.cpp:129
IRR_CHOPPER
Definition: blamer.h:62
kBlameCorrect
const char kBlameCorrect[]
Definition: blamer.cpp:34
WERD_RES::blamer_bundle
BlamerBundle * blamer_bundle
Definition: pageres.h:246
BlamerBundle::SetWordTruth
void SetWordTruth(const UNICHARSET &unicharset, const char *truth_str, const TBOX &word_box)
Definition: blamer.cpp:72
BlamerBundle::IncorrectReasonName
static const char * IncorrectReasonName(IncorrectResultReason irr)
Definition: blamer.cpp:62
kBlameUnknown
const char kBlameUnknown[]
Definition: blamer.cpp:45
WERD_CHOICE::string_and_lengths
void string_and_lengths(STRING *word_str, STRING *word_lengths_str) const
Definition: ratngs.cpp:451
kBlameClassOldLMTradeoff
const char kBlameClassOldLMTradeoff[]
Definition: blamer.cpp:41
GenericVector::push_back
int push_back(T object)
Definition: genericvector.h:799
kBlameSegsearchHeur
const char kBlameSegsearchHeur[]
Definition: blamer.cpp:39
IRR_CLASS_LM_TRADEOFF
Definition: blamer.h:69
IRR_ADAPTION
Definition: blamer.h:89
tesseract::BoxWord::BlobBox
const TBOX & BlobBox(int index) const
Definition: boxword.h:83
WERD_RES::best_choice
WERD_CHOICE * best_choice
Definition: pageres.h:235
kBlamePageLayout
const char kBlamePageLayout[]
Definition: blamer.cpp:38
IRR_CLASSIFIER
Definition: blamer.h:59
STRING::c_str
const char * c_str() const
Definition: strngs.cpp:192
unicharset.h
TPOINT::x
int16_t x
Definition: blobs.h:91
tesseract::ParamsTrainingBundle::StartHypothesisList
void StartHypothesisList()
Definition: params_training_featdef.h:137
UNICHARSET::unichar_to_id
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:209
BlamerBundle::GuidedSegsearchStillGoing
bool GuidedSegsearchStillGoing() const
Definition: blamer.cpp:512
TPOINT::y
int16_t y
Definition: blobs.h:92
matrix.h
TWERD::blobs
GenericVector< TBLOB * > blobs
Definition: blobs.h:457
BlamerBundle::InitForSegSearch
void InitForSegSearch(const WERD_CHOICE *best_choice, MATRIX *ratings, UNICHAR_ID wildcard_id, bool debug, STRING *debug_str, tesseract::LMPainPoints *pain_points, double max_char_wh_ratio, WERD_RES *word_res)
Definition: blamer.cpp:478
kIncorrectResultReasonNames
const char *const kIncorrectResultReasonNames[]
Definition: blamer.cpp:47
GenericVector::empty
bool empty() const
Definition: genericvector.h:86
UNICHARSET
Definition: unicharset.h:145
kBlameChopper
const char kBlameChopper[]
Definition: blamer.cpp:36
tesseract::LMPainPoints::GeneratePainPoint
bool GeneratePainPoint(int col, int row, LMPainPointsType pp_type, float special_priority, bool ok_to_extend, float max_char_wh_ratio, WERD_RES *word_res)
Definition: lm_pain_points.cpp:148
TBOX::bottom
int16_t bottom() const
Definition: rect.h:64
tesseract::LMPainPoints
Definition: lm_pain_points.h:56
WERD_RES::chopped_word
TWERD * chopped_word
Definition: pageres.h:206
tesseract::LM_PPTYPE_BLAMER
Definition: lm_pain_points.h:41
IRR_CORRECT
Definition: blamer.h:54
IRR_NO_TRUTH
Definition: blamer.h:94
kBlameNoTruth
const char kBlameNoTruth[]
Definition: blamer.cpp:44
PAGE_RES_IT::word
WERD_RES * word() const
Definition: pageres.h:748
TBOX::print_to_str
void print_to_str(STRING *str) const
Definition: rect.cpp:169
UNICHAR_ID
int UNICHAR_ID
Definition: unichar.h:36
TBLOB::bounding_box
TBOX bounding_box() const
Definition: blobs.cpp:466
GenericVector< UNICHAR_ID >
tesseract::BoxWord::InsertBox
void InsertBox(int index, const TBOX &box)
Definition: boxword.cpp:148
IRR_PAGE_LAYOUT
Definition: blamer.h:73
BlamerBundle::SetMisAdaptionDebug
void SetMisAdaptionDebug(const WERD_CHOICE *best_choice, bool debug)
Definition: blamer.cpp:585
STRING::length
int32_t length() const
Definition: strngs.cpp:187
BlamerBundle::debug
const STRING & debug() const
Definition: blamer.h:131
WERD_CHOICE::length
int length() const
Definition: ratngs.h:291
BLOB_CHOICE
Definition: ratngs.h:49
tesseract::BoxWord::length
int length() const
Definition: boxword.h:82
kBlameClassifier
const char kBlameClassifier[]
Definition: blamer.cpp:35
TBLOB
Definition: blobs.h:282
TBOX::left
int16_t left() const
Definition: rect.h:71
STRING::add_str_double
void add_str_double(const char *str, double number)
Definition: strngs.cpp:380
BlamerBundle::IncorrectReason
const char * IncorrectReason() const
Definition: blamer.cpp:66
GenericVector::clear
void clear()
Definition: genericvector.h:857
DENORM::x_scale
float x_scale() const
Definition: normalis.h:266
TBOX::right
int16_t right() const
Definition: rect.h:78
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
kBlameClassLMTradeoff
const char kBlameClassLMTradeoff[]
Definition: blamer.cpp:37
BlamerBundle::SetRejectedTruth
void SetRejectedTruth()
Definition: blamer.cpp:111
UNICHARSET::get_fragment
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:724
errcode.h
BlamerBundle::NoTruth
bool NoTruth() const
Definition: blamer.h:124
BLOB_CHOICE::IsAdapted
bool IsAdapted() const
Definition: ratngs.h:130
lm_pain_points.h
WERD_CHOICE::rating
float rating() const
Definition: ratngs.h:315
IRR_CLASS_OLD_LM_TRADEOFF
Definition: blamer.h:86
UNICHARSET::id_to_unichar
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:290
BlamerBundle::SplitBundle
void SplitBundle(int word1_right, int word2_left, bool debug, BlamerBundle *bundle1, BlamerBundle *bundle2) const
Definition: blamer.cpp:175
BlamerBundle
Definition: blamer.h:103
BlamerBundle::GuidedSegsearchNeeded
bool GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const
Definition: blamer.cpp:469
blamer.h
GenericVector::size
int size() const
Definition: genericvector.h:71
BlamerBundle::FinishSegSearch
void FinishSegSearch(const WERD_CHOICE *best_choice, bool debug, STRING *debug_str)
Definition: blamer.cpp:517
kBlameNoTruthSplit
const char kBlameNoTruthSplit[]
Definition: blamer.cpp:43
IRR_SEGSEARCH_HEUR
Definition: blamer.h:76
kBlameAdaption
const char kBlameAdaption[]
Definition: blamer.cpp:42
BlamerBundle::SetupCorrectSegmentation
void SetupCorrectSegmentation(const TWERD *word, bool debug)
Definition: blamer.cpp:413
TBOX
Definition: rect.h:33
DENORM
Definition: normalis.h:49
MATRIX::Classified
bool Classified(int col, int row, int wildcard_id) const
Definition: matrix.cpp:34
BlamerBundle::LastChanceBlame
static void LastChanceBlame(bool debug, WERD_RES *word)
Definition: blamer.cpp:558