tesseract  4.0.0-1-g2a2b
BlamerBundle Struct Reference

#include <blamer.h>

Public Member Functions

 BlamerBundle ()
 
 BlamerBundle (const BlamerBundle &other)
 
 ~BlamerBundle ()
 
STRING TruthString () const
 
IncorrectResultReason incorrect_result_reason () const
 
bool NoTruth () const
 
bool HasDebugInfo () const
 
const STRING & debug () const
 
const STRING & misadaption_debug () const
 
void UpdateBestRating (float rating)
 
int correct_segmentation_length () const
 
bool MatrixPositionCorrect (int index, const MATRIX_COORD &coord)
 
void set_best_choice_is_dict_and_top_choice (bool value)
 
const char * lattice_data () const
 
int lattice_size () const
 
void set_lattice_data (const char *data, int size)
 
const tesseract::ParamsTrainingBundle & params_training_bundle () const
 
void AddHypothesis (const tesseract::ParamsTrainingHypothesis &hypo)
 
void SetWordTruth (const UNICHARSET &unicharset, const char *truth_str, const TBOX &word_box)
 
void SetSymbolTruth (const UNICHARSET &unicharset, const char *char_str, const TBOX &char_box)
 
void SetRejectedTruth ()
 
bool ChoiceIsCorrect (const WERD_CHOICE *word_choice) const
 
void ClearResults ()
 
void CopyTruth (const BlamerBundle &other)
 
void CopyResults (const BlamerBundle &other)
 
const char * IncorrectReason () const
 
void FillDebugString (const STRING &msg, const WERD_CHOICE *choice, STRING *debug)
 
void SetupNormTruthWord (const DENORM &denorm)
 
void SplitBundle (int word1_right, int word2_left, bool debug, BlamerBundle *bundle1, BlamerBundle *bundle2) const
 
void JoinBlames (const BlamerBundle &bundle1, const BlamerBundle &bundle2, bool debug)
 
void BlameClassifier (const UNICHARSET &unicharset, const TBOX &blob_box, const BLOB_CHOICE_LIST &choices, bool debug)
 
void SetChopperBlame (const WERD_RES *word, bool debug)
 
void BlameClassifierOrLangModel (const WERD_RES *word, const UNICHARSET &unicharset, bool valid_permuter, bool debug)
 
void SetupCorrectSegmentation (const TWERD *word, bool debug)
 
bool GuidedSegsearchNeeded (const WERD_CHOICE *best_choice) const
 
void InitForSegSearch (const WERD_CHOICE *best_choice, MATRIX *ratings, UNICHAR_ID wildcard_id, bool debug, STRING *debug_str, TessResultCallback2< bool, int, int > *pp_cb)
 
bool GuidedSegsearchStillGoing () const
 
void FinishSegSearch (const WERD_CHOICE *best_choice, bool debug, STRING *debug_str)
 
void SetMisAdaptionDebug (const WERD_CHOICE *best_choice, bool debug)
 

Static Public Member Functions

static const char * IncorrectReasonName (IncorrectResultReason irr)
 
static void LastChanceBlame (bool debug, WERD_RES *word)
 

Detailed Description

Definition at line 100 of file blamer.h.

Constructor & Destructor Documentation

◆ BlamerBundle() [1/2]

BlamerBundle::BlamerBundle ( )
inline

Definition at line 102 of file blamer.h.

102  : truth_has_char_boxes_(false),
103  incorrect_result_reason_(IRR_CORRECT),
104  lattice_data_(nullptr) { ClearResults(); }
void ClearResults()
Definition: blamer.h:185

◆ BlamerBundle() [2/2]

BlamerBundle::BlamerBundle ( const BlamerBundle & other)
inline

Definition at line 105 of file blamer.h.

105  {
106  this->CopyTruth(other);
107  this->CopyResults(other);
108  }
void CopyResults(const BlamerBundle &other)
Definition: blamer.h:206
void CopyTruth(const BlamerBundle &other)
Definition: blamer.h:199

◆ ~BlamerBundle()

BlamerBundle::~BlamerBundle ( )
inline

Definition at line 109 of file blamer.h.

109 { delete[] lattice_data_; }

Member Function Documentation

◆ AddHypothesis()

void BlamerBundle::AddHypothesis ( const tesseract::ParamsTrainingHypothesis & hypo)
inline

Definition at line 166 of file blamer.h.

166  {
167  params_training_bundle_.AddHypothesis(hypo);
168  }
ParamsTrainingHypothesis & AddHypothesis(const ParamsTrainingHypothesis &other)

◆ BlameClassifier()

void BlamerBundle::BlameClassifier ( const UNICHARSET & unicharset,
const TBOX & blob_box,
const BLOB_CHOICE_LIST &  choices,
bool  debug 
)

Definition at line 262 of file blamer.cpp.

265  {
266  if (!truth_has_char_boxes_ ||
267  incorrect_result_reason_ != IRR_CORRECT)
268  return; // Nothing to do here.
269 
270  for (int b = 0; b < norm_truth_word_.length(); ++b) {
271  const TBOX &truth_box = norm_truth_word_.BlobBox(b);
272  // Note that we are more strict on the bounding box boundaries here
273  // than in other places (chopper, segmentation search), since we do
274  // not have the ability to check the previous and next bounding box.
275  if (blob_box.x_almost_equal(truth_box, norm_box_tolerance_/2)) {
276  bool found = false;
277  bool incorrect_adapted = false;
278  UNICHAR_ID incorrect_adapted_id = INVALID_UNICHAR_ID;
279  const char *truth_str = truth_text_[b].string();
280  // We promise not to modify the list or its contents, using a
281  // const BLOB_CHOICE* below.
282  BLOB_CHOICE_IT choices_it(const_cast<BLOB_CHOICE_LIST*>(&choices));
283  for (choices_it.mark_cycle_pt(); !choices_it.cycled_list();
284  choices_it.forward()) {
285  const BLOB_CHOICE* choice = choices_it.data();
286  if (strcmp(truth_str, unicharset.get_normed_unichar(
287  choice->unichar_id())) == 0) {
288  found = true;
289  break;
290  } else if (choice->IsAdapted()) {
291  incorrect_adapted = true;
292  incorrect_adapted_id = choice->unichar_id();
293  }
294  } // end choices_it for loop
295  if (!found) {
296  STRING debug_str = "unichar ";
297  debug_str += truth_str;
298  debug_str += " not found in classification list";
299  SetBlame(IRR_CLASSIFIER, debug_str, nullptr, debug);
300  } else if (incorrect_adapted) {
301  STRING debug_str = "better rating for adapted ";
302  debug_str += unicharset.id_to_unichar(incorrect_adapted_id);
303  debug_str += " than for correct ";
304  debug_str += truth_str;
305  SetBlame(IRR_ADAPTION, debug_str, nullptr, debug);
306  }
307  break;
308  }
309  } // end iterating over blamer_bundle->norm_truth_word
310 }
const STRING & debug() const
Definition: blamer.h:128
int UNICHAR_ID
Definition: unichar.h:35
const char * get_normed_unichar(UNICHAR_ID unichar_id) const
Definition: unicharset.h:823
Definition: rect.h:34
const TBOX & BlobBox(int index) const
Definition: boxword.h:84
bool IsAdapted() const
Definition: ratngs.h:136
bool x_almost_equal(const TBOX &box, int tolerance) const
Definition: rect.cpp:253
Definition: strngs.h:45
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:290
int length() const
Definition: boxword.h:83
UNICHAR_ID unichar_id() const
Definition: ratngs.h:77

◆ BlameClassifierOrLangModel()

void BlamerBundle::BlameClassifierOrLangModel ( const WERD_RES * word,
const UNICHARSET & unicharset,
bool  valid_permuter,
bool  debug 
)

Definition at line 374 of file blamer.cpp.

376  {
377  if (valid_permuter) {
378  // Find out whether best choice is a top choice.
379  best_choice_is_dict_and_top_choice_ = true;
380  for (int i = 0; i < word->best_choice->length(); ++i) {
381  BLOB_CHOICE_IT blob_choice_it(word->GetBlobChoices(i));
382  ASSERT_HOST(!blob_choice_it.empty());
383  BLOB_CHOICE *first_choice = nullptr;
384  for (blob_choice_it.mark_cycle_pt(); !blob_choice_it.cycled_list();
385  blob_choice_it.forward()) { // find first non-fragment choice
386  if (!(unicharset.get_fragment(blob_choice_it.data()->unichar_id()))) {
387  first_choice = blob_choice_it.data();
388  break;
389  }
390  }
391  ASSERT_HOST(first_choice != nullptr);
392  if (first_choice->unichar_id() != word->best_choice->unichar_id(i)) {
393  best_choice_is_dict_and_top_choice_ = false;
394  break;
395  }
396  }
397  }
398  STRING debug_str;
399  if (best_choice_is_dict_and_top_choice_) {
400  debug_str = "Best choice is: incorrect, top choice, dictionary word";
401  debug_str += " with permuter ";
402  debug_str += word->best_choice->permuter_name();
403  } else {
404  debug_str = "Classifier/Old LM tradeoff is to blame";
405  }
406  SetBlame(best_choice_is_dict_and_top_choice_ ? IRR_CLASSIFIER
407  : IRR_CLASS_OLD_LM_TRADEOFF,
408  debug_str, word->best_choice, debug);
409 }
const STRING & debug() const
Definition: blamer.h:128
BLOB_CHOICE_LIST * GetBlobChoices(int index) const
Definition: pageres.cpp:765
static const char * permuter_name(uint8_t permuter)
Definition: ratngs.cpp:194
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:315
int length() const
Definition: ratngs.h:303
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:729
Definition: strngs.h:45
WERD_CHOICE * best_choice
Definition: pageres.h:235
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ ChoiceIsCorrect()

bool BlamerBundle::ChoiceIsCorrect ( const WERD_CHOICE * word_choice) const

Definition at line 116 of file blamer.cpp.

116  {
117  if (word_choice == nullptr) return false;
118  const UNICHARSET* uni_set = word_choice->unicharset();
119  STRING normed_choice_str;
120  for (int i = 0; i < word_choice->length(); ++i) {
121  normed_choice_str +=
122  uni_set->get_normed_unichar(word_choice->unichar_id(i));
123  }
124  STRING truth_str = TruthString();
125  return truth_str == normed_choice_str;
126 }
const char * get_normed_unichar(UNICHAR_ID unichar_id) const
Definition: unicharset.h:823
const UNICHARSET * unicharset() const
Definition: ratngs.h:300
STRING TruthString() const
Definition: blamer.h:112
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:315
int length() const
Definition: ratngs.h:303
Definition: strngs.h:45

◆ ClearResults()

void BlamerBundle::ClearResults ( )
inline

Definition at line 185 of file blamer.h.

185  {
186  norm_truth_word_.DeleteAllBoxes();
187  norm_box_tolerance_ = 0;
188  if (!NoTruth()) incorrect_result_reason_ = IRR_CORRECT;
189  debug_ = "";
190  segsearch_is_looking_for_blame_ = false;
191  best_correctly_segmented_rating_ = WERD_CHOICE::kBadRating;
192  correct_segmentation_cols_.clear();
193  correct_segmentation_rows_.clear();
194  best_choice_is_dict_and_top_choice_ = false;
195  delete[] lattice_data_;
196  lattice_data_ = nullptr;
197  lattice_size_ = 0;
198  }
static const float kBadRating
Definition: ratngs.h:275
bool NoTruth() const
Definition: blamer.h:121
void DeleteAllBoxes()
Definition: boxword.cpp:174

◆ CopyResults()

void BlamerBundle::CopyResults ( const BlamerBundle & other)
inline

Definition at line 206 of file blamer.h.

206  {
207  norm_truth_word_ = other.norm_truth_word_;
208  norm_box_tolerance_ = other.norm_box_tolerance_;
209  incorrect_result_reason_ = other.incorrect_result_reason_;
210  segsearch_is_looking_for_blame_ = other.segsearch_is_looking_for_blame_;
211  best_correctly_segmented_rating_ = other.best_correctly_segmented_rating_;
212  correct_segmentation_cols_ = other.correct_segmentation_cols_;
213  correct_segmentation_rows_ = other.correct_segmentation_rows_;
214  best_choice_is_dict_and_top_choice_ =
215  other.best_choice_is_dict_and_top_choice_;
216  if (other.lattice_data_ != nullptr) {
217  lattice_data_ = new char[other.lattice_size_];
218  memcpy(lattice_data_, other.lattice_data_, other.lattice_size_);
219  lattice_size_ = other.lattice_size_;
220  } else {
221  lattice_data_ = nullptr;
222  }
223  }

◆ CopyTruth()

void BlamerBundle::CopyTruth ( const BlamerBundle & other)
inline

Definition at line 199 of file blamer.h.

199  {
200  truth_has_char_boxes_ = other.truth_has_char_boxes_;
201  truth_word_ = other.truth_word_;
202  truth_text_ = other.truth_text_;
203  incorrect_result_reason_ =
204  (other.NoTruth() ? other.incorrect_result_reason_ : IRR_CORRECT);
205  }
bool NoTruth() const
Definition: blamer.h:121

◆ correct_segmentation_length()

int BlamerBundle::correct_segmentation_length ( ) const
inline

Definition at line 138 of file blamer.h.

138  {
139  return correct_segmentation_cols_.length();
140  }
int length() const
Definition: genericvector.h:85

◆ debug()

const STRING& BlamerBundle::debug ( ) const
inline

Definition at line 128 of file blamer.h.

128  {
129  return debug_;
130  }

◆ FillDebugString()

void BlamerBundle::FillDebugString ( const STRING & msg,
const WERD_CHOICE * choice,
STRING * debug
)

Definition at line 128 of file blamer.cpp.

130  {
131  (*debug) += "Truth ";
132  for (int i = 0; i < this->truth_text_.length(); ++i) {
133  (*debug) += this->truth_text_[i];
134  }
135  if (!this->truth_has_char_boxes_) (*debug) += " (no char boxes)";
136  if (choice != nullptr) {
137  (*debug) += " Choice ";
138  STRING choice_str;
139  choice->string_and_lengths(&choice_str, nullptr);
140  (*debug) += choice_str;
141  }
142  if (msg.length() > 0) {
143  (*debug) += "\n";
144  (*debug) += msg;
145  }
146  (*debug) += "\n";
147 }
int length() const
Definition: genericvector.h:85
void string_and_lengths(STRING *word_str, STRING *word_lengths_str) const
Definition: ratngs.cpp:449
Definition: strngs.h:45
int32_t length() const
Definition: strngs.cpp:191

◆ FinishSegSearch()

void BlamerBundle::FinishSegSearch ( const WERD_CHOICE * best_choice,
bool  debug,
STRING * debug_str
)

Definition at line 511 of file blamer.cpp.

512  {
513  // If we are still looking for blame (i.e. best_choice is incorrect, but a
514  // path representing the correct segmentation could be constructed), we can
515  // blame segmentation search pain point prioritization if the rating of the
516  // path corresponding to the correct segmentation is better than that of
517  // best_choice (i.e. language model would have done the correct thing, but
518  // because of poor pain point prioritization the correct segmentation was
519  // never explored). Otherwise we blame the tradeoff between the language model
520  // and the classifier, since even after exploring the path corresponding to
521  // the correct segmentation incorrect best_choice would have been chosen.
522  // One special case when we blame the classifier instead is when best choice
523  // is incorrect, but it is a dictionary word and it classifier's top choice.
524  if (segsearch_is_looking_for_blame_) {
525  segsearch_is_looking_for_blame_ = false;
526  if (best_choice_is_dict_and_top_choice_) {
527  *debug_str = "Best choice is: incorrect, top choice, dictionary word";
528  *debug_str += " with permuter ";
529  *debug_str += best_choice->permuter_name();
530  SetBlame(IRR_CLASSIFIER, *debug_str, best_choice, debug);
531  } else if (best_correctly_segmented_rating_ <
532  best_choice->rating()) {
533  *debug_str += "Correct segmentation state was not explored";
534  SetBlame(IRR_SEGSEARCH_PP, *debug_str, best_choice, debug);
535  } else {
536  if (best_correctly_segmented_rating_ >=
537  WERD_CHOICE::kBadRating) {
538  *debug_str += "Correct segmentation paths were pruned by LM\n";
539  } else {
540  debug_str->add_str_double("Best correct segmentation rating ",
541  best_correctly_segmented_rating_);
542  debug_str->add_str_double(" vs. best choice rating ",
543  best_choice->rating());
544  }
545  SetBlame(IRR_CLASS_LM_TRADEOFF, *debug_str, best_choice, debug);
546  }
547  }
548 }
const STRING & debug() const
Definition: blamer.h:128
static const float kBadRating
Definition: ratngs.h:275
static const char * permuter_name(uint8_t permuter)
Definition: ratngs.cpp:194
float rating() const
Definition: ratngs.h:327
void add_str_double(const char *str, double number)
Definition: strngs.cpp:389

◆ GuidedSegsearchNeeded()

bool BlamerBundle::GuidedSegsearchNeeded ( const WERD_CHOICE * best_choice) const

Definition at line 466 of file blamer.cpp.

466  {
467  return incorrect_result_reason_ == IRR_CORRECT &&
468  !segsearch_is_looking_for_blame_ &&
469  truth_has_char_boxes_ &&
470  !ChoiceIsCorrect(best_choice);
471 }
bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const
Definition: blamer.cpp:116

◆ GuidedSegsearchStillGoing()

bool BlamerBundle::GuidedSegsearchStillGoing ( ) const

Definition at line 506 of file blamer.cpp.

506  {
507  return segsearch_is_looking_for_blame_;
508 }

◆ HasDebugInfo()

bool BlamerBundle::HasDebugInfo ( ) const
inline

Definition at line 125 of file blamer.h.

125  {
126  return debug_.length() > 0 || misadaption_debug_.length() > 0;
127  }
int32_t length() const
Definition: strngs.cpp:191

◆ incorrect_result_reason()

IncorrectResultReason BlamerBundle::incorrect_result_reason ( ) const
inline

Definition at line 118 of file blamer.h.

118  {
119  return incorrect_result_reason_;
120  }

◆ IncorrectReason()

const char * BlamerBundle::IncorrectReason ( ) const

Definition at line 65 of file blamer.cpp.

65  {
66  return kIncorrectResultReasonNames[incorrect_result_reason_];
67 }
const char *const kIncorrectResultReasonNames[]
Definition: blamer.cpp:46

◆ IncorrectReasonName()

const char * BlamerBundle::IncorrectReasonName ( IncorrectResultReason  irr)
static

Definition at line 61 of file blamer.cpp.

61  {
62  return kIncorrectResultReasonNames[irr];
63 }
const char *const kIncorrectResultReasonNames[]
Definition: blamer.cpp:46

◆ InitForSegSearch()

void BlamerBundle::InitForSegSearch ( const WERD_CHOICE * best_choice,
MATRIX * ratings,
UNICHAR_ID  wildcard_id,
bool  debug,
STRING * debug_str,
TessResultCallback2< bool, int, int > *  pp_cb 
)

Definition at line 478 of file blamer.cpp.

481  {
482  segsearch_is_looking_for_blame_ = true;
483  if (debug) {
484  tprintf("segsearch starting to look for blame\n");
485  }
486  // Fill pain points for any unclassifed blob corresponding to the
487  // correct segmentation state.
488  *debug_str += "Correct segmentation:\n";
489  for (int idx = 0; idx < correct_segmentation_cols_.length(); ++idx) {
490  debug_str->add_str_int("col=", correct_segmentation_cols_[idx]);
491  debug_str->add_str_int(" row=", correct_segmentation_rows_[idx]);
492  *debug_str += "\n";
493  if (!ratings->Classified(correct_segmentation_cols_[idx],
494  correct_segmentation_rows_[idx],
495  wildcard_id) &&
496  !cb->Run(correct_segmentation_cols_[idx],
497  correct_segmentation_rows_[idx])) {
498  segsearch_is_looking_for_blame_ = false;
499  *debug_str += "\nFailed to insert pain point\n";
500  SetBlame(IRR_SEGSEARCH_HEUR, *debug_str, best_choice, debug);
501  break;
502  }
503  } // end for blamer_bundle->correct_segmentation_cols/rows
504 }
const STRING & debug() const
Definition: blamer.h:128
int length() const
Definition: genericvector.h:85
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
void add_str_int(const char *str, int number)
Definition: strngs.cpp:379
bool Classified(int col, int row, int wildcard_id) const
Definition: matrix.cpp:41

◆ JoinBlames()

void BlamerBundle::JoinBlames ( const BlamerBundle & bundle1,
const BlamerBundle & bundle2,
bool  debug 
)

Definition at line 230 of file blamer.cpp.

231  {
232  STRING debug_str;
233  IncorrectResultReason irr = incorrect_result_reason_;
234  if (irr != IRR_NO_TRUTH_SPLIT) debug_str = "";
235  if (bundle1.incorrect_result_reason_ != IRR_CORRECT &&
236  bundle1.incorrect_result_reason_ != IRR_NO_TRUTH &&
237  bundle1.incorrect_result_reason_ != IRR_NO_TRUTH_SPLIT) {
238  debug_str += "Blame from part 1: ";
239  debug_str += bundle1.debug_;
240  irr = bundle1.incorrect_result_reason_;
241  }
242  if (bundle2.incorrect_result_reason_ != IRR_CORRECT &&
243  bundle2.incorrect_result_reason_ != IRR_NO_TRUTH &&
244  bundle2.incorrect_result_reason_ != IRR_NO_TRUTH_SPLIT) {
245  debug_str += "Blame from part 2: ";
246  debug_str += bundle2.debug_;
247  if (irr == IRR_CORRECT) {
248  irr = bundle2.incorrect_result_reason_;
249  } else if (irr != bundle2.incorrect_result_reason_) {
250  irr = IRR_UNKNOWN;
251  }
252  }
253  incorrect_result_reason_ = irr;
254  if (irr != IRR_CORRECT && irr != IRR_NO_TRUTH) {
255  SetBlame(irr, debug_str, nullptr, debug);
256  }
257 }
const STRING & debug() const
Definition: blamer.h:128
IncorrectResultReason
Definition: blamer.h:49
Definition: strngs.h:45

◆ LastChanceBlame()

void BlamerBundle::LastChanceBlame ( bool  debug,
WERD_RES * word
)
static

Definition at line 552 of file blamer.cpp.

552  {
553  if (word->blamer_bundle == nullptr) {
554  word->blamer_bundle = new BlamerBundle();
555  word->blamer_bundle->SetBlame(IRR_PAGE_LAYOUT, "LastChanceBlame",
556  word->best_choice, debug);
557  } else if (word->blamer_bundle->incorrect_result_reason_ == IRR_NO_TRUTH) {
558  word->blamer_bundle->SetBlame(IRR_NO_TRUTH, "Rejected truth",
559  word->best_choice, debug);
560  } else {
561  bool correct = word->blamer_bundle->ChoiceIsCorrect(word->best_choice);
562  IncorrectResultReason irr = word->blamer_bundle->incorrect_result_reason_;
563  if (irr == IRR_CORRECT && !correct) {
564  STRING debug_str = "Choice is incorrect after recognition";
565  word->blamer_bundle->SetBlame(IRR_UNKNOWN, debug_str, word->best_choice,
566  debug);
567  } else if (irr != IRR_CORRECT && correct) {
568  if (debug) {
569  tprintf("Corrected %s\n", word->blamer_bundle->debug_.string());
570  }
571  word->blamer_bundle->incorrect_result_reason_ = IRR_CORRECT;
572  word->blamer_bundle->debug_ = "";
573  }
574  }
575 }
const STRING & debug() const
Definition: blamer.h:128
IncorrectResultReason
Definition: blamer.h:49
const char * string() const
Definition: strngs.cpp:196
bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const
Definition: blamer.cpp:116
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
Definition: strngs.h:45
BlamerBundle * blamer_bundle
Definition: pageres.h:246
WERD_CHOICE * best_choice
Definition: pageres.h:235
BlamerBundle()
Definition: blamer.h:102

◆ lattice_data()

const char* BlamerBundle::lattice_data ( ) const
inline

Definition at line 150 of file blamer.h.

150  {
151  return lattice_data_;
152  }

◆ lattice_size()

int BlamerBundle::lattice_size ( ) const
inline

Definition at line 153 of file blamer.h.

153  {
154  return lattice_size_; // size of lattice_data in bytes
155  }

◆ MatrixPositionCorrect()

bool BlamerBundle::MatrixPositionCorrect ( int  index,
const MATRIX_COORD & coord
)
inline

Definition at line 143 of file blamer.h.

143  {
144  return correct_segmentation_cols_[index] == coord.col &&
145  correct_segmentation_rows_[index] == coord.row;
146  }

◆ misadaption_debug()

const STRING& BlamerBundle::misadaption_debug ( ) const
inline

Definition at line 131 of file blamer.h.

131  {
132  return misadaption_debug_;
133  }

◆ NoTruth()

bool BlamerBundle::NoTruth ( ) const
inline

Definition at line 121 of file blamer.h.

121  {
122  return incorrect_result_reason_ == IRR_NO_TRUTH ||
123  incorrect_result_reason_ == IRR_PAGE_LAYOUT;
124  }

◆ params_training_bundle()

const tesseract::ParamsTrainingBundle& BlamerBundle::params_training_bundle ( ) const
inline

Definition at line 162 of file blamer.h.

162  {
163  return params_training_bundle_;
164  }

◆ set_best_choice_is_dict_and_top_choice()

void BlamerBundle::set_best_choice_is_dict_and_top_choice ( bool  value)
inline

Definition at line 147 of file blamer.h.

147  {
148  best_choice_is_dict_and_top_choice_ = value;
149  }

◆ set_lattice_data()

void BlamerBundle::set_lattice_data ( const char *  data,
int  size 
)
inline

Definition at line 156 of file blamer.h.

156  {
157  lattice_size_ = size;
158  delete [] lattice_data_;
159  lattice_data_ = new char[lattice_size_];
160  memcpy(lattice_data_, data, lattice_size_);
161  }

◆ SetChopperBlame()

void BlamerBundle::SetChopperBlame ( const WERD_RES * word,
bool  debug 
)

Definition at line 315 of file blamer.cpp.

315  {
316  if (NoTruth() || !truth_has_char_boxes_ ||
317  word->chopped_word->blobs.empty()) {
318  return;
319  }
320  STRING debug_str;
321  bool missing_chop = false;
322  int num_blobs = word->chopped_word->blobs.size();
323  int box_index = 0;
324  int blob_index = 0;
325  int16_t truth_x = -1;
326  while (box_index < truth_word_.length() && blob_index < num_blobs) {
327  truth_x = norm_truth_word_.BlobBox(box_index).right();
328  TBLOB * curr_blob = word->chopped_word->blobs[blob_index];
329  if (curr_blob->bounding_box().right() < truth_x - norm_box_tolerance_) {
330  ++blob_index;
331  continue; // encountered an extra chop, keep looking
332  } else if (curr_blob->bounding_box().right() >
333  truth_x + norm_box_tolerance_) {
334  missing_chop = true;
335  break;
336  } else {
337  ++blob_index;
338  }
339  }
340  if (missing_chop || box_index < norm_truth_word_.length()) {
341  STRING debug_str;
342  if (missing_chop) {
343  debug_str.add_str_int("Detected missing chop (tolerance=",
344  norm_box_tolerance_);
345  debug_str += ") at Bounding Box=";
346  TBLOB * curr_blob = word->chopped_word->blobs[blob_index];
347  curr_blob->bounding_box().print_to_str(&debug_str);
348  debug_str.add_str_int("\nNo chop for truth at x=", truth_x);
349  } else {
350  debug_str.add_str_int("Missing chops for last ",
351  norm_truth_word_.length() - box_index);
352  debug_str += " truth box(es)";
353  }
354  debug_str += "\nMaximally chopped word boxes:\n";
355  for (blob_index = 0; blob_index < num_blobs; ++blob_index) {
356  TBLOB * curr_blob = word->chopped_word->blobs[blob_index];
357  curr_blob->bounding_box().print_to_str(&debug_str);
358  debug_str += '\n';
359  }
360  debug_str += "Truth bounding boxes:\n";
361  for (box_index = 0; box_index < norm_truth_word_.length(); ++box_index) {
362  norm_truth_word_.BlobBox(box_index).print_to_str(&debug_str);
363  debug_str += '\n';
364  }
365  SetBlame(IRR_CHOPPER, debug_str, word->best_choice, debug);
366  }
367 }
const STRING & debug() const
Definition: blamer.h:128
int size() const
Definition: genericvector.h:71
const TBOX & BlobBox(int index) const
Definition: boxword.h:84
bool NoTruth() const
Definition: blamer.h:121
bool empty() const
Definition: genericvector.h:90
TBOX bounding_box() const
Definition: blobs.cpp:478
GenericVector< TBLOB * > blobs
Definition: blobs.h:443
void add_str_int(const char *str, int number)
Definition: strngs.cpp:379
Definition: strngs.h:45
int length() const
Definition: boxword.h:83
int16_t right() const
Definition: rect.h:79
Definition: blobs.h:268
TWERD * chopped_word
Definition: pageres.h:215
WERD_CHOICE * best_choice
Definition: pageres.h:235
void print_to_str(STRING *str) const
Definition: rect.cpp:175

◆ SetMisAdaptionDebug()

void BlamerBundle::SetMisAdaptionDebug ( const WERD_CHOICE * best_choice,
bool  debug 
)

Definition at line 579 of file blamer.cpp.

580  {
581  if (incorrect_result_reason_ != IRR_NO_TRUTH &&
582  !ChoiceIsCorrect(best_choice)) {
583  misadaption_debug_ ="misadapt to word (";
584  misadaption_debug_ += best_choice->permuter_name();
585  misadaption_debug_ += "): ";
586  FillDebugString("", best_choice, &misadaption_debug_);
587  if (debug) {
588  tprintf("%s\n", misadaption_debug_.string());
589  }
590  }
591 }
const STRING & debug() const
Definition: blamer.h:128
const char * string() const
Definition: strngs.cpp:196
static const char * permuter_name(uint8_t permuter)
Definition: ratngs.cpp:194
bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const
Definition: blamer.cpp:116
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
void FillDebugString(const STRING &msg, const WERD_CHOICE *choice, STRING *debug)
Definition: blamer.cpp:128

◆ SetRejectedTruth()

void BlamerBundle::SetRejectedTruth ( )

Definition at line 110 of file blamer.cpp.

110  {
111  incorrect_result_reason_ = IRR_NO_TRUTH;
112  truth_has_char_boxes_ = false;
113 }

◆ SetSymbolTruth()

void BlamerBundle::SetSymbolTruth ( const UNICHARSET & unicharset,
const char *  char_str,
const TBOX & char_box
)

Definition at line 91 of file blamer.cpp.

92  {
93  STRING symbol_str(char_str);
94  UNICHAR_ID id = unicharset.unichar_to_id(char_str);
95  if (id != INVALID_UNICHAR_ID) {
96  STRING normed_uch(unicharset.get_normed_unichar(id));
97  if (normed_uch.length() > 0) symbol_str = normed_uch;
98  }
99  int length = truth_word_.length();
100  truth_text_.push_back(symbol_str);
101  truth_word_.InsertBox(length, char_box);
102  if (length == 0)
103  truth_has_char_boxes_ = true;
104  else if (truth_word_.BlobBox(length - 1) == char_box)
105  truth_has_char_boxes_ = false;
106 }
int UNICHAR_ID
Definition: unichar.h:35
const char * get_normed_unichar(UNICHAR_ID unichar_id) const
Definition: unicharset.h:823
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:209
const TBOX & BlobBox(int index) const
Definition: boxword.h:84
int push_back(T object)
Definition: strngs.h:45
int length() const
Definition: boxword.h:83
void InsertBox(int index, const TBOX &box)
Definition: boxword.cpp:148

◆ SetupCorrectSegmentation()

void BlamerBundle::SetupCorrectSegmentation ( const TWERD * word,
bool  debug 
)

Definition at line 412 of file blamer.cpp.

412  {
413  params_training_bundle_.StartHypothesisList();
414  if (incorrect_result_reason_ != IRR_CORRECT || !truth_has_char_boxes_)
415  return; // Nothing to do here.
416 
417  STRING debug_str;
418  debug_str += "Blamer computing correct_segmentation_cols\n";
419  int curr_box_col = 0;
420  int next_box_col = 0;
421  int num_blobs = word->NumBlobs();
422  if (num_blobs == 0) return; // No blobs to play with.
423  int blob_index = 0;
424  int16_t next_box_x = word->blobs[blob_index]->bounding_box().right();
425  for (int truth_idx = 0; blob_index < num_blobs &&
426  truth_idx < norm_truth_word_.length();
427  ++blob_index) {
428  ++next_box_col;
429  int16_t curr_box_x = next_box_x;
430  if (blob_index + 1 < num_blobs)
431  next_box_x = word->blobs[blob_index + 1]->bounding_box().right();
432  int16_t truth_x = norm_truth_word_.BlobBox(truth_idx).right();
433  debug_str.add_str_int("Box x coord vs. truth: ", curr_box_x);
434  debug_str.add_str_int(" ", truth_x);
435  debug_str += "\n";
436  if (curr_box_x > (truth_x + norm_box_tolerance_)) {
437  break; // failed to find a matching box
438  } else if (curr_box_x >= truth_x - norm_box_tolerance_ && // matched
439  (blob_index + 1 >= num_blobs || // next box can't be included
440  next_box_x > truth_x + norm_box_tolerance_)) {
441  correct_segmentation_cols_.push_back(curr_box_col);
442  correct_segmentation_rows_.push_back(next_box_col-1);
443  ++truth_idx;
444  debug_str.add_str_int("col=", curr_box_col);
445  debug_str.add_str_int(" row=", next_box_col-1);
446  debug_str += "\n";
447  curr_box_col = next_box_col;
448  }
449  }
450  if (blob_index < num_blobs || // trailing blobs
451  correct_segmentation_cols_.length() != norm_truth_word_.length()) {
452  debug_str.add_str_int("Blamer failed to find correct segmentation"
453  " (tolerance=", norm_box_tolerance_);
454  if (blob_index >= num_blobs) debug_str += " blob == nullptr";
455  debug_str += ")\n";
456  debug_str.add_str_int(" path length ", correct_segmentation_cols_.length());
457  debug_str.add_str_int(" vs. truth ", norm_truth_word_.length());
458  debug_str += "\n";
459  SetBlame(IRR_UNKNOWN, debug_str, nullptr, debug);
460  correct_segmentation_cols_.clear();
461  correct_segmentation_rows_.clear();
462  }
463 }
const STRING & debug() const
Definition: blamer.h:128
int NumBlobs() const
Definition: blobs.h:432
const TBOX & BlobBox(int index) const
Definition: boxword.h:84
int length() const
Definition: genericvector.h:85
int push_back(T object)
GenericVector< TBLOB * > blobs
Definition: blobs.h:443
void add_str_int(const char *str, int number)
Definition: strngs.cpp:379
Definition: strngs.h:45
int length() const
Definition: boxword.h:83
int16_t right() const
Definition: rect.h:79

◆ SetupNormTruthWord()

void BlamerBundle::SetupNormTruthWord ( const DENORM & denorm)

Definition at line 150 of file blamer.cpp.

150  {
151  // TODO(rays) Is this the last use of denorm in WERD_RES and can it go?
152  norm_box_tolerance_ = kBlamerBoxTolerance * denorm.x_scale();
153  TPOINT topleft;
154  TPOINT botright;
155  TPOINT norm_topleft;
156  TPOINT norm_botright;
157  for (int b = 0; b < truth_word_.length(); ++b) {
158  const TBOX &box = truth_word_.BlobBox(b);
159  topleft.x = box.left();
160  topleft.y = box.top();
161  botright.x = box.right();
162  botright.y = box.bottom();
163  denorm.NormTransform(nullptr, topleft, &norm_topleft);
164  denorm.NormTransform(nullptr, botright, &norm_botright);
165  TBOX norm_box(norm_topleft.x, norm_botright.y,
166  norm_botright.x, norm_topleft.y);
167  norm_truth_word_.InsertBox(b, norm_box);
168  }
169 }
void NormTransform(const DENORM *first_norm, const TPOINT &pt, TPOINT *transformed) const
Definition: normalis.cpp:335
Definition: rect.h:34
const TBOX & BlobBox(int index) const
Definition: boxword.h:84
int16_t left() const
Definition: rect.h:72
int16_t top() const
Definition: rect.h:58
float x_scale() const
Definition: normalis.h:267
int16_t x
Definition: blobs.h:78
int length() const
Definition: boxword.h:83
int16_t right() const
Definition: rect.h:79
Definition: blobs.h:57
int16_t y
Definition: blobs.h:79
int16_t bottom() const
Definition: rect.h:65
void InsertBox(int index, const TBOX &box)
Definition: boxword.cpp:148

◆ SetWordTruth()

void BlamerBundle::SetWordTruth ( const UNICHARSET & unicharset,
const char *  truth_str,
const TBOX & word_box 
)

Definition at line 71 of file blamer.cpp.

72  {
73  truth_word_.InsertBox(0, word_box);
74  truth_has_char_boxes_ = false;
75  // Encode the string as UNICHAR_IDs.
76  GenericVector<UNICHAR_ID> encoding;
77  GenericVector<char> lengths;
78  unicharset.encode_string(truth_str, false, &encoding, &lengths, nullptr);
79  int total_length = 0;
80  for (int i = 0; i < encoding.size(); total_length += lengths[i++]) {
81  STRING uch(truth_str + total_length);
82  uch.truncate_at(lengths[i] - total_length);
83  UNICHAR_ID id = encoding[i];
84  if (id != INVALID_UNICHAR_ID) uch = unicharset.get_normed_unichar(id);
85  truth_text_.push_back(uch);
86  }
87 }
int UNICHAR_ID
Definition: unichar.h:35
int size() const
Definition: genericvector.h:71
bool encode_string(const char *str, bool give_up_on_failure, GenericVector< UNICHAR_ID > *encoding, GenericVector< char > *lengths, int *encoded_length) const
Definition: unicharset.cpp:258
const char * get_normed_unichar(UNICHAR_ID unichar_id) const
Definition: unicharset.h:823
int push_back(T object)
Definition: strngs.h:45
void InsertBox(int index, const TBOX &box)
Definition: boxword.cpp:148

◆ SplitBundle()

void BlamerBundle::SplitBundle ( int  word1_right,
int  word2_left,
bool  debug,
BlamerBundle * bundle1,
BlamerBundle * bundle2 
) const

Definition at line 174 of file blamer.cpp.

176  {
177  STRING debug_str;
178  // Find truth boxes that correspond to the split in the blobs.
179  int b;
180  int begin2_truth_index = -1;
181  if (incorrect_result_reason_ != IRR_NO_TRUTH &&
182  truth_has_char_boxes_) {
183  debug_str = "Looking for truth split at";
184  debug_str.add_str_int(" end1_x ", word1_right);
185  debug_str.add_str_int(" begin2_x ", word2_left);
186  debug_str += "\nnorm_truth_word boxes:\n";
187  if (norm_truth_word_.length() > 1) {
188  norm_truth_word_.BlobBox(0).print_to_str(&debug_str);
189  for (b = 1; b < norm_truth_word_.length(); ++b) {
190  norm_truth_word_.BlobBox(b).print_to_str(&debug_str);
191  if ((abs(word1_right - norm_truth_word_.BlobBox(b - 1).right()) <
192  norm_box_tolerance_) &&
193  (abs(word2_left - norm_truth_word_.BlobBox(b).left()) <
194  norm_box_tolerance_)) {
195  begin2_truth_index = b;
196  debug_str += "Split found";
197  break;
198  }
199  }
200  debug_str += '\n';
201  }
202  }
203  // Populate truth information in word and word2 with the first and second
204  // part of the original truth.
205  if (begin2_truth_index > 0) {
206  bundle1->truth_has_char_boxes_ = true;
207  bundle1->norm_box_tolerance_ = norm_box_tolerance_;
208  bundle2->truth_has_char_boxes_ = true;
209  bundle2->norm_box_tolerance_ = norm_box_tolerance_;
210  BlamerBundle *curr_bb = bundle1;
211  for (b = 0; b < norm_truth_word_.length(); ++b) {
212  if (b == begin2_truth_index) curr_bb = bundle2;
213  curr_bb->norm_truth_word_.InsertBox(b, norm_truth_word_.BlobBox(b));
214  curr_bb->truth_word_.InsertBox(b, truth_word_.BlobBox(b));
215  curr_bb->truth_text_.push_back(truth_text_[b]);
216  }
217  } else if (incorrect_result_reason_ == IRR_NO_TRUTH) {
218  bundle1->incorrect_result_reason_ = IRR_NO_TRUTH;
219  bundle2->incorrect_result_reason_ = IRR_NO_TRUTH;
220  } else {
221  debug_str += "Truth split not found";
222  debug_str += truth_has_char_boxes_ ?
223  "\n" : " (no truth char boxes)\n";
224  bundle1->SetBlame(IRR_NO_TRUTH_SPLIT, debug_str, nullptr, debug);
225  bundle2->SetBlame(IRR_NO_TRUTH_SPLIT, debug_str, nullptr, debug);
226  }
227 }
const STRING & debug() const
Definition: blamer.h:128
const TBOX & BlobBox(int index) const
Definition: boxword.h:84
int16_t left() const
Definition: rect.h:72
int push_back(T object)
void add_str_int(const char *str, int number)
Definition: strngs.cpp:379
Definition: strngs.h:45
int length() const
Definition: boxword.h:83
int16_t right() const
Definition: rect.h:79
void InsertBox(int index, const TBOX &box)
Definition: boxword.cpp:148
void print_to_str(STRING *str) const
Definition: rect.cpp:175

◆ TruthString()

STRING BlamerBundle::TruthString ( ) const
inline

Definition at line 112 of file blamer.h.

112  {
113  STRING truth_str;
114  for (int i = 0; i < truth_text_.length(); ++i)
115  truth_str += truth_text_[i];
116  return truth_str;
117  }
int length() const
Definition: genericvector.h:85
Definition: strngs.h:45

◆ UpdateBestRating()

void BlamerBundle::UpdateBestRating ( float  rating)
inline

Definition at line 134 of file blamer.h.

134  {
135  if (rating < best_correctly_segmented_rating_)
136  best_correctly_segmented_rating_ = rating;
137  }

The documentation for this struct was generated from the following files: