All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
BlamerBundle Struct Reference

#include <blamer.h>

Public Member Functions

 BlamerBundle ()
 
 BlamerBundle (const BlamerBundle &other)
 
 ~BlamerBundle ()
 
STRING TruthString () const
 
IncorrectResultReason incorrect_result_reason () const
 
bool NoTruth () const
 
bool HasDebugInfo () const
 
const STRING & debug () const
 
const STRING & misadaption_debug () const
 
void UpdateBestRating (float rating)
 
int correct_segmentation_length () const
 
bool MatrixPositionCorrect (int index, const MATRIX_COORD &coord)
 
void set_best_choice_is_dict_and_top_choice (bool value)
 
const char * lattice_data () const
 
int lattice_size () const
 
void set_lattice_data (const char *data, int size)
 
const
tesseract::ParamsTrainingBundle &
params_training_bundle () const
 
void AddHypothesis (const tesseract::ParamsTrainingHypothesis &hypo)
 
void SetWordTruth (const UNICHARSET &unicharset, const char *truth_str, const TBOX &word_box)
 
void SetSymbolTruth (const UNICHARSET &unicharset, const char *char_str, const TBOX &char_box)
 
void SetRejectedTruth ()
 
bool ChoiceIsCorrect (const WERD_CHOICE *word_choice) const
 
void ClearResults ()
 
void CopyTruth (const BlamerBundle &other)
 
void CopyResults (const BlamerBundle &other)
 
const char * IncorrectReason () const
 
void FillDebugString (const STRING &msg, const WERD_CHOICE *choice, STRING *debug)
 
void SetupNormTruthWord (const DENORM &denorm)
 
void SplitBundle (int word1_right, int word2_left, bool debug, BlamerBundle *bundle1, BlamerBundle *bundle2) const
 
void JoinBlames (const BlamerBundle &bundle1, const BlamerBundle &bundle2, bool debug)
 
void BlameClassifier (const UNICHARSET &unicharset, const TBOX &blob_box, const BLOB_CHOICE_LIST &choices, bool debug)
 
void SetChopperBlame (const WERD_RES *word, bool debug)
 
void BlameClassifierOrLangModel (const WERD_RES *word, const UNICHARSET &unicharset, bool valid_permuter, bool debug)
 
void SetupCorrectSegmentation (const TWERD *word, bool debug)
 
bool GuidedSegsearchNeeded (const WERD_CHOICE *best_choice) const
 
void InitForSegSearch (const WERD_CHOICE *best_choice, MATRIX *ratings, UNICHAR_ID wildcard_id, bool debug, STRING *debug_str, TessResultCallback2< bool, int, int > *pp_cb)
 
bool GuidedSegsearchStillGoing () const
 
void FinishSegSearch (const WERD_CHOICE *best_choice, bool debug, STRING *debug_str)
 
void SetMisAdaptionDebug (const WERD_CHOICE *best_choice, bool debug)
 

Static Public Member Functions

static const char * IncorrectReasonName (IncorrectResultReason irr)
 
static void LastChanceBlame (bool debug, WERD_RES *word)
 

Detailed Description

Definition at line 88 of file blamer.h.

Constructor & Destructor Documentation

BlamerBundle::BlamerBundle ( )
inline

Definition at line 90 of file blamer.h.

90  : truth_has_char_boxes_(false),
91  incorrect_result_reason_(IRR_CORRECT),
92  lattice_data_(NULL) { ClearResults(); }
#define NULL
Definition: host.h:144
void ClearResults()
Definition: blamer.h:173
BlamerBundle::BlamerBundle ( const BlamerBundle & other)
inline

Definition at line 93 of file blamer.h.

93  {
94  this->CopyTruth(other);
95  this->CopyResults(other);
96  }
void CopyTruth(const BlamerBundle &other)
Definition: blamer.h:187
void CopyResults(const BlamerBundle &other)
Definition: blamer.h:194
BlamerBundle::~BlamerBundle ( )
inline

Definition at line 97 of file blamer.h.

97 { delete[] lattice_data_; }

Member Function Documentation

void BlamerBundle::AddHypothesis ( const tesseract::ParamsTrainingHypothesis & hypo)
inline

Definition at line 154 of file blamer.h.

154  {
155  params_training_bundle_.AddHypothesis(hypo);
156  }
ParamsTrainingHypothesis & AddHypothesis(const ParamsTrainingHypothesis &other)
void BlamerBundle::BlameClassifier ( const UNICHARSET & unicharset,
const TBOX & blob_box,
const BLOB_CHOICE_LIST &  choices,
bool  debug 
)

Definition at line 257 of file blamer.cpp.

260  {
261  if (!truth_has_char_boxes_ ||
262  incorrect_result_reason_ != IRR_CORRECT)
263  return; // Nothing to do here.
264 
265  for (int b = 0; b < norm_truth_word_.length(); ++b) {
266  const TBOX &truth_box = norm_truth_word_.BlobBox(b);
267  // Note that we are more strict on the bounding box boundaries here
268  // than in other places (chopper, segmentation search), since we do
269  // not have the ability to check the previous and next bounding box.
270  if (blob_box.x_almost_equal(truth_box, norm_box_tolerance_/2)) {
271  bool found = false;
272  bool incorrect_adapted = false;
273  UNICHAR_ID incorrect_adapted_id = INVALID_UNICHAR_ID;
274  const char *truth_str = truth_text_[b].string();
275  // We promise not to modify the list or its contents, using a
276  // const BLOB_CHOICE* below.
277  BLOB_CHOICE_IT choices_it(const_cast<BLOB_CHOICE_LIST*>(&choices));
278  for (choices_it.mark_cycle_pt(); !choices_it.cycled_list();
279  choices_it.forward()) {
280  const BLOB_CHOICE* choice = choices_it.data();
281  if (strcmp(truth_str, unicharset.get_normed_unichar(
282  choice->unichar_id())) == 0) {
283  found = true;
284  break;
285  } else if (choice->IsAdapted()) {
286  incorrect_adapted = true;
287  incorrect_adapted_id = choice->unichar_id();
288  }
289  } // end choices_it for loop
290  if (!found) {
291  STRING debug_str = "unichar ";
292  debug_str += truth_str;
293  debug_str += " not found in classification list";
294  SetBlame(IRR_CLASSIFIER, debug_str, NULL, debug);
295  } else if (incorrect_adapted) {
296  STRING debug_str = "better rating for adapted ";
297  debug_str += unicharset.id_to_unichar(incorrect_adapted_id);
298  debug_str += " than for correct ";
299  debug_str += truth_str;
300  SetBlame(IRR_ADAPTION, debug_str, NULL, debug);
301  }
302  break;
303  }
304  } // end iterating over blamer_bundle->norm_truth_word
305 }
const TBOX & BlobBox(int index) const
Definition: boxword.h:88
bool x_almost_equal(const TBOX &box, int tolerance) const
Definition: rect.cpp:253
bool IsAdapted() const
Definition: ratngs.h:135
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
int UNICHAR_ID
Definition: unichar.h:33
const int length() const
Definition: boxword.h:85
Definition: rect.h:30
const char * get_normed_unichar(UNICHAR_ID unichar_id) const
Definition: unicharset.h:776
Definition: strngs.h:44
#define NULL
Definition: host.h:144
UNICHAR_ID unichar_id() const
Definition: ratngs.h:76
const STRING & debug() const
Definition: blamer.h:116
void BlamerBundle::BlameClassifierOrLangModel ( const WERD_RES * word,
const UNICHARSET & unicharset,
bool  valid_permuter,
bool  debug 
)

Definition at line 369 of file blamer.cpp.

371  {
372  if (valid_permuter) {
373  // Find out whether best choice is a top choice.
374  best_choice_is_dict_and_top_choice_ = true;
375  for (int i = 0; i < word->best_choice->length(); ++i) {
376  BLOB_CHOICE_IT blob_choice_it(word->GetBlobChoices(i));
377  ASSERT_HOST(!blob_choice_it.empty());
378  BLOB_CHOICE *first_choice = NULL;
379  for (blob_choice_it.mark_cycle_pt(); !blob_choice_it.cycled_list();
380  blob_choice_it.forward()) { // find first non-fragment choice
381  if (!(unicharset.get_fragment(blob_choice_it.data()->unichar_id()))) {
382  first_choice = blob_choice_it.data();
383  break;
384  }
385  }
386  ASSERT_HOST(first_choice != NULL);
387  if (first_choice->unichar_id() != word->best_choice->unichar_id(i)) {
388  best_choice_is_dict_and_top_choice_ = false;
389  break;
390  }
391  }
392  }
393  STRING debug_str;
394  if (best_choice_is_dict_and_top_choice_) {
395  debug_str = "Best choice is: incorrect, top choice, dictionary word";
396  debug_str += " with permuter ";
397  debug_str += word->best_choice->permuter_name();
398  } else {
399  debug_str = "Classifier/Old LM tradeoff is to blame";
400  }
401  SetBlame(best_choice_is_dict_and_top_choice_ ? IRR_CLASSIFIER
402  : IRR_CLASS_LM_TRADEOFF,
403  debug_str, word->best_choice, debug);
404 }
int length() const
Definition: ratngs.h:300
WERD_CHOICE * best_choice
Definition: pageres.h:219
#define ASSERT_HOST(x)
Definition: errcode.h:84
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:682
const UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:312
static const char * permuter_name(uinT8 permuter)
Definition: ratngs.cpp:174
Definition: strngs.h:44
#define NULL
Definition: host.h:144
BLOB_CHOICE_LIST * GetBlobChoices(int index) const
Definition: pageres.cpp:751
const STRING & debug() const
Definition: blamer.h:116
bool BlamerBundle::ChoiceIsCorrect ( const WERD_CHOICE * word_choice) const

Definition at line 111 of file blamer.cpp.

111  {
112  if (word_choice == NULL) return false;
113  const UNICHARSET* uni_set = word_choice->unicharset();
114  STRING normed_choice_str;
115  for (int i = 0; i < word_choice->length(); ++i) {
116  normed_choice_str +=
117  uni_set->get_normed_unichar(word_choice->unichar_id(i));
118  }
119  STRING truth_str = TruthString();
120  return truth_str == normed_choice_str;
121 }
int length() const
Definition: ratngs.h:300
const UNICHARSET * unicharset() const
Definition: ratngs.h:297
STRING TruthString() const
Definition: blamer.h:100
const UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:312
const char * get_normed_unichar(UNICHAR_ID unichar_id) const
Definition: unicharset.h:776
Definition: strngs.h:44
#define NULL
Definition: host.h:144
void BlamerBundle::ClearResults ( )
inline

Definition at line 173 of file blamer.h.

173  {
174  norm_truth_word_.DeleteAllBoxes();
175  norm_box_tolerance_ = 0;
176  if (!NoTruth()) incorrect_result_reason_ = IRR_CORRECT;
177  debug_ = "";
178  segsearch_is_looking_for_blame_ = false;
179  best_correctly_segmented_rating_ = WERD_CHOICE::kBadRating;
180  correct_segmentation_cols_.clear();
181  correct_segmentation_rows_.clear();
182  best_choice_is_dict_and_top_choice_ = false;
183  delete[] lattice_data_;
184  lattice_data_ = NULL;
185  lattice_size_ = 0;
186  }
static const float kBadRating
Definition: ratngs.h:273
bool NoTruth() const
Definition: blamer.h:109
void DeleteAllBoxes()
Definition: boxword.cpp:177
#define NULL
Definition: host.h:144
void BlamerBundle::CopyResults ( const BlamerBundle & other)
inline

Definition at line 194 of file blamer.h.

194  {
195  norm_truth_word_ = other.norm_truth_word_;
196  norm_box_tolerance_ = other.norm_box_tolerance_;
197  incorrect_result_reason_ = other.incorrect_result_reason_;
198  segsearch_is_looking_for_blame_ = other.segsearch_is_looking_for_blame_;
199  best_correctly_segmented_rating_ = other.best_correctly_segmented_rating_;
200  correct_segmentation_cols_ = other.correct_segmentation_cols_;
201  correct_segmentation_rows_ = other.correct_segmentation_rows_;
202  best_choice_is_dict_and_top_choice_ =
203  other.best_choice_is_dict_and_top_choice_;
204  if (other.lattice_data_ != NULL) {
205  lattice_data_ = new char[other.lattice_size_];
206  memcpy(lattice_data_, other.lattice_data_, other.lattice_size_);
207  lattice_size_ = other.lattice_size_;
208  } else {
209  lattice_data_ = NULL;
210  }
211  }
#define NULL
Definition: host.h:144
void BlamerBundle::CopyTruth ( const BlamerBundle & other)
inline

Definition at line 187 of file blamer.h.

187  {
188  truth_has_char_boxes_ = other.truth_has_char_boxes_;
189  truth_word_ = other.truth_word_;
190  truth_text_ = other.truth_text_;
191  incorrect_result_reason_ =
192  (other.NoTruth() ? other.incorrect_result_reason_ : IRR_CORRECT);
193  }
bool NoTruth() const
Definition: blamer.h:109
int BlamerBundle::correct_segmentation_length ( ) const
inline

Definition at line 126 of file blamer.h.

126  {
127  return correct_segmentation_cols_.length();
128  }
int length() const
Definition: genericvector.h:79
const STRING& BlamerBundle::debug ( ) const
inline

Definition at line 116 of file blamer.h.

116  {
117  return debug_;
118  }
void BlamerBundle::FillDebugString ( const STRING & msg,
const WERD_CHOICE * choice,
STRING * debug
)

Definition at line 123 of file blamer.cpp.

125  {
126  (*debug) += "Truth ";
127  for (int i = 0; i < this->truth_text_.length(); ++i) {
128  (*debug) += this->truth_text_[i];
129  }
130  if (!this->truth_has_char_boxes_) (*debug) += " (no char boxes)";
131  if (choice != NULL) {
132  (*debug) += " Choice ";
133  STRING choice_str;
134  choice->string_and_lengths(&choice_str, NULL);
135  (*debug) += choice_str;
136  }
137  if (msg.length() > 0) {
138  (*debug) += "\n";
139  (*debug) += msg;
140  }
141  (*debug) += "\n";
142 }
int length() const
Definition: genericvector.h:79
inT32 length() const
Definition: strngs.cpp:188
void string_and_lengths(STRING *word_str, STRING *word_lengths_str) const
Definition: ratngs.cpp:427
Definition: strngs.h:44
#define NULL
Definition: host.h:144
void BlamerBundle::FinishSegSearch ( const WERD_CHOICE * best_choice,
bool  debug,
STRING * debug_str
)

Definition at line 506 of file blamer.cpp.

507  {
508  // If we are still looking for blame (i.e. best_choice is incorrect, but a
509  // path representing the correct segmentation could be constructed), we can
510  // blame segmentation search pain point prioritization if the rating of the
511  // path corresponding to the correct segmentation is better than that of
512  // best_choice (i.e. language model would have done the correct thing, but
513  // because of poor pain point prioritization the correct segmentation was
514  // never explored). Otherwise we blame the tradeoff between the language model
515  // and the classifier, since even after exploring the path corresponding to
516  // the correct segmentation incorrect best_choice would have been chosen.
517  // One special case when we blame the classifier instead is when best choice
518  // is incorrect, but it is a dictionary word and it classifier's top choice.
519  if (segsearch_is_looking_for_blame_) {
520  segsearch_is_looking_for_blame_ = false;
521  if (best_choice_is_dict_and_top_choice_) {
522  *debug_str = "Best choice is: incorrect, top choice, dictionary word";
523  *debug_str += " with permuter ";
524  *debug_str += best_choice->permuter_name();
525  SetBlame(IRR_CLASSIFIER, *debug_str, best_choice, debug);
526  } else if (best_correctly_segmented_rating_ <
527  best_choice->rating()) {
528  *debug_str += "Correct segmentation state was not explored";
529  SetBlame(IRR_SEGSEARCH_PP, *debug_str, best_choice, debug);
530  } else {
531  if (best_correctly_segmented_rating_ >=
532  WERD_CHOICE::kBadRating) {
533  *debug_str += "Correct segmentation paths were pruned by LM\n";
534  } else {
535  debug_str->add_str_double("Best correct segmentation rating ",
536  best_correctly_segmented_rating_);
537  debug_str->add_str_double(" vs. best choice rating ",
538  best_choice->rating());
539  }
540  SetBlame(IRR_CLASS_LM_TRADEOFF, *debug_str, best_choice, debug);
541  }
542  }
543 }
static const float kBadRating
Definition: ratngs.h:273
float rating() const
Definition: ratngs.h:324
static const char * permuter_name(uinT8 permuter)
Definition: ratngs.cpp:174
void add_str_double(const char *str, double number)
Definition: strngs.cpp:386
const STRING & debug() const
Definition: blamer.h:116
bool BlamerBundle::GuidedSegsearchNeeded ( const WERD_CHOICE * best_choice) const

Definition at line 461 of file blamer.cpp.

461  {
462  return incorrect_result_reason_ == IRR_CORRECT &&
463  !segsearch_is_looking_for_blame_ &&
464  truth_has_char_boxes_ &&
465  !ChoiceIsCorrect(best_choice);
466 }
bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const
Definition: blamer.cpp:111
bool BlamerBundle::GuidedSegsearchStillGoing ( ) const

Definition at line 501 of file blamer.cpp.

501  {
502  return segsearch_is_looking_for_blame_;
503 }
bool BlamerBundle::HasDebugInfo ( ) const
inline

Definition at line 113 of file blamer.h.

113  {
114  return debug_.length() > 0 || misadaption_debug_.length() > 0;
115  }
inT32 length() const
Definition: strngs.cpp:188
IncorrectResultReason BlamerBundle::incorrect_result_reason ( ) const
inline

Definition at line 106 of file blamer.h.

106  {
107  return incorrect_result_reason_;
108  }
const char * BlamerBundle::IncorrectReason ( ) const

Definition at line 60 of file blamer.cpp.

60  {
61  return kIncorrectResultReasonNames[incorrect_result_reason_];
62 }
const char *const kIncorrectResultReasonNames[]
Definition: blamer.cpp:41
const char * BlamerBundle::IncorrectReasonName ( IncorrectResultReason  irr)
static

Definition at line 56 of file blamer.cpp.

56  {
57  return kIncorrectResultReasonNames[irr];
58 }
const char *const kIncorrectResultReasonNames[]
Definition: blamer.cpp:41
void BlamerBundle::InitForSegSearch ( const WERD_CHOICE * best_choice,
MATRIX * ratings,
UNICHAR_ID  wildcard_id,
bool  debug,
STRING * debug_str,
TessResultCallback2< bool, int, int > *  pp_cb 
)

Definition at line 473 of file blamer.cpp.

476  {
477  segsearch_is_looking_for_blame_ = true;
478  if (debug) {
479  tprintf("segsearch starting to look for blame\n");
480  }
481  // Fill pain points for any unclassifed blob corresponding to the
482  // correct segmentation state.
483  *debug_str += "Correct segmentation:\n";
484  for (int idx = 0; idx < correct_segmentation_cols_.length(); ++idx) {
485  debug_str->add_str_int("col=", correct_segmentation_cols_[idx]);
486  debug_str->add_str_int(" row=", correct_segmentation_rows_[idx]);
487  *debug_str += "\n";
488  if (!ratings->Classified(correct_segmentation_cols_[idx],
489  correct_segmentation_rows_[idx],
490  wildcard_id) &&
491  !cb->Run(correct_segmentation_cols_[idx],
492  correct_segmentation_rows_[idx])) {
493  segsearch_is_looking_for_blame_ = false;
494  *debug_str += "\nFailed to insert pain point\n";
495  SetBlame(IRR_SEGSEARCH_HEUR, *debug_str, best_choice, debug);
496  break;
497  }
498  } // end for blamer_bundle->correct_segmentation_cols/rows
499 }
int length() const
Definition: genericvector.h:79
#define tprintf(...)
Definition: tprintf.h:31
void add_str_int(const char *str, int number)
Definition: strngs.cpp:376
bool Classified(int col, int row, int wildcard_id) const
Definition: matrix.cpp:36
const STRING & debug() const
Definition: blamer.h:116
void BlamerBundle::JoinBlames ( const BlamerBundle & bundle1,
const BlamerBundle & bundle2,
bool  debug 
)

Definition at line 225 of file blamer.cpp.

226  {
227  STRING debug_str;
228  IncorrectResultReason irr = incorrect_result_reason_;
229  if (irr != IRR_NO_TRUTH_SPLIT) debug_str = "";
230  if (bundle1.incorrect_result_reason_ != IRR_CORRECT &&
231  bundle1.incorrect_result_reason_ != IRR_NO_TRUTH &&
232  bundle1.incorrect_result_reason_ != IRR_NO_TRUTH_SPLIT) {
233  debug_str += "Blame from part 1: ";
234  debug_str += bundle1.debug_;
235  irr = bundle1.incorrect_result_reason_;
236  }
237  if (bundle2.incorrect_result_reason_ != IRR_CORRECT &&
238  bundle2.incorrect_result_reason_ != IRR_NO_TRUTH &&
239  bundle2.incorrect_result_reason_ != IRR_NO_TRUTH_SPLIT) {
240  debug_str += "Blame from part 2: ";
241  debug_str += bundle2.debug_;
242  if (irr == IRR_CORRECT) {
243  irr = bundle2.incorrect_result_reason_;
244  } else if (irr != bundle2.incorrect_result_reason_) {
245  irr = IRR_UNKNOWN;
246  }
247  }
248  incorrect_result_reason_ = irr;
249  if (irr != IRR_CORRECT && irr != IRR_NO_TRUTH) {
250  SetBlame(irr, debug_str, NULL, debug);
251  }
252 }
IncorrectResultReason
Definition: blamer.h:37
Definition: strngs.h:44
#define NULL
Definition: host.h:144
const STRING & debug() const
Definition: blamer.h:116
void BlamerBundle::LastChanceBlame ( bool  debug,
WERD_RES * word
)
static

Definition at line 547 of file blamer.cpp.

547  {
548  if (word->blamer_bundle == NULL) {
549  word->blamer_bundle = new BlamerBundle();
550  word->blamer_bundle->SetBlame(IRR_PAGE_LAYOUT, "LastChanceBlame",
551  word->best_choice, debug);
552  } else if (word->blamer_bundle->incorrect_result_reason_ == IRR_NO_TRUTH) {
553  word->blamer_bundle->SetBlame(IRR_NO_TRUTH, "Rejected truth",
554  word->best_choice, debug);
555  } else {
556  bool correct = word->blamer_bundle->ChoiceIsCorrect(word->best_choice);
557  IncorrectResultReason irr = word->blamer_bundle->incorrect_result_reason_;
558  if (irr == IRR_CORRECT && !correct) {
559  STRING debug_str = "Choice is incorrect after recognition";
560  word->blamer_bundle->SetBlame(IRR_UNKNOWN, debug_str, word->best_choice,
561  debug);
562  } else if (irr != IRR_CORRECT && correct) {
563  if (debug) {
564  tprintf("Corrected %s\n", word->blamer_bundle->debug_.string());
565  }
566  word->blamer_bundle->incorrect_result_reason_ = IRR_CORRECT;
567  word->blamer_bundle->debug_ = "";
568  }
569  }
570 }
IncorrectResultReason
Definition: blamer.h:37
bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const
Definition: blamer.cpp:111
WERD_CHOICE * best_choice
Definition: pageres.h:219
#define tprintf(...)
Definition: tprintf.h:31
BlamerBundle()
Definition: blamer.h:90
Definition: strngs.h:44
#define NULL
Definition: host.h:144
const char * string() const
Definition: strngs.cpp:193
BlamerBundle * blamer_bundle
Definition: pageres.h:230
const STRING & debug() const
Definition: blamer.h:116
const char* BlamerBundle::lattice_data ( ) const
inline

Definition at line 138 of file blamer.h.

138  {
139  return lattice_data_;
140  }
int BlamerBundle::lattice_size ( ) const
inline

Definition at line 141 of file blamer.h.

141  {
142  return lattice_size_; // size of lattice_data in bytes
143  }
bool BlamerBundle::MatrixPositionCorrect ( int  index,
const MATRIX_COORD coord 
)
inline

Definition at line 131 of file blamer.h.

131  {
132  return correct_segmentation_cols_[index] == coord.col &&
133  correct_segmentation_rows_[index] == coord.row;
134  }
const STRING& BlamerBundle::misadaption_debug ( ) const
inline

Definition at line 119 of file blamer.h.

119  {
120  return misadaption_debug_;
121  }
bool BlamerBundle::NoTruth ( ) const
inline

Definition at line 109 of file blamer.h.

109  {
110  return incorrect_result_reason_ == IRR_NO_TRUTH ||
111  incorrect_result_reason_ == IRR_PAGE_LAYOUT;
112  }
const tesseract::ParamsTrainingBundle& BlamerBundle::params_training_bundle ( ) const
inline

Definition at line 150 of file blamer.h.

150  {
151  return params_training_bundle_;
152  }
void BlamerBundle::set_best_choice_is_dict_and_top_choice ( bool  value)
inline

Definition at line 135 of file blamer.h.

135  {
136  best_choice_is_dict_and_top_choice_ = value;
137  }
void BlamerBundle::set_lattice_data ( const char *  data,
int  size 
)
inline

Definition at line 144 of file blamer.h.

144  {
145  lattice_size_ = size;
146  delete [] lattice_data_;
147  lattice_data_ = new char[lattice_size_];
148  memcpy(lattice_data_, data, lattice_size_);
149  }
void BlamerBundle::SetChopperBlame ( const WERD_RES * word,
bool  debug 
)

Definition at line 310 of file blamer.cpp.

310  {
311  if (NoTruth() || !truth_has_char_boxes_ ||
312  word->chopped_word->blobs.empty()) {
313  return;
314  }
315  STRING debug_str;
316  bool missing_chop = false;
317  int num_blobs = word->chopped_word->blobs.size();
318  int box_index = 0;
319  int blob_index = 0;
320  inT16 truth_x;
321  while (box_index < truth_word_.length() && blob_index < num_blobs) {
322  truth_x = norm_truth_word_.BlobBox(box_index).right();
323  TBLOB * curr_blob = word->chopped_word->blobs[blob_index];
324  if (curr_blob->bounding_box().right() < truth_x - norm_box_tolerance_) {
325  ++blob_index;
326  continue; // encountered an extra chop, keep looking
327  } else if (curr_blob->bounding_box().right() >
328  truth_x + norm_box_tolerance_) {
329  missing_chop = true;
330  break;
331  } else {
332  ++blob_index;
333  }
334  }
335  if (missing_chop || box_index < norm_truth_word_.length()) {
336  STRING debug_str;
337  if (missing_chop) {
338  debug_str.add_str_int("Detected missing chop (tolerance=",
339  norm_box_tolerance_);
340  debug_str += ") at Bounding Box=";
341  TBLOB * curr_blob = word->chopped_word->blobs[blob_index];
342  curr_blob->bounding_box().print_to_str(&debug_str);
343  debug_str.add_str_int("\nNo chop for truth at x=", truth_x);
344  } else {
345  debug_str.add_str_int("Missing chops for last ",
346  norm_truth_word_.length() - box_index);
347  debug_str += " truth box(es)";
348  }
349  debug_str += "\nMaximally chopped word boxes:\n";
350  for (blob_index = 0; blob_index < num_blobs; ++blob_index) {
351  TBLOB * curr_blob = word->chopped_word->blobs[blob_index];
352  curr_blob->bounding_box().print_to_str(&debug_str);
353  debug_str += '\n';
354  }
355  debug_str += "Truth bounding boxes:\n";
356  for (box_index = 0; box_index < norm_truth_word_.length(); ++box_index) {
357  norm_truth_word_.BlobBox(box_index).print_to_str(&debug_str);
358  debug_str += '\n';
359  }
360  SetBlame(IRR_CHOPPER, debug_str, word->best_choice, debug);
361  }
362 }
Definition: blobs.h:261
int size() const
Definition: genericvector.h:72
WERD_CHOICE * best_choice
Definition: pageres.h:219
bool NoTruth() const
Definition: blamer.h:109
TWERD * chopped_word
Definition: pageres.h:201
const TBOX & BlobBox(int index) const
Definition: boxword.h:88
inT16 right() const
Definition: rect.h:75
bool empty() const
Definition: genericvector.h:84
void add_str_int(const char *str, int number)
Definition: strngs.cpp:376
GenericVector< TBLOB * > blobs
Definition: blobs.h:436
const int length() const
Definition: boxword.h:85
Definition: strngs.h:44
TBOX bounding_box() const
Definition: blobs.cpp:482
void print_to_str(STRING *str) const
Definition: rect.cpp:175
short inT16
Definition: host.h:100
const STRING & debug() const
Definition: blamer.h:116
void BlamerBundle::SetMisAdaptionDebug ( const WERD_CHOICE * best_choice,
bool  debug 
)

Definition at line 574 of file blamer.cpp.

575  {
576  if (incorrect_result_reason_ != IRR_NO_TRUTH &&
577  !ChoiceIsCorrect(best_choice)) {
578  misadaption_debug_ ="misadapt to word (";
579  misadaption_debug_ += best_choice->permuter_name();
580  misadaption_debug_ += "): ";
581  FillDebugString("", best_choice, &misadaption_debug_);
582  if (debug) {
583  tprintf("%s\n", misadaption_debug_.string());
584  }
585  }
586 }
bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const
Definition: blamer.cpp:111
#define tprintf(...)
Definition: tprintf.h:31
void FillDebugString(const STRING &msg, const WERD_CHOICE *choice, STRING *debug)
Definition: blamer.cpp:123
static const char * permuter_name(uinT8 permuter)
Definition: ratngs.cpp:174
const char * string() const
Definition: strngs.cpp:193
const STRING & debug() const
Definition: blamer.h:116
void BlamerBundle::SetRejectedTruth ( )

Definition at line 105 of file blamer.cpp.

105  {
106  incorrect_result_reason_ = IRR_NO_TRUTH;
107  truth_has_char_boxes_ = false;
108 }
void BlamerBundle::SetSymbolTruth ( const UNICHARSET & unicharset,
const char *  char_str,
const TBOX & char_box
)

Definition at line 86 of file blamer.cpp.

87  {
88  STRING symbol_str(char_str);
89  UNICHAR_ID id = unicharset.unichar_to_id(char_str);
90  if (id != INVALID_UNICHAR_ID) {
91  STRING normed_uch(unicharset.get_normed_unichar(id));
92  if (normed_uch.length() > 0) symbol_str = normed_uch;
93  }
94  int length = truth_word_.length();
95  truth_text_.push_back(symbol_str);
96  truth_word_.InsertBox(length, char_box);
97  if (length == 0)
98  truth_has_char_boxes_ = true;
99  else if (truth_word_.BlobBox(length - 1) == char_box)
100  truth_has_char_boxes_ = false;
101 }
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
int push_back(T object)
const TBOX & BlobBox(int index) const
Definition: boxword.h:88
int UNICHAR_ID
Definition: unichar.h:33
const int length() const
Definition: boxword.h:85
const char * get_normed_unichar(UNICHAR_ID unichar_id) const
Definition: unicharset.h:776
void InsertBox(int index, const TBOX &box)
Definition: boxword.cpp:151
Definition: strngs.h:44
void BlamerBundle::SetupCorrectSegmentation ( const TWERD * word,
bool  debug 
)

Definition at line 407 of file blamer.cpp.

407  {
408  params_training_bundle_.StartHypothesisList();
409  if (incorrect_result_reason_ != IRR_CORRECT || !truth_has_char_boxes_)
410  return; // Nothing to do here.
411 
412  STRING debug_str;
413  debug_str += "Blamer computing correct_segmentation_cols\n";
414  int curr_box_col = 0;
415  int next_box_col = 0;
416  int num_blobs = word->NumBlobs();
417  if (num_blobs == 0) return; // No blobs to play with.
418  int blob_index = 0;
419  inT16 next_box_x = word->blobs[blob_index]->bounding_box().right();
420  for (int truth_idx = 0; blob_index < num_blobs &&
421  truth_idx < norm_truth_word_.length();
422  ++blob_index) {
423  ++next_box_col;
424  inT16 curr_box_x = next_box_x;
425  if (blob_index + 1 < num_blobs)
426  next_box_x = word->blobs[blob_index + 1]->bounding_box().right();
427  inT16 truth_x = norm_truth_word_.BlobBox(truth_idx).right();
428  debug_str.add_str_int("Box x coord vs. truth: ", curr_box_x);
429  debug_str.add_str_int(" ", truth_x);
430  debug_str += "\n";
431  if (curr_box_x > (truth_x + norm_box_tolerance_)) {
432  break; // failed to find a matching box
433  } else if (curr_box_x >= truth_x - norm_box_tolerance_ && // matched
434  (blob_index + 1 >= num_blobs || // next box can't be included
435  next_box_x > truth_x + norm_box_tolerance_)) {
436  correct_segmentation_cols_.push_back(curr_box_col);
437  correct_segmentation_rows_.push_back(next_box_col-1);
438  ++truth_idx;
439  debug_str.add_str_int("col=", curr_box_col);
440  debug_str.add_str_int(" row=", next_box_col-1);
441  debug_str += "\n";
442  curr_box_col = next_box_col;
443  }
444  }
445  if (blob_index < num_blobs || // trailing blobs
446  correct_segmentation_cols_.length() != norm_truth_word_.length()) {
447  debug_str.add_str_int("Blamer failed to find correct segmentation"
448  " (tolerance=", norm_box_tolerance_);
449  if (blob_index >= num_blobs) debug_str += " blob == NULL";
450  debug_str += ")\n";
451  debug_str.add_str_int(" path length ", correct_segmentation_cols_.length());
452  debug_str.add_str_int(" vs. truth ", norm_truth_word_.length());
453  debug_str += "\n";
454  SetBlame(IRR_UNKNOWN, debug_str, NULL, debug);
455  correct_segmentation_cols_.clear();
456  correct_segmentation_rows_.clear();
457  }
458 }
int length() const
Definition: genericvector.h:79
int push_back(T object)
const TBOX & BlobBox(int index) const
Definition: boxword.h:88
inT16 right() const
Definition: rect.h:75
int NumBlobs() const
Definition: blobs.h:425
void add_str_int(const char *str, int number)
Definition: strngs.cpp:376
GenericVector< TBLOB * > blobs
Definition: blobs.h:436
const int length() const
Definition: boxword.h:85
Definition: strngs.h:44
#define NULL
Definition: host.h:144
short inT16
Definition: host.h:100
const STRING & debug() const
Definition: blamer.h:116
void BlamerBundle::SetupNormTruthWord ( const DENORM & denorm)

Definition at line 145 of file blamer.cpp.

145  {
146  // TODO(rays) Is this the last use of denorm in WERD_RES and can it go?
147  norm_box_tolerance_ = kBlamerBoxTolerance * denorm.x_scale();
148  TPOINT topleft;
149  TPOINT botright;
150  TPOINT norm_topleft;
151  TPOINT norm_botright;
152  for (int b = 0; b < truth_word_.length(); ++b) {
153  const TBOX &box = truth_word_.BlobBox(b);
154  topleft.x = box.left();
155  topleft.y = box.top();
156  botright.x = box.right();
157  botright.y = box.bottom();
158  denorm.NormTransform(NULL, topleft, &norm_topleft);
159  denorm.NormTransform(NULL, botright, &norm_botright);
160  TBOX norm_box(norm_topleft.x, norm_botright.y,
161  norm_botright.x, norm_topleft.y);
162  norm_truth_word_.InsertBox(b, norm_box);
163  }
164 }
const TBOX & BlobBox(int index) const
Definition: boxword.h:88
inT16 y
Definition: blobs.h:72
inT16 right() const
Definition: rect.h:75
inT16 left() const
Definition: rect.h:68
Definition: blobs.h:50
inT16 x
Definition: blobs.h:71
inT16 bottom() const
Definition: rect.h:61
const int length() const
Definition: boxword.h:85
void NormTransform(const DENORM *first_norm, const TPOINT &pt, TPOINT *transformed) const
Definition: normalis.cpp:334
float x_scale() const
Definition: normalis.h:269
Definition: rect.h:30
void InsertBox(int index, const TBOX &box)
Definition: boxword.cpp:151
#define NULL
Definition: host.h:144
inT16 top() const
Definition: rect.h:54
void BlamerBundle::SetWordTruth ( const UNICHARSET &  unicharset,
const char *  truth_str,
const TBOX &  word_box
)

Definition at line 66 of file blamer.cpp.

67  {
68  truth_word_.InsertBox(0, word_box);
69  truth_has_char_boxes_ = false;
70  // Encode the string as UNICHAR_IDs.
71  GenericVector<UNICHAR_ID> encoding;
72  GenericVector<char> lengths;
73  unicharset.encode_string(truth_str, false, &encoding, &lengths, NULL);
74  int total_length = 0;
75  for (int i = 0; i < encoding.size(); total_length += lengths[i++]) {
76  STRING uch(truth_str + total_length);
77  uch.truncate_at(lengths[i] - total_length);
78  UNICHAR_ID id = encoding[i];
79  if (id != INVALID_UNICHAR_ID) uch = unicharset.get_normed_unichar(id);
80  truth_text_.push_back(uch);
81  }
82 }
int size() const
Definition: genericvector.h:72
int push_back(T object)
int UNICHAR_ID
Definition: unichar.h:33
bool encode_string(const char *str, bool give_up_on_failure, GenericVector< UNICHAR_ID > *encoding, GenericVector< char > *lengths, int *encoded_length) const
Definition: unicharset.cpp:234
const char * get_normed_unichar(UNICHAR_ID unichar_id) const
Definition: unicharset.h:776
void InsertBox(int index, const TBOX &box)
Definition: boxword.cpp:151
Definition: strngs.h:44
#define NULL
Definition: host.h:144
void BlamerBundle::SplitBundle ( int  word1_right,
int  word2_left,
bool  debug,
BlamerBundle *  bundle1,
BlamerBundle *  bundle2
) const

Definition at line 169 of file blamer.cpp.

171  {
172  STRING debug_str;
173  // Find truth boxes that correspond to the split in the blobs.
174  int b;
175  int begin2_truth_index = -1;
176  if (incorrect_result_reason_ != IRR_NO_TRUTH &&
177  truth_has_char_boxes_) {
178  debug_str = "Looking for truth split at";
179  debug_str.add_str_int(" end1_x ", word1_right);
180  debug_str.add_str_int(" begin2_x ", word2_left);
181  debug_str += "\nnorm_truth_word boxes:\n";
182  if (norm_truth_word_.length() > 1) {
183  norm_truth_word_.BlobBox(0).print_to_str(&debug_str);
184  for (b = 1; b < norm_truth_word_.length(); ++b) {
185  norm_truth_word_.BlobBox(b).print_to_str(&debug_str);
186  if ((abs(word1_right - norm_truth_word_.BlobBox(b - 1).right()) <
187  norm_box_tolerance_) &&
188  (abs(word2_left - norm_truth_word_.BlobBox(b).left()) <
189  norm_box_tolerance_)) {
190  begin2_truth_index = b;
191  debug_str += "Split found";
192  break;
193  }
194  }
195  debug_str += '\n';
196  }
197  }
198  // Populate truth information in word and word2 with the first and second
199  // part of the original truth.
200  if (begin2_truth_index > 0) {
201  bundle1->truth_has_char_boxes_ = true;
202  bundle1->norm_box_tolerance_ = norm_box_tolerance_;
203  bundle2->truth_has_char_boxes_ = true;
204  bundle2->norm_box_tolerance_ = norm_box_tolerance_;
205  BlamerBundle *curr_bb = bundle1;
206  for (b = 0; b < norm_truth_word_.length(); ++b) {
207  if (b == begin2_truth_index) curr_bb = bundle2;
208  curr_bb->norm_truth_word_.InsertBox(b, norm_truth_word_.BlobBox(b));
209  curr_bb->truth_word_.InsertBox(b, truth_word_.BlobBox(b));
210  curr_bb->truth_text_.push_back(truth_text_[b]);
211  }
212  } else if (incorrect_result_reason_ == IRR_NO_TRUTH) {
213  bundle1->incorrect_result_reason_ = IRR_NO_TRUTH;
214  bundle2->incorrect_result_reason_ = IRR_NO_TRUTH;
215  } else {
216  debug_str += "Truth split not found";
217  debug_str += truth_has_char_boxes_ ?
218  "\n" : " (no truth char boxes)\n";
219  bundle1->SetBlame(IRR_NO_TRUTH_SPLIT, debug_str, NULL, debug);
220  bundle2->SetBlame(IRR_NO_TRUTH_SPLIT, debug_str, NULL, debug);
221  }
222 }
int push_back(T object)
const TBOX & BlobBox(int index) const
Definition: boxword.h:88
inT16 right() const
Definition: rect.h:75
inT16 left() const
Definition: rect.h:68
void add_str_int(const char *str, int number)
Definition: strngs.cpp:376
const int length() const
Definition: boxword.h:85
void InsertBox(int index, const TBOX &box)
Definition: boxword.cpp:151
Definition: strngs.h:44
#define NULL
Definition: host.h:144
void print_to_str(STRING *str) const
Definition: rect.cpp:175
const STRING & debug() const
Definition: blamer.h:116
STRING BlamerBundle::TruthString ( ) const
inline

Definition at line 100 of file blamer.h.

100  {
101  STRING truth_str;
102  for (int i = 0; i < truth_text_.length(); ++i)
103  truth_str += truth_text_[i];
104  return truth_str;
105  }
int length() const
Definition: genericvector.h:79
Definition: strngs.h:44
void BlamerBundle::UpdateBestRating ( float  rating)
inline

Definition at line 122 of file blamer.h.

122  {
123  if (rating < best_correctly_segmented_rating_)
124  best_correctly_segmented_rating_ = rating;
125  }

The documentation for this struct was generated from the following files: