tesseract  5.0.0-alpha-619-ge9db
chopper.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  *
3  * File: chopper.cpp (Formerly chopper.c)
4  * Author: Mark Seaman, OCR Technology
5  *
6  * (c) Copyright 1987, Hewlett-Packard Company.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  *
17  *****************************************************************************/
18 
19 /*----------------------------------------------------------------------
20  I n c l u d e s
21 ----------------------------------------------------------------------*/
22 
23 #include "blamer.h" // for BlamerBundle, IRR_CORRECT
24 #include "blobs.h" // for TPOINT, TBLOB, EDGEPT, TESSLINE, divisible_blob
25 #include "callcpp.h" // for Red
26 #include "dict.h" // for Dict
27 #include "lm_pain_points.h" // for LMPainPoints
28 #include "lm_state.h" // for BestChoiceBundle
29 #include "matrix.h" // for MATRIX
30 #include "normalis.h" // for DENORM
31 #include "pageres.h" // for WERD_RES
32 #include "params.h" // for IntParam, BoolParam
33 #include "ratngs.h" // for BLOB_CHOICE (ptr only), BLOB_CHOICE_LIST (ptr ...
34 #include "rect.h" // for TBOX
35 #include "render.h" // for display_blob
36 #include "seam.h" // for SEAM
37 #include "split.h" // for remove_edgept
38 #include "stopper.h" // for DANGERR
39 #include "tprintf.h" // for tprintf
40 #include "wordrec.h" // for Wordrec, SegSearchPending (ptr only)
41 
42 template <typename T> class GenericVector;
43 
44 // Include automatically generated configuration file if running autoconf.
45 #ifdef HAVE_CONFIG_H
46 #include "config_auto.h"
47 #endif
48 
49 // Even though the limit on the number of chunks may now be removed, keep
50 // the same limit for repeatable behavior, and it may be a speed advantage.
51 static const int kMaxNumChunks = 64;
52 
53 /*----------------------------------------------------------------------
54  F u n c t i o n s
55 ----------------------------------------------------------------------*/
56 
62 static int check_blob(TBLOB *blob) {
63  TESSLINE *outline;
64  EDGEPT *edgept;
65 
66  for (outline = blob->outlines; outline != nullptr; outline = outline->next) {
67  edgept = outline->loop;
68  do {
69  if (edgept == nullptr)
70  break;
71  edgept = edgept->next;
72  }
73  while (edgept != outline->loop);
74  if (edgept == nullptr)
75  return 1;
76  }
77  return 0;
78 }
79 
85 static int any_shared_split_points(const GenericVector<SEAM*>& seams, SEAM *seam) {
86  int length;
87  int index;
88 
89  length = seams.size();
90  for (index = 0; index < length; index++)
91  if (seam->SharesPosition(*seams[index])) return true;
92  return false;
93 }
94 
100 static void preserve_outline(EDGEPT *start) {
101  EDGEPT *srcpt;
102 
103  if (start == nullptr)
104  return;
105  srcpt = start;
106  do {
107  srcpt->flags[1] = 1;
108  srcpt = srcpt->next;
109  }
110  while (srcpt != start);
111  srcpt->flags[1] = 2;
112 }
113 
114 static void preserve_outline_tree(TESSLINE *srcline) {
115  TESSLINE *outline;
116 
117  for (outline = srcline; outline != nullptr; outline = outline->next) {
118  preserve_outline (outline->loop);
119  }
120 }
121 
127 static EDGEPT *restore_outline(EDGEPT *start) {
128  EDGEPT *srcpt;
129  EDGEPT *real_start;
130 
131  if (start == nullptr)
132  return nullptr;
133  srcpt = start;
134  do {
135  if (srcpt->flags[1] == 2)
136  break;
137  srcpt = srcpt->next;
138  }
139  while (srcpt != start);
140  real_start = srcpt;
141  do {
142  srcpt = srcpt->next;
143  if (srcpt->prev->flags[1] == 0) {
144  remove_edgept(srcpt->prev);
145  }
146  }
147  while (srcpt != real_start);
148  return real_start;
149 }
150 
151 static void restore_outline_tree(TESSLINE *srcline) {
152  TESSLINE *outline;
153 
154  for (outline = srcline; outline != nullptr; outline = outline->next) {
155  outline->loop = restore_outline (outline->loop);
156  outline->start = outline->loop->pos;
157  }
158 }
159 
160 /**********************************************************************
161  * total_containment
162  *
163  * Check to see if one of these outlines is totally contained within
164  * the bounding box of the other.
165  **********************************************************************/
166 static int16_t total_containment(TBLOB *blob1, TBLOB *blob2) {
167  TBOX box1 = blob1->bounding_box();
168  TBOX box2 = blob2->bounding_box();
169  return box1.contains(box2) || box2.contains(box1);
170 }
171 
172 // Helper runs all the checks on a seam to make sure it is valid.
173 // Returns the seam if OK, otherwise deletes the seam and returns nullptr.
174 static SEAM* CheckSeam(int debug_level, int32_t blob_number, TWERD* word,
175  TBLOB* blob, TBLOB* other_blob,
176  const GenericVector<SEAM*>& seams, SEAM* seam) {
177  if (seam == nullptr || blob->outlines == nullptr || other_blob->outlines == nullptr ||
178  total_containment(blob, other_blob) || check_blob(other_blob) ||
179  !seam->ContainedByBlob(*blob) || !seam->ContainedByBlob(*other_blob) ||
180  any_shared_split_points(seams, seam) ||
181  !seam->PrepareToInsertSeam(seams, word->blobs, blob_number, false)) {
182  word->blobs.remove(blob_number + 1);
183  if (seam) {
184  seam->UndoSeam(blob, other_blob);
185  delete seam;
186  seam = nullptr;
187 #ifndef GRAPHICS_DISABLED
188  if (debug_level) {
189  if (debug_level >2)
190  display_blob(blob, Red);
191  tprintf("\n** seam being removed ** \n");
192  }
193 #endif
194  } else {
195  delete other_blob;
196  }
197  return nullptr;
198  }
199  return seam;
200 }
201 
202 namespace tesseract {
203 
210 SEAM *Wordrec::attempt_blob_chop(TWERD *word, TBLOB *blob, int32_t blob_number,
211  bool italic_blob,
212  const GenericVector<SEAM*>& seams) {
214  preserve_outline_tree (blob->outlines);
215  TBLOB *other_blob = TBLOB::ShallowCopy(*blob); /* Make new blob */
216  // Insert it into the word.
217  word->blobs.insert(other_blob, blob_number + 1);
218 
219  SEAM *seam = nullptr;
220  if (prioritize_division) {
221  TPOINT location;
222  if (divisible_blob(blob, italic_blob, &location)) {
223  seam = new SEAM(0.0f, location);
224  }
225  }
226  if (seam == nullptr)
227  seam = pick_good_seam(blob);
228  if (chop_debug) {
229  if (seam != nullptr)
230  seam->Print("Good seam picked=");
231  else
232  tprintf("\n** no seam picked *** \n");
233  }
234  if (seam) {
235  seam->ApplySeam(italic_blob, blob, other_blob);
236  }
237 
238  seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob,
239  seams, seam);
240  if (seam == nullptr) {
242  restore_outline_tree(blob->outlines);
244  // If the blob can simply be divided into outlines, then do that.
245  TPOINT location;
246  if (divisible_blob(blob, italic_blob, &location)) {
247  other_blob = TBLOB::ShallowCopy(*blob); /* Make new blob */
248  word->blobs.insert(other_blob, blob_number + 1);
249  seam = new SEAM(0.0f, location);
250  seam->ApplySeam(italic_blob, blob, other_blob);
251  seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob,
252  seams, seam);
253  }
254  }
255  }
256  if (seam != nullptr) {
257  // Make sure this seam doesn't get chopped again.
258  seam->Finalize();
259  }
260  return seam;
261 }
262 
263 
264 SEAM *Wordrec::chop_numbered_blob(TWERD *word, int32_t blob_number,
265  bool italic_blob,
266  const GenericVector<SEAM*>& seams) {
267  return attempt_blob_chop(word, word->blobs[blob_number], blob_number,
268  italic_blob, seams);
269 }
270 
271 
273  bool italic_blob, WERD_RES *word_res,
274  int *blob_number) {
275  TWERD *word = word_res->chopped_word;
276  for (*blob_number = 0; *blob_number < word->NumBlobs(); ++*blob_number) {
277  TBLOB *blob = word->blobs[*blob_number];
278  TPOINT topleft, botright;
279  topleft.x = blob->bounding_box().left();
280  topleft.y = blob->bounding_box().top();
281  botright.x = blob->bounding_box().right();
282  botright.y = blob->bounding_box().bottom();
283 
284  TPOINT original_topleft, original_botright;
285  word_res->denorm.DenormTransform(nullptr, topleft, &original_topleft);
286  word_res->denorm.DenormTransform(nullptr, botright, &original_botright);
287 
288  TBOX original_box = TBOX(original_topleft.x, original_botright.y,
289  original_botright.x, original_topleft.y);
290 
291  bool almost_equal_box = false;
292  int num_overlap = 0;
293  for (int i = 0; i < boxes.size(); i++) {
294  if (original_box.overlap_fraction(boxes[i]) > 0.125)
295  num_overlap++;
296  if (original_box.almost_equal(boxes[i], 3))
297  almost_equal_box = true;
298  }
299 
300  TPOINT location;
301  if (divisible_blob(blob, italic_blob, &location) ||
302  (!almost_equal_box && num_overlap > 1)) {
303  SEAM *seam = attempt_blob_chop(word, blob, *blob_number,
304  italic_blob, word_res->seam_array);
305  if (seam != nullptr)
306  return seam;
307  }
308  }
309 
310  *blob_number = -1;
311  return nullptr;
312 }
313 
327  DANGERR *fixpt,
328  bool split_next_to_fragment,
329  bool italic_blob,
330  WERD_RES* word,
331  int* blob_number) {
332  float rating_ceiling = FLT_MAX;
333  SEAM *seam = nullptr;
334  do {
335  *blob_number = select_blob_to_split_from_fixpt(fixpt);
336  if (chop_debug) tprintf("blob_number from fixpt = %d\n", *blob_number);
337  bool split_point_from_dict = (*blob_number != -1);
338  if (split_point_from_dict) {
339  fixpt->clear();
340  } else {
341  *blob_number = select_blob_to_split(blob_choices, rating_ceiling,
342  split_next_to_fragment);
343  }
344  if (chop_debug) tprintf("blob_number = %d\n", *blob_number);
345  if (*blob_number == -1)
346  return nullptr;
347 
348  // TODO(rays) it may eventually help to allow italic_blob to be true,
349  seam = chop_numbered_blob(word->chopped_word, *blob_number, italic_blob,
350  word->seam_array);
351  if (seam != nullptr)
352  return seam; // Success!
353  if (blob_choices[*blob_number] == nullptr)
354  return nullptr;
355  if (!split_point_from_dict) {
356  // We chopped the worst rated blob, try something else next time.
357  rating_ceiling = blob_choices[*blob_number]->rating();
358  }
359  } while (true);
360  return seam;
361 }
362 
371  const GenericVector<BLOB_CHOICE*>& blob_choices,
372  WERD_RES* word_res,
373  int* blob_number) {
374  if (prioritize_division) {
375  return chop_overlapping_blob(boxes, true, word_res, blob_number);
376  } else {
377  return improve_one_blob(blob_choices, nullptr, false, true, word_res,
378  blob_number);
379  }
380 }
381 
390 void Wordrec::chop_word_main(WERD_RES *word) {
391  int num_blobs = word->chopped_word->NumBlobs();
392  if (word->ratings == nullptr) {
393  word->ratings = new MATRIX(num_blobs, wordrec_max_join_chunks);
394  }
395  if (word->ratings->get(0, 0) == nullptr) {
396  // Run initial classification.
397  for (int b = 0; b < num_blobs; ++b) {
398  BLOB_CHOICE_LIST* choices = classify_piece(word->seam_array, b, b,
399  "Initial:", word->chopped_word,
400  word->blamer_bundle);
401  word->ratings->put(b, b, choices);
402  }
403  } else {
404  // Blobs have been pre-classified. Set matrix cell for all blob choices
405  for (int col = 0; col < word->ratings->dimension(); ++col) {
406  for (int row = col; row < word->ratings->dimension() &&
407  row < col + word->ratings->bandwidth(); ++row) {
408  BLOB_CHOICE_LIST* choices = word->ratings->get(col, row);
409  if (choices != nullptr) {
410  BLOB_CHOICE_IT bc_it(choices);
411  for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) {
412  bc_it.data()->set_matrix_cell(col, row);
413  }
414  }
415  }
416  }
417  }
418 
419  // Run Segmentation Search.
420  BestChoiceBundle best_choice_bundle(word->ratings->dimension());
421  SegSearch(word, &best_choice_bundle, word->blamer_bundle);
422 
423  if (word->best_choice == nullptr) {
424  // SegSearch found no valid paths, so just use the leading diagonal.
426  }
427  word->RebuildBestState();
428  // If we finished without a hyphen at the end of the word, let the next word
429  // be found in the dictionary.
430  if (word->word->flag(W_EOL) &&
431  !getDict().has_hyphen_end(*word->best_choice)) {
432  getDict().reset_hyphen_vars(true);
433  }
434 
435  if (word->blamer_bundle != nullptr && this->fill_lattice_ != nullptr) {
436  CallFillLattice(*word->ratings, word->best_choices,
437  *word->uch_set, word->blamer_bundle);
438  }
439  if (wordrec_debug_level > 0) {
440  tprintf("Final Ratings Matrix:\n");
441  word->ratings->print(getDict().getUnicharset());
442  }
443  word->FilterWordChoices(getDict().stopper_debug_level);
444 }
445 
453 void Wordrec::improve_by_chopping(float rating_cert_scale,
454  WERD_RES* word,
455  BestChoiceBundle* best_choice_bundle,
456  BlamerBundle* blamer_bundle,
457  LMPainPoints* pain_points,
459  int blob_number;
460  do { // improvement loop.
461  // Make a simple vector of BLOB_CHOICEs to make it easy to pick which
462  // one to chop.
463  GenericVector<BLOB_CHOICE*> blob_choices;
464  int num_blobs = word->ratings->dimension();
465  for (int i = 0; i < num_blobs; ++i) {
466  BLOB_CHOICE_LIST* choices = word->ratings->get(i, i);
467  if (choices == nullptr || choices->empty()) {
468  blob_choices.push_back(nullptr);
469  } else {
470  BLOB_CHOICE_IT bc_it(choices);
471  blob_choices.push_back(bc_it.data());
472  }
473  }
474  SEAM* seam = improve_one_blob(blob_choices, &best_choice_bundle->fixpt,
475  false, false, word, &blob_number);
476  if (seam == nullptr) break;
477  // A chop has been made. We have to correct all the data structures to
478  // take into account the extra bottom-level blob.
479  // Put the seam into the seam_array and correct everything else on the
480  // word: ratings matrix (including matrix location in the BLOB_CHOICES),
481  // states in WERD_CHOICEs, and blob widths.
482  word->InsertSeam(blob_number, seam);
483  // Insert a new entry in the beam array.
484  best_choice_bundle->beam.insert(new LanguageModelState, blob_number);
485  // Fixpts are outdated, but will get recalculated.
486  best_choice_bundle->fixpt.clear();
487  // Remap existing pain points.
488  pain_points->RemapForSplit(blob_number);
489  // Insert a new pending at the chop point.
490  pending->insert(SegSearchPending(), blob_number);
491 
492  // Classify the two newly created blobs using ProcessSegSearchPainPoint,
493  // as that updates the pending correctly and adds new pain points.
494  MATRIX_COORD pain_point(blob_number, blob_number);
495  ProcessSegSearchPainPoint(0.0f, pain_point, "Chop1", pending, word,
496  pain_points, blamer_bundle);
497  pain_point.col = blob_number + 1;
498  pain_point.row = blob_number + 1;
499  ProcessSegSearchPainPoint(0.0f, pain_point, "Chop2", pending, word,
500  pain_points, blamer_bundle);
501  if (language_model_->language_model_ngram_on) {
502  // N-gram evaluation depends on the number of blobs in a chunk, so we
503  // have to re-evaluate everything in the word.
504  ResetNGramSearch(word, best_choice_bundle, pending);
505  blob_number = 0;
506  }
507  // Run language model incrementally. (Except with the n-gram model on.)
508  UpdateSegSearchNodes(rating_cert_scale, blob_number, pending,
509  word, pain_points, best_choice_bundle, blamer_bundle);
510  } while (!language_model_->AcceptableChoiceFound() &&
511  word->ratings->dimension() < kMaxNumChunks);
512 
513  // If after running only the chopper best_choice is incorrect and no blame
514  // has been yet set, blame the classifier if best_choice is classifier's
515  // top choice and is a dictionary word (i.e. language model could not have
516  // helped). Otherwise blame the tradeoff between the classifier and
517  // the old language model (permuters).
518  if (word->blamer_bundle != nullptr &&
520  !word->blamer_bundle->ChoiceIsCorrect(word->best_choice)) {
521  bool valid_permuter = word->best_choice != nullptr &&
524  getDict().getUnicharset(),
525  valid_permuter,
527  }
528 }
529 
530 
531 /**********************************************************************
532  * select_blob_to_split
533  *
534  * These are the results of the last classification. Find a likely
535  * place to apply splits. If none, return -1.
536  **********************************************************************/
538  const GenericVector<BLOB_CHOICE*>& blob_choices,
539  float rating_ceiling, bool split_next_to_fragment) {
540  BLOB_CHOICE *blob_choice;
541  int x;
542  float worst = -FLT_MAX;
543  int worst_index = -1;
544  float worst_near_fragment = -FLT_MAX;
545  int worst_index_near_fragment = -1;
546  const CHAR_FRAGMENT **fragments = nullptr;
547 
548  if (chop_debug) {
549  if (rating_ceiling < FLT_MAX)
550  tprintf("rating_ceiling = %8.4f\n", rating_ceiling);
551  else
552  tprintf("rating_ceiling = No Limit\n");
553  }
554 
555  if (split_next_to_fragment && blob_choices.size() > 0) {
556  fragments = new const CHAR_FRAGMENT *[blob_choices.size()];
557  if (blob_choices[0] != nullptr) {
558  fragments[0] = getDict().getUnicharset().get_fragment(
559  blob_choices[0]->unichar_id());
560  } else {
561  fragments[0] = nullptr;
562  }
563  }
564 
565  for (x = 0; x < blob_choices.size(); ++x) {
566  if (blob_choices[x] == nullptr) {
567  delete[] fragments;
568  return x;
569  } else {
570  blob_choice = blob_choices[x];
571  // Populate fragments for the following position.
572  if (split_next_to_fragment && x+1 < blob_choices.size()) {
573  if (blob_choices[x + 1] != nullptr) {
574  fragments[x + 1] = getDict().getUnicharset().get_fragment(
575  blob_choices[x + 1]->unichar_id());
576  } else {
577  fragments[x + 1] = nullptr;
578  }
579  }
580  if (blob_choice->rating() < rating_ceiling &&
581  blob_choice->certainty() < tessedit_certainty_threshold) {
582  // Update worst and worst_index.
583  if (blob_choice->rating() > worst) {
584  worst_index = x;
585  worst = blob_choice->rating();
586  }
587  if (split_next_to_fragment) {
588  // Update worst_near_fragment and worst_index_near_fragment.
589  bool expand_following_fragment =
590  (x + 1 < blob_choices.size() &&
591  fragments[x+1] != nullptr && !fragments[x+1]->is_beginning());
592  bool expand_preceding_fragment =
593  (x > 0 && fragments[x-1] != nullptr && !fragments[x-1]->is_ending());
594  if ((expand_following_fragment || expand_preceding_fragment) &&
595  blob_choice->rating() > worst_near_fragment) {
596  worst_index_near_fragment = x;
597  worst_near_fragment = blob_choice->rating();
598  if (chop_debug) {
599  tprintf("worst_index_near_fragment=%d"
600  " expand_following_fragment=%d"
601  " expand_preceding_fragment=%d\n",
602  worst_index_near_fragment,
603  expand_following_fragment,
604  expand_preceding_fragment);
605  }
606  }
607  }
608  }
609  }
610  }
611  delete[] fragments;
612  // TODO(daria): maybe a threshold of badness for
613  // worst_near_fragment would be useful.
614  return worst_index_near_fragment != -1 ?
615  worst_index_near_fragment : worst_index;
616 }
617 
618 /**********************************************************************
619  * select_blob_to_split_from_fixpt
620  *
621  * Given the fix point from a dictionary search, if there is a single
622  * dangerous blob that maps to multiple characters, return that blob
623  * index as a place we need to split. If none, return -1.
624  **********************************************************************/
626  if (!fixpt)
627  return -1;
628  for (int i = 0; i < fixpt->size(); i++) {
629  if ((*fixpt)[i].begin + 1 == (*fixpt)[i].end &&
630  (*fixpt)[i].dangerous &&
631  (*fixpt)[i].correct_is_ngram) {
632  return (*fixpt)[i].begin;
633  }
634  }
635  return -1;
636 }
637 
638 } // namespace tesseract
TBOX
Definition: cleanapi_test.cc:19
render.h
TESSLINE::start
TPOINT start
Definition: blobs.h:276
GenericVector::remove
void remove(int index)
Definition: genericvector.h:765
WERD_RES::FakeWordFromRatings
void FakeWordFromRatings(PermuterType permuter)
Definition: pageres.cpp:894
BlamerBundle::BlameClassifierOrLangModel
void BlameClassifierOrLangModel(const WERD_RES *word, const UNICHARSET &unicharset, bool valid_permuter, bool debug)
Definition: blamer.cpp:375
BlamerBundle::ChoiceIsCorrect
bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const
Definition: blamer.cpp:117
normalis.h
pageres.h
tesseract::Wordrec::improve_one_blob
SEAM * improve_one_blob(const GenericVector< BLOB_CHOICE * > &blob_choices, DANGERR *fixpt, bool split_next_to_fragment, bool italic_blob, WERD_RES *word, int *blob_number)
Definition: chopper.cpp:325
WERD::flag
bool flag(WERD_FLAGS mask) const
Definition: werd.h:116
dict.h
SEAM::ApplySeam
void ApplySeam(bool italic_blob, TBLOB *blob, TBLOB *other_blob) const
Definition: seam.cpp:116
TPOINT
Definition: blobs.h:49
TESSLINE::loop
EDGEPT * loop
Definition: blobs.h:278
TBLOB::ShallowCopy
static TBLOB * ShallowCopy(const TBLOB &src)
Definition: blobs.cpp:334
split.h
tesseract::Classify::prioritize_division
bool prioritize_division
Definition: classify.h:428
tesseract::Wordrec::SegSearch
void SegSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
Definition: segsearch.cpp:41
SEAM::ContainedByBlob
bool ContainedByBlob(const TBLOB &blob) const
Definition: seam.h:71
tesseract::Wordrec::chop_word_main
void chop_word_main(WERD_RES *word)
Definition: chopper.cpp:389
TWERD
Definition: blobs.h:416
TBLOB::outlines
TESSLINE * outlines
Definition: blobs.h:398
GenericVector::insert
void insert(const T &t, int index)
Definition: genericvector.h:750
WERD_RES::denorm
DENORM denorm
Definition: pageres.h:195
BLOB_CHOICE::certainty
float certainty() const
Definition: ratngs.h:81
params.h
divisible_blob
bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT *location)
Definition: blobs.cpp:910
MATRIX::print
void print(const UNICHARSET &unicharset) const
Definition: matrix.cpp:110
wordrec.h
tesseract::Wordrec::chop_debug
int chop_debug
Definition: wordrec.h:204
tesseract::Wordrec::chop_overlapping_blob
SEAM * chop_overlapping_blob(const GenericVector< TBOX > &boxes, bool italic_blob, WERD_RES *word_res, int *blob_number)
Definition: chopper.cpp:271
MATRIX
Definition: matrix.h:574
TESSLINE
Definition: blobs.h:201
TBOX::top
int16_t top() const
Definition: rect.h:57
TBOX::contains
bool contains(const FCOORD pt) const
Definition: rect.h:330
WERD_CHOICE::permuter
uint8_t permuter() const
Definition: ratngs.h:334
WERD_RES
Definition: pageres.h:160
stopper.h
tesseract::Wordrec::attempt_blob_chop
SEAM * attempt_blob_chop(TWERD *word, TBLOB *blob, int32_t blob_number, bool italic_blob, const GenericVector< SEAM * > &seams)
Definition: chopper.cpp:209
WERD_RES::ratings
MATRIX * ratings
Definition: pageres.h:231
rect.h
TESSLINE::next
TESSLINE * next
Definition: blobs.h:279
blobs.h
tesseract::Dict::valid_word_permuter
static bool valid_word_permuter(uint8_t perm, bool numbers_ok)
Check all the DAWGs to see if this word is in any of them.
Definition: dict.h:474
SEAM
Definition: seam.h:36
ratngs.h
WERD_RES::uch_set
const UNICHARSET * uch_set
Definition: pageres.h:197
tesseract::Classify::getDict
virtual Dict & getDict()
Definition: classify.h:107
WERD_RES::blamer_bundle
BlamerBundle * blamer_bundle
Definition: pageres.h:246
tesseract::Wordrec::ResetNGramSearch
void ResetNGramSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, GenericVector< SegSearchPending > *pending)
Definition: segsearch.cpp:310
EDGEPT::prev
EDGEPT * prev
Definition: blobs.h:191
tesseract::Wordrec::tessedit_certainty_threshold
double tessedit_certainty_threshold
Definition: wordrec.h:203
remove_edgept
void remove_edgept(EDGEPT *point)
Definition: split.cpp:196
GenericVector::push_back
int push_back(T object)
Definition: genericvector.h:799
WERD_RES::best_choice
WERD_CHOICE * best_choice
Definition: pageres.h:235
SEAM::UndoSeam
void UndoSeam(TBLOB *blob, TBLOB *other_blob) const
Definition: seam.cpp:132
WERD_RES::InsertSeam
void InsertSeam(int blob_number, SEAM *seam)
Definition: pageres.cpp:414
TPOINT::x
int16_t x
Definition: blobs.h:91
display_blob
void display_blob(TBLOB *blob, C_COL color)
Definition: render.cpp:49
DENORM::DenormTransform
void DenormTransform(const DENORM *last_denorm, const TPOINT &pt, TPOINT *original) const
Definition: normalis.cpp:389
SEAM::PrepareToInsertSeam
bool PrepareToInsertSeam(const GenericVector< SEAM * > &seams, const GenericVector< TBLOB * > &blobs, int insert_index, bool modify)
Definition: seam.cpp:74
tesseract::Wordrec::classify_piece
virtual BLOB_CHOICE_LIST * classify_piece(const GenericVector< SEAM * > &seams, int16_t start, int16_t end, const char *description, TWERD *word, BlamerBundle *blamer_bundle)
Definition: pieces.cpp:52
W_EOL
end of line
Definition: werd.h:47
Red
Definition: callcpp.h:29
TPOINT::y
int16_t y
Definition: blobs.h:92
GENERIC_2D_ARRAY::get
T get(ICOORD pos) const
Definition: matrix.h:227
matrix.h
TWERD::blobs
GenericVector< TBLOB * > blobs
Definition: blobs.h:457
tesseract::Wordrec::wordrec_debug_level
int wordrec_debug_level
Definition: wordrec.h:226
tesseract::Wordrec::pick_good_seam
SEAM * pick_good_seam(TBLOB *blob)
Definition: findseam.cpp:210
CHAR_FRAGMENT::is_ending
bool is_ending() const
Definition: unicharset.h:108
TBOX::bottom
int16_t bottom() const
Definition: rect.h:64
WERD_RES::FilterWordChoices
void FilterWordChoices(int debug_level)
Definition: pageres.cpp:509
TBOX::overlap_fraction
double overlap_fraction(const TBOX &box) const
Definition: rect.h:381
tesseract::Wordrec::chop_one_blob
SEAM * chop_one_blob(const GenericVector< TBOX > &boxes, const GenericVector< BLOB_CHOICE * > &blob_choices, WERD_RES *word_res, int *blob_number)
Definition: chopper.cpp:369
tesseract::Wordrec::repair_unchopped_blobs
int repair_unchopped_blobs
Definition: wordrec.h:202
tesseract::Wordrec::select_blob_to_split_from_fixpt
int select_blob_to_split_from_fixpt(DANGERR *fixpt)
Definition: chopper.cpp:622
WERD_RES::chopped_word
TWERD * chopped_word
Definition: pageres.h:206
SEAM::Print
void Print(const char *label) const
Definition: seam.cpp:152
IRR_CORRECT
Definition: blamer.h:54
tesseract
Definition: baseapi.h:65
WERD_RES::best_choices
WERD_CHOICE_LIST best_choices
Definition: pageres.h:243
tprintf.h
callcpp.h
TOP_CHOICE_PERM
Definition: ratngs.h:233
WERD_RES::seam_array
GenericVector< SEAM * > seam_array
Definition: pageres.h:208
BLOB_CHOICE::rating
float rating() const
Definition: ratngs.h:78
tesseract::Wordrec::chop_numbered_blob
SEAM * chop_numbered_blob(TWERD *word, int32_t blob_number, bool italic_blob, const GenericVector< SEAM * > &seams)
Definition: chopper.cpp:263
TBLOB::bounding_box
TBOX bounding_box() const
Definition: blobs.cpp:466
GenericVector
Definition: baseapi.h:40
EDGEPT::flags
char flags[EDGEPTFLAGS]
Definition: blobs.h:189
CHAR_FRAGMENT
Definition: unicharset.h:48
tesseract::Wordrec::improve_by_chopping
void improve_by_chopping(float rating_cert_scale, WERD_RES *word, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle, LMPainPoints *pain_points, GenericVector< SegSearchPending > *pending)
Definition: chopper.cpp:452
tesseract::Wordrec::select_blob_to_split
int select_blob_to_split(const GenericVector< BLOB_CHOICE * > &blob_choices, float rating_ceiling, bool split_next_to_fragment)
Definition: chopper.cpp:535
WERD_RES::RebuildBestState
void RebuildBestState()
Definition: pageres.cpp:804
tesseract::Wordrec::wordrec_debug_blamer
bool wordrec_debug_blamer
Definition: wordrec.h:231
BandTriMatrix::bandwidth
int bandwidth() const
Definition: matrix.h:534
tesseract::Dict::has_hyphen_end
bool has_hyphen_end(const UNICHARSET *unicharset, UNICHAR_ID unichar_id, bool first_pos) const
Check whether the word has a hyphen at the end.
Definition: dict.h:152
BLOB_CHOICE
Definition: ratngs.h:49
MATRIX_COORD
Definition: matrix.h:604
TBLOB
Definition: blobs.h:282
tesseract::Dict::reset_hyphen_vars
void reset_hyphen_vars(bool last_word_on_line)
Definition: hyphen.cpp:42
TBOX::left
int16_t left() const
Definition: rect.h:71
GenericVector::clear
void clear()
Definition: genericvector.h:857
TBOX::right
int16_t right() const
Definition: rect.h:78
TBOX::almost_equal
bool almost_equal(const TBOX &box, int tolerance) const
Definition: rect.cpp:250
GENERIC_2D_ARRAY::put
void put(ICOORD pos, const T &thing)
Definition: matrix.h:219
EDGEPT
Definition: blobs.h:97
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
tesseract::Dict::getUnicharset
const UNICHARSET & getUnicharset() const
Definition: dict.h:101
UNICHARSET::get_fragment
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:724
SEAM::Finalize
void Finalize()
Definition: seam.h:108
seam.h
WERD_RES::word
WERD * word
Definition: pageres.h:180
lm_pain_points.h
BlamerBundle::incorrect_result_reason
IncorrectResultReason incorrect_result_reason() const
Definition: blamer.h:121
CHAR_FRAGMENT::is_beginning
bool is_beginning() const
Definition: unicharset.h:105
tesseract::Wordrec::language_model_
std::unique_ptr< LanguageModel > language_model_
Definition: wordrec.h:471
SEAM::SharesPosition
bool SharesPosition(const SEAM &other) const
Definition: seam.h:87
tesseract::Wordrec::ProcessSegSearchPainPoint
void ProcessSegSearchPainPoint(float pain_point_priority, const MATRIX_COORD &pain_point, const char *pain_point_type, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle)
Definition: segsearch.cpp:247
BlamerBundle
Definition: blamer.h:103
tesseract::Classify::allow_blob_division
bool allow_blob_division
Definition: classify.h:423
blamer.h
GenericVector::size
int size() const
Definition: genericvector.h:71
tesseract::Wordrec::wordrec_max_join_chunks
int wordrec_max_join_chunks
Definition: wordrec.h:228
tesseract::Wordrec::CallFillLattice
void CallFillLattice(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
Definition: wordrec.h:259
tesseract::Wordrec::UpdateSegSearchNodes
void UpdateSegSearchNodes(float rating_cert_scale, int starting_col, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
Definition: segsearch.cpp:179
EDGEPT::pos
TPOINT pos
Definition: blobs.h:184
TWERD::NumBlobs
int NumBlobs() const
Definition: blobs.h:446
EDGEPT::next
EDGEPT * next
Definition: blobs.h:190
BandTriMatrix::dimension
int dimension() const
Definition: matrix.h:532
lm_state.h
TBOX
Definition: rect.h:33