tesseract  5.0.0-alpha-619-ge9db
WERD_RES Class Reference

#include <pageres.h>

Inheritance diagram for WERD_RES:
ELIST_LINK

Public Member Functions

 WERD_RES ()=default
 
 WERD_RES (WERD *the_word)
 
 WERD_RES (const WERD_RES &source)
 
 ~WERD_RES ()
 
const char * BestUTF8 (int blob_index, bool in_rtl_context) const
 
const char * RawUTF8 (int blob_index) const
 
UNICHARSET::Direction SymbolDirection (int blob_index) const
 
bool AnyRtlCharsInWord () const
 
bool AnyLtrCharsInWord () const
 
bool UnicharsInReadingOrder () const
 
void Clear ()
 
void ClearResults ()
 
void ClearWordChoices ()
 
void ClearRatings ()
 
WERD_RES & operator= (const WERD_RES &source)
 
void CopySimpleFields (const WERD_RES &source)
 
void InitForRetryRecognition (const WERD_RES &source)
 
bool SetupForRecognition (const UNICHARSET &unicharset_in, tesseract::Tesseract *tesseract, Pix *pix, int norm_mode, const TBOX *norm_box, bool numeric_mode, bool use_body_size, bool allow_detailed_fx, ROW *row, const BLOCK *block)
 
void SetupBasicsFromChoppedWord (const UNICHARSET &unicharset_in)
 
void SetupFake (const UNICHARSET &uch)
 
void SetupWordScript (const UNICHARSET &unicharset_in)
 
void SetupBlamerBundle ()
 
void SetupBlobWidthsAndGaps ()
 
void InsertSeam (int blob_number, SEAM *seam)
 
bool AlternativeChoiceAdjustmentsWorseThan (float threshold) const
 
bool IsAmbiguous ()
 
bool StatesAllValid ()
 
void DebugWordChoices (bool debug, const char *word_to_debug)
 
void DebugTopChoice (const char *msg) const
 
void FilterWordChoices (int debug_level)
 
void ComputeAdaptionThresholds (float certainty_scale, float min_rating, float max_rating, float rating_margin, float *thresholds)
 
bool LogNewRawChoice (WERD_CHOICE *word_choice)
 
bool LogNewCookedChoice (int max_num_choices, bool debug, WERD_CHOICE *word_choice)
 
void PrintBestChoices () const
 
int GetBlobsWidth (int start_blob, int last_blob)
 
int GetBlobsGap (int blob_index)
 
BLOB_CHOICE * GetBlobChoice (int index) const
 
BLOB_CHOICE_LIST * GetBlobChoices (int index) const
 
void ConsumeWordResults (WERD_RES *word)
 
void ReplaceBestChoice (WERD_CHOICE *choice)
 
void RebuildBestState ()
 
void CloneChoppedToRebuild ()
 
void SetupBoxWord ()
 
void SetScriptPositions ()
 
void SetAllScriptPositions (tesseract::ScriptPos position)
 
void FakeClassifyWord (int blob_count, BLOB_CHOICE **choices)
 
void FakeWordFromRatings (PermuterType permuter)
 
void BestChoiceToCorrectText ()
 
bool ConditionalBlobMerge (std::function< UNICHAR_ID(UNICHAR_ID, UNICHAR_ID)> class_cb, std::function< bool(const TBOX &, const TBOX &)> box_cb)
 
void MergeAdjacentBlobs (int index)
 
UNICHAR_ID BothQuotes (UNICHAR_ID id1, UNICHAR_ID id2)
 
void fix_quotes ()
 
UNICHAR_ID BothHyphens (UNICHAR_ID id1, UNICHAR_ID id2)
 
bool HyphenBoxesOverlap (const TBOX &box1, const TBOX &box2)
 
void fix_hyphens ()
 
UNICHAR_ID BothSpaces (UNICHAR_ID id1, UNICHAR_ID id2)
 
void merge_tess_fails ()
 
void copy_on (WERD_RES *word_res)
 
bool PiecesAllNatural (int start, int count) const
 
- Public Member Functions inherited from ELIST_LINK
 ELIST_LINK ()
 
 ELIST_LINK (const ELIST_LINK &)
 
void operator= (const ELIST_LINK &)
 

Static Public Member Functions

static WERD_RES * deep_copy (const WERD_RES *src)
 

Public Attributes

WERD * word = nullptr
 
tesseract::BoxWord * bln_boxes = nullptr
 
ROW * blob_row = nullptr
 
DENORM denorm
 
const UNICHARSET * uch_set = nullptr
 
TWERD * chopped_word = nullptr
 
GenericVector< SEAM * > seam_array
 
GenericVector< int > blob_widths
 
GenericVector< int > blob_gaps
 
std::vector< std::vector< std::pair< const char *, float > > > timesteps
 
std::vector< std::vector< std::vector< std::pair< const char *, float > > > > segmented_timesteps
 
std::vector< std::vector< std::pair< const char *, float > > > CTC_symbol_choices
 
bool leading_space = false
 
int end = 0
 
MATRIX * ratings = nullptr
 
WERD_CHOICE * best_choice = nullptr
 
WERD_CHOICE * raw_choice = nullptr
 
WERD_CHOICE_LIST best_choices
 
BlamerBundle * blamer_bundle = nullptr
 
TWERD * rebuild_word = nullptr
 
tesseract::BoxWord * box_word = nullptr
 
tesseract::Tesseract * tesseract = nullptr
 
GenericVector< int > best_state
 
GenericVector< STRING > correct_text
 
WERD_CHOICE * ep_choice = nullptr
 
REJMAP reject_map
 
bool tess_failed = false
 
bool tess_accepted = false
 
bool tess_would_adapt = false
 
bool done = false
 
bool small_caps = false
 
bool odd_size = false
 
const FontInfo * fontinfo = nullptr
 
const FontInfo * fontinfo2 = nullptr
 
int8_t fontinfo_id_count = 0
 
int8_t fontinfo_id2_count = 0
 
bool guessed_x_ht = true
 
bool guessed_caps_ht = true
 
CRUNCH_MODE unlv_crunch_mode = CR_NONE
 
float x_height = 0.0f
 
float caps_height = 0.0f
 
float baseline_shift = 0.0f
 
float space_certainty = 0.0f
 
bool combination = false
 
bool part_of_combo = false
 
bool reject_spaces = false
 

Detailed Description

Definition at line 160 of file pageres.h.

Constructor & Destructor Documentation

◆ WERD_RES() [1/3]

WERD_RES::WERD_RES ( )
default

◆ WERD_RES() [2/3]

WERD_RES::WERD_RES ( WERD * the_word)
inline

Definition at line 339 of file pageres.h.

344  {

◆ WERD_RES() [3/3]

WERD_RES::WERD_RES ( const WERD_RES & source)
inline

Definition at line 344 of file pageres.h.

344  {
345  word = the_word;
346  }
347  // Deep copies everything except the ratings MATRIX.
348  // To get that use deep_copy below.

◆ ~WERD_RES()

WERD_RES::~WERD_RES ( )

Definition at line 1086 of file pageres.cpp.

Member Function Documentation

◆ AlternativeChoiceAdjustmentsWorseThan()

bool WERD_RES::AlternativeChoiceAdjustmentsWorseThan ( float  threshold) const

Definition at line 435 of file pageres.cpp.

438  {
439  // The choices are not changed by this iteration.
440  WERD_CHOICE_IT wc_it(const_cast<WERD_CHOICE_LIST*>(&best_choices));
441  for (wc_it.forward(); !wc_it.at_first(); wc_it.forward()) {
442  WERD_CHOICE* choice = wc_it.data();
443  if (choice->adjust_factor() <= threshold)
444  return false;

◆ AnyLtrCharsInWord()

bool WERD_RES::AnyLtrCharsInWord ( ) const
inline

Definition at line 403 of file pageres.h.

408  {
409  if (uch_set == nullptr || best_choice == nullptr || best_choice->length() < 1)
410  return false;
411  for (int id = 0; id < best_choice->length(); id++) {
412  int unichar_id = best_choice->unichar_id(id);
413  if (unichar_id < 0 || unichar_id >= uch_set->size())
414  continue; // Ignore illegal chars.
415  UNICHARSET::Direction dir = uch_set->get_direction(unichar_id);
416  if (dir == UNICHARSET::U_LEFT_TO_RIGHT ||

◆ AnyRtlCharsInWord()

bool WERD_RES::AnyRtlCharsInWord ( ) const
inline

Definition at line 387 of file pageres.h.

392  {
393  if (uch_set == nullptr || best_choice == nullptr || best_choice->length() < 1)
394  return false;
395  for (int id = 0; id < best_choice->length(); id++) {
396  int unichar_id = best_choice->unichar_id(id);
397  if (unichar_id < 0 || unichar_id >= uch_set->size())
398  continue; // Ignore illegal chars.
400  uch_set->get_direction(unichar_id);
401  if (dir == UNICHARSET::U_RIGHT_TO_LEFT ||

◆ BestChoiceToCorrectText()

void WERD_RES::BestChoiceToCorrectText ( )

Definition at line 920 of file pageres.cpp.

923  {
925  ASSERT_HOST(best_choice != nullptr);
926  for (int i = 0; i < best_choice->length(); ++i) {
927  UNICHAR_ID choice_id = best_choice->unichar_id(i);
928  const char* blob_choice = uch_set->id_to_unichar(choice_id);

◆ BestUTF8()

const char* WERD_RES::BestUTF8 ( int  blob_index,
bool  in_rtl_context 
) const
inline

Definition at line 357 of file pageres.h.

362  {
363  if (blob_index < 0 || best_choice == nullptr ||
364  blob_index >= best_choice->length())
365  return nullptr;
366  UNICHAR_ID id = best_choice->unichar_id(blob_index);
367  if (id < 0 || id >= uch_set->size())
368  return nullptr;

◆ BothHyphens()

UNICHAR_ID WERD_RES::BothHyphens ( UNICHAR_ID  id1,
UNICHAR_ID  id2 
)

Definition at line 1025 of file pageres.cpp.

1028  {
1029  const char *ch = uch_set->id_to_unichar(id1);
1030  const char *next_ch = uch_set->id_to_unichar(id2);
1031  if (strlen(ch) == 1 && strlen(next_ch) == 1 &&
1032  (*ch == '-' || *ch == '~') && (*next_ch == '-' || *next_ch == '~'))

◆ BothQuotes()

UNICHAR_ID WERD_RES::BothQuotes ( UNICHAR_ID  id1,
UNICHAR_ID  id2 
)

Definition at line 1003 of file pageres.cpp.

1006  {
1007  const char *ch = uch_set->id_to_unichar(id1);
1008  const char *next_ch = uch_set->id_to_unichar(id2);
1009  if (is_simple_quote(ch, strlen(ch)) &&
1010  is_simple_quote(next_ch, strlen(next_ch)))

◆ BothSpaces()

UNICHAR_ID WERD_RES::BothSpaces ( UNICHAR_ID  id1,
UNICHAR_ID  id2 
)

Definition at line 1054 of file pageres.cpp.

1057  {
1058  if (id1 == id2 && id1 == uch_set->unichar_to_id(" "))
1059  return id1;

◆ Clear()

void WERD_RES::Clear ( )

Definition at line 1090 of file pageres.cpp.

1093  {
1094  if (combination) {
1095  delete word;
1096  }
1097  word = nullptr;
1098  delete blamer_bundle;

◆ ClearRatings()

void WERD_RES::ClearRatings ( )

Definition at line 1133 of file pageres.cpp.

1136  {
1137  if (ratings != nullptr) {
1139  delete ratings;

◆ ClearResults()

void WERD_RES::ClearResults ( )

Definition at line 1100 of file pageres.cpp.

1103  {
1104  done = false;
1105  fontinfo = nullptr;
1106  fontinfo2 = nullptr;
1107  fontinfo_id_count = 0;
1108  fontinfo_id2_count = 0;
1109  delete bln_boxes;
1110  bln_boxes = nullptr;
1111  blob_row = nullptr;
1112  delete chopped_word;
1113  chopped_word = nullptr;
1114  delete rebuild_word;
1115  rebuild_word = nullptr;
1116  delete box_word;
1117  box_word = nullptr;
1118  best_state.clear();
1119  correct_text.clear();
1121  seam_array.clear();
1122  blob_widths.clear();
1123  blob_gaps.clear();
1124  ClearRatings();

◆ ClearWordChoices()

void WERD_RES::ClearWordChoices ( )

Definition at line 1125 of file pageres.cpp.

1128  {
1129  best_choice = nullptr;
1130  delete raw_choice;
1131  raw_choice = nullptr;
1132  best_choices.clear();

◆ CloneChoppedToRebuild()

void WERD_RES::CloneChoppedToRebuild ( )

Definition at line 831 of file pageres.cpp.

834  {
835  delete rebuild_word;
837  SetupBoxWord();
838  int word_len = box_word->length();
839  best_state.reserve(word_len);
840  correct_text.reserve(word_len);
841  for (int i = 0; i < word_len; ++i) {

◆ ComputeAdaptionThresholds()

void WERD_RES::ComputeAdaptionThresholds ( float  certainty_scale,
float  min_rating,
float  max_rating,
float  rating_margin,
float *  thresholds 
)

Definition at line 557 of file pageres.cpp.

564  {
565  int chunk = 0;
566  int end_chunk = best_choice->state(0);
567  int end_raw_chunk = raw_choice->state(0);
568  int raw_blob = 0;
569  for (int i = 0; i < best_choice->length(); i++, thresholds++) {
570  float avg_rating = 0.0f;
571  int num_error_chunks = 0;
572 
573  // For each chunk in best choice blob i, count non-matching raw results.
574  while (chunk < end_chunk) {
575  if (chunk >= end_raw_chunk) {
576  ++raw_blob;
577  end_raw_chunk += raw_choice->state(raw_blob);
578  }
579  if (best_choice->unichar_id(i) !=
580  raw_choice->unichar_id(raw_blob)) {
581  avg_rating += raw_choice->certainty(raw_blob);
582  ++num_error_chunks;
583  }
584  ++chunk;
585  }
586 
587  if (num_error_chunks > 0) {
588  avg_rating /= num_error_chunks;
589  *thresholds = (avg_rating / -certainty_scale) * (1.0 - rating_margin);
590  } else {
591  *thresholds = max_rating;
592  }
593 
594  if (*thresholds > max_rating)
595  *thresholds = max_rating;
596  if (*thresholds < min_rating)

◆ ConditionalBlobMerge()

bool WERD_RES::ConditionalBlobMerge ( std::function< UNICHAR_ID(UNICHAR_ID, UNICHAR_ID)>  class_cb,
std::function< bool(const TBOX &, const TBOX &)>  box_cb 
)

Definition at line 935 of file pageres.cpp.

940  {
941  ASSERT_HOST(best_choice->length() == 0 || ratings != nullptr);
942  bool modified = false;
943  for (int i = 0; i + 1 < best_choice->length(); ++i) {
944  UNICHAR_ID new_id = class_cb(best_choice->unichar_id(i),
945  best_choice->unichar_id(i+1));
946  if (new_id != INVALID_UNICHAR_ID &&
947  (box_cb == nullptr || box_cb(box_word->BlobBox(i),
948  box_word->BlobBox(i + 1)))) {
949  // Raw choice should not be fixed.
950  best_choice->set_unichar_id(new_id, i);
951  modified = true;
953  const MATRIX_COORD& coord = best_choice->MatrixCoord(i);
954  if (!coord.Valid(*ratings)) {
955  ratings->IncreaseBandSize(coord.row + 1 - coord.col);
956  }
957  BLOB_CHOICE_LIST* blob_choices = GetBlobChoices(i);
958  if (FindMatchingChoice(new_id, blob_choices) == nullptr) {
959  // Insert a fake result.
960  auto* blob_choice = new BLOB_CHOICE;
961  blob_choice->set_unichar_id(new_id);
962  BLOB_CHOICE_IT bc_it(blob_choices);
963  bc_it.add_before_then_move(blob_choice);
964  }
965  }

◆ ConsumeWordResults()

void WERD_RES::ConsumeWordResults ( WERD_RES * word)

Definition at line 761 of file pageres.cpp.

764  {
765  denorm = word->denorm;
766  blob_row = word->blob_row;
767  MovePointerData(&chopped_word, &word->chopped_word);
768  MovePointerData(&rebuild_word, &word->rebuild_word);
769  MovePointerData(&box_word, &word->box_word);
771  seam_array = word->seam_array;
772  word->seam_array.clear();
773  best_state.move(&word->best_state);
774  correct_text.move(&word->correct_text);
775  blob_widths.move(&word->blob_widths);
776  blob_gaps.move(&word->blob_gaps);
777  if (ratings != nullptr) ratings->delete_matrix_pointers();
778  MovePointerData(&ratings, &word->ratings);
779  best_choice = word->best_choice;
780  MovePointerData(&raw_choice, &word->raw_choice);
781  best_choices.clear();
782  WERD_CHOICE_IT wc_it(&best_choices);
783  wc_it.add_list_after(&word->best_choices);
784  reject_map = word->reject_map;
785  if (word->blamer_bundle != nullptr) {
786  assert(blamer_bundle != nullptr);
787  blamer_bundle->CopyResults(*(word->blamer_bundle));

◆ copy_on()

void WERD_RES::copy_on ( WERD_RES * word_res)
inline

Definition at line 654 of file pageres.h.

659  { //from this word

◆ CopySimpleFields()

void WERD_RES::CopySimpleFields ( const WERD_RES & source)

Definition at line 247 of file pageres.cpp.

250  {
251  tess_failed = source.tess_failed;
252  tess_accepted = source.tess_accepted;
254  done = source.done;
256  small_caps = source.small_caps;
257  odd_size = source.odd_size;
258  fontinfo = source.fontinfo;
259  fontinfo2 = source.fontinfo2;
262  x_height = source.x_height;
263  caps_height = source.caps_height;
265  guessed_x_ht = source.guessed_x_ht;
267  reject_spaces = source.reject_spaces;

◆ DebugTopChoice()

void WERD_RES::DebugTopChoice ( const char *  msg) const

Definition at line 495 of file pageres.cpp.

498  {
499  tprintf("Best choice: accepted=%d, adaptable=%d, done=%d : ",
501  if (best_choice == nullptr)
502  tprintf("<Null choice>\n");

◆ DebugWordChoices()

void WERD_RES::DebugWordChoices ( bool  debug,
const char *  word_to_debug 
)

Definition at line 476 of file pageres.cpp.

479  {
480  if (debug ||
481  (word_to_debug != nullptr && *word_to_debug != '\0' && best_choice != nullptr &&
482  best_choice->unichar_string() == STRING(word_to_debug))) {
483  if (raw_choice != nullptr)
484  raw_choice->print("\nBest Raw Choice");
485 
486  WERD_CHOICE_IT it(&best_choices);
487  int index = 0;
488  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++index) {
489  WERD_CHOICE* choice = it.data();
490  STRING label;
491  label.add_str_int("\nCooked Choice #", index);
492  choice->print(label.c_str());

◆ deep_copy()

static WERD_RES* WERD_RES::deep_copy ( const WERD_RES * src)
inline static

Definition at line 643 of file pageres.h.

648  {
649  auto* result = new WERD_RES(*src);
650  // That didn't copy the ratings, but we want a copy if there is one to

◆ FakeClassifyWord()

void WERD_RES::FakeClassifyWord ( int  blob_count,
BLOB_CHOICE **  choices 
)

Definition at line 873 of file pageres.cpp.

876  {
877  // Setup the WERD_RES.
878  ASSERT_HOST(box_word != nullptr);
879  ASSERT_HOST(blob_count == box_word->length());
881  ClearRatings();
882  ratings = new MATRIX(blob_count, 1);
883  for (int c = 0; c < blob_count; ++c) {
884  auto* choice_list = new BLOB_CHOICE_LIST;
885  BLOB_CHOICE_IT choice_it(choice_list);
886  choice_it.add_after_then_move(choices[c]);
887  ratings->put(c, c, choice_list);
888  }
890  reject_map.initialise(blob_count);

◆ FakeWordFromRatings()

void WERD_RES::FakeWordFromRatings ( PermuterType  permuter)

Definition at line 894 of file pageres.cpp.

897  {
898  int num_blobs = ratings->dimension();
899  auto* word_choice = new WERD_CHOICE(uch_set, num_blobs);
900  word_choice->set_permuter(permuter);
901  for (int b = 0; b < num_blobs; ++b) {
902  UNICHAR_ID unichar_id = UNICHAR_SPACE;
903  // Initialize rating and certainty like in WERD_CHOICE::make_bad().
904  float rating = WERD_CHOICE::kBadRating;
905  float certainty = -FLT_MAX;
906  BLOB_CHOICE_LIST* choices = ratings->get(b, b);
907  if (choices != nullptr && !choices->empty()) {
908  BLOB_CHOICE_IT bc_it(choices);
909  BLOB_CHOICE* choice = bc_it.data();
910  unichar_id = choice->unichar_id();
911  rating = choice->rating();
912  certainty = choice->certainty();
913  }
914  word_choice->append_unichar_id_space_allocated(unichar_id, 1, rating,
915  certainty);
916  }
917  LogNewRawChoice(word_choice);

◆ FilterWordChoices()

void WERD_RES::FilterWordChoices ( int  debug_level)

Definition at line 509 of file pageres.cpp.

512  {
513  if (best_choice == nullptr || best_choices.singleton())
514  return;
515 
516  if (debug_level >= 2)
517  best_choice->print("\nFiltering against best choice");
518  WERD_CHOICE_IT it(&best_choices);
519  int index = 0;
520  for (it.forward(); !it.at_first(); it.forward(), ++index) {
521  WERD_CHOICE* choice = it.data();
522  float threshold = StopperAmbigThreshold(best_choice->adjust_factor(),
523  choice->adjust_factor());
524  // i, j index the blob choice in choice, best_choice.
525  // chunk is an index into the chopped_word blobs (AKA chunks).
526  // Since the two words may use different segmentations of the chunks, we
527  // iterate over the chunks to find out whether a comparable blob
528  // classification is much worse than the best result.
529  int i = 0, j = 0, chunk = 0;
530  // Each iteration of the while deals with 1 chunk. On entry choice_chunk
531  // and best_chunk are the indices of the first chunk in the NEXT blob,
532  // i.e. we don't have to increment i, j while chunk < choice_chunk and
533  // best_chunk respectively.
534  int choice_chunk = choice->state(0), best_chunk = best_choice->state(0);
535  while (i < choice->length() && j < best_choice->length()) {
536  if (choice->unichar_id(i) != best_choice->unichar_id(j) &&
537  choice->certainty(i) - best_choice->certainty(j) < threshold) {
538  if (debug_level >= 2) {
539  choice->print("WorstCertaintyDiffWorseThan");
540  tprintf(
541  "i %d j %d Choice->Blob[i].Certainty %.4g"
542  " WorstOtherChoiceCertainty %g Threshold %g\n",
543  i, j, choice->certainty(i), best_choice->certainty(j), threshold);
544  tprintf("Discarding bad choice #%d\n", index);
545  }
546  delete it.extract();
547  break;
548  }
549  ++chunk;
550  // If needed, advance choice_chunk to keep up with chunk.
551  while (choice_chunk < chunk && ++i < choice->length())
552  choice_chunk += choice->state(i);
553  // If needed, advance best_chunk to keep up with chunk.
554  while (best_chunk < chunk && ++j < best_choice->length())
555  best_chunk += best_choice->state(j);

◆ fix_hyphens()

void WERD_RES::fix_hyphens ( )

Definition at line 1042 of file pageres.cpp.

1045  {
1046  if (!uch_set->contains_unichar("-") ||
1048  return; // Don't create it if it is disallowed.
1049 
1050  using namespace std::placeholders; // for _1, _2

◆ fix_quotes()

void WERD_RES::fix_quotes ( )

Definition at line 1013 of file pageres.cpp.

1016  {
1017  if (!uch_set->contains_unichar("\"") ||
1019  return; // Don't create it if it is disallowed.
1020 
1021  using namespace std::placeholders; // for _1, _2

◆ GetBlobChoice()

BLOB_CHOICE * WERD_RES::GetBlobChoice ( int  index) const

Definition at line 746 of file pageres.cpp.

749  {
750  if (index < 0 || index >= best_choice->length()) return nullptr;

◆ GetBlobChoices()

BLOB_CHOICE_LIST * WERD_RES::GetBlobChoices ( int  index) const

Definition at line 755 of file pageres.cpp.

◆ GetBlobsGap()

int WERD_RES::GetBlobsGap ( int  blob_index)

Definition at line 736 of file pageres.cpp.

739  {
740  if (blob_index < 0 || blob_index >= blob_gaps.size())

◆ GetBlobsWidth()

int WERD_RES::GetBlobsWidth ( int  start_blob,
int  last_blob 
)

Definition at line 726 of file pageres.cpp.

729  {
730  int result = 0;
731  for (int b = start_blob; b <= last_blob; ++b) {
732  result += blob_widths[b];
733  if (b < last_blob)
734  result += blob_gaps[b];

◆ HyphenBoxesOverlap()

bool WERD_RES::HyphenBoxesOverlap ( const TBOX & box1,
const TBOX & box2
)

Definition at line 1036 of file pageres.cpp.

◆ InitForRetryRecognition()

void WERD_RES::InitForRetryRecognition ( const WERD_RES & source)

Definition at line 273 of file pageres.cpp.

276  {
277  word = source.word;
278  CopySimpleFields(source);
279  if (source.blamer_bundle != nullptr) {
280  blamer_bundle = new BlamerBundle();

◆ InsertSeam()

void WERD_RES::InsertSeam ( int  blob_number,
SEAM * seam
)

Definition at line 414 of file pageres.cpp.

417  {
418  // Insert the seam into the SEAMS array.
419  seam->PrepareToInsertSeam(seam_array, chopped_word->blobs, blob_number, true);
420  seam_array.insert(seam, blob_number);
421  if (ratings != nullptr) {
422  // Expand the ratings matrix.
423  ratings = ratings->ConsumeAndMakeBigger(blob_number);
424  // Fix all the segmentation states.
425  if (raw_choice != nullptr)
426  raw_choice->UpdateStateForSplit(blob_number);
427  WERD_CHOICE_IT wc_it(&best_choices);
428  for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) {
429  WERD_CHOICE* choice = wc_it.data();
430  choice->UpdateStateForSplit(blob_number);
431  }

◆ IsAmbiguous()

bool WERD_RES::IsAmbiguous ( )

Definition at line 448 of file pageres.cpp.

◆ LogNewCookedChoice()

bool WERD_RES::LogNewCookedChoice ( int  max_num_choices,
bool  debug,
WERD_CHOICE * word_choice
)

Definition at line 616 of file pageres.cpp.

620  {
621  if (best_choice != nullptr) {
622  // Throw out obviously bad choices to save some work.
623  // TODO(rays) Get rid of this! This piece of code produces different
624  // results according to the order in which words are found, which is an
625  // undesirable behavior. It would be better to keep all the choices and
626  // prune them later when more information is available.
627  float max_certainty_delta =
628  StopperAmbigThreshold(best_choice->adjust_factor(),
629  word_choice->adjust_factor());
630  if (max_certainty_delta > -kStopperAmbiguityThresholdOffset)
631  max_certainty_delta = -kStopperAmbiguityThresholdOffset;
632  if (word_choice->certainty() - best_choice->certainty() <
633  max_certainty_delta) {
634  if (debug) {
635  STRING bad_string;
636  word_choice->string_and_lengths(&bad_string, nullptr);
637  tprintf("Discarding choice \"%s\" with an overly low certainty"
638  " %.3f vs best choice certainty %.3f (Threshold: %.3f)\n",
639  bad_string.c_str(), word_choice->certainty(),
641  max_certainty_delta + best_choice->certainty());
642  }
643  delete word_choice;
644  return false;
645  }
646  }
647 
648  // Insert in the list in order of increasing rating, but knock out worse
649  // string duplicates.
650  WERD_CHOICE_IT it(&best_choices);
651  const STRING& new_str = word_choice->unichar_string();
652  bool inserted = false;
653  int num_choices = 0;
654  if (!it.empty()) {
655  do {
656  WERD_CHOICE* choice = it.data();
657  if (choice->rating() > word_choice->rating() && !inserted) {
658  // Time to insert.
659  it.add_before_stay_put(word_choice);
660  inserted = true;
661  if (num_choices == 0)
662  best_choice = word_choice; // This is the new best.
663  ++num_choices;
664  }
665  if (choice->unichar_string() == new_str) {
666  if (inserted) {
667  // New is better.
668  delete it.extract();
669  } else {
670  // Old is better.
671  if (debug) {
672  tprintf("Discarding duplicate choice \"%s\", rating %g vs %g\n",
673  new_str.c_str(), word_choice->rating(), choice->rating());
674  }
675  delete word_choice;
676  return false;
677  }
678  } else {
679  ++num_choices;
680  if (num_choices > max_num_choices)
681  delete it.extract();
682  }
683  it.forward();
684  } while (!it.at_first());
685  }
686  if (!inserted && num_choices < max_num_choices) {
687  it.add_to_end(word_choice);
688  inserted = true;
689  if (num_choices == 0)
690  best_choice = word_choice; // This is the new best.
691  }
692  if (debug) {
693  if (inserted)
694  tprintf("New %s", best_choice == word_choice ? "Best" : "Secondary");
695  else
696  tprintf("Poor");
697  word_choice->print(" Word Choice");
698  }
699  if (!inserted) {
700  delete word_choice;
701  return false;

◆ LogNewRawChoice()

bool WERD_RES::LogNewRawChoice ( WERD_CHOICE * word_choice)

Definition at line 600 of file pageres.cpp.

603  {
604  if (raw_choice == nullptr || word_choice->rating() < raw_choice->rating()) {
605  delete raw_choice;
606  raw_choice = new WERD_CHOICE(*word_choice);
608  return true;

◆ merge_tess_fails()

void WERD_RES::merge_tess_fails ( )

Definition at line 1062 of file pageres.cpp.

1065  {
1066  using namespace std::placeholders; // for _1, _2
1068  this, _1, _2), nullptr)) {
1069  int len = best_choice->length();
1070  ASSERT_HOST(reject_map.length() == len);

◆ MergeAdjacentBlobs()

void WERD_RES::MergeAdjacentBlobs ( int  index)

Definition at line 969 of file pageres.cpp.

972  {
973  if (reject_map.length() == best_choice->length())
974  reject_map.remove_pos(index);
975  best_choice->remove_unichar_id(index + 1);
976  rebuild_word->MergeBlobs(index, index + 2);
977  box_word->MergeBoxes(index, index + 2);
978  if (index + 1 < best_state.size()) {
979  best_state[index] += best_state[index + 1];

◆ operator=()

WERD_RES & WERD_RES::operator= ( const WERD_RES & source)

Definition at line 184 of file pageres.cpp.

187  {
188  this->ELIST_LINK::operator=(source);
189  Clear();
190  if (source.combination) {
191  word = new WERD;
192  *word = *(source.word); // deep copy
193  } else {
194  word = source.word; // pt to same word
195  }
196  if (source.bln_boxes != nullptr)
197  bln_boxes = new tesseract::BoxWord(*source.bln_boxes);
198  if (source.chopped_word != nullptr)
199  chopped_word = new TWERD(*source.chopped_word);
200  if (source.rebuild_word != nullptr)
201  rebuild_word = new TWERD(*source.rebuild_word);
202  // TODO(rays) Do we ever need to copy the seam_array?
203  blob_row = source.blob_row;
204  denorm = source.denorm;
205  if (source.box_word != nullptr)
206  box_word = new tesseract::BoxWord(*source.box_word);
207  best_state = source.best_state;
208  correct_text = source.correct_text;
209  blob_widths = source.blob_widths;
210  blob_gaps = source.blob_gaps;
211  // None of the uses of operator= require the ratings matrix to be copied,
212  // so don't as it would be really slow.
213 
214  // Copy the cooked choices.
215  WERD_CHOICE_IT wc_it(const_cast<WERD_CHOICE_LIST*>(&source.best_choices));
216  WERD_CHOICE_IT wc_dest_it(&best_choices);
217  for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) {
218  const WERD_CHOICE *choice = wc_it.data();
219  wc_dest_it.add_after_then_move(new WERD_CHOICE(*choice));
220  }
221  if (!wc_dest_it.empty()) {
222  wc_dest_it.move_to_first();
223  best_choice = wc_dest_it.data();
224  } else {
225  best_choice = nullptr;
226  }
227 
228  if (source.raw_choice != nullptr) {
229  raw_choice = new WERD_CHOICE(*source.raw_choice);
230  } else {
231  raw_choice = nullptr;
232  }
233  if (source.ep_choice != nullptr) {
234  ep_choice = new WERD_CHOICE(*source.ep_choice);
235  } else {
236  ep_choice = nullptr;
237  }
238  reject_map = source.reject_map;
239  combination = source.combination;
240  part_of_combo = source.part_of_combo;
241  CopySimpleFields(source);
242  if (source.blamer_bundle != nullptr) {
243  blamer_bundle = new BlamerBundle(*(source.blamer_bundle));

◆ PiecesAllNatural()

bool WERD_RES::PiecesAllNatural ( int  start,
int  count 
) const

Definition at line 1074 of file pageres.cpp.

1077  {
1078  // all seams must have no splits.
1079  for (int index = start; index < start + count - 1; ++index) {
1080  if (index >= 0 && index < seam_array.size()) {
1081  SEAM* seam = seam_array[index];
1082  if (seam != nullptr && seam->HasAnySplits()) return false;
1083  }

◆ PrintBestChoices()

void WERD_RES::PrintBestChoices ( ) const

Definition at line 713 of file pageres.cpp.

716  {
717  STRING alternates_str;
718  WERD_CHOICE_IT it(const_cast<WERD_CHOICE_LIST*>(&best_choices));
719  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
720  if (!it.at_first()) alternates_str += "\", \"";
721  alternates_str += it.data()->unichar_string();
722  }

◆ RawUTF8()

const char* WERD_RES::RawUTF8 ( int  blob_index) const
inline

Definition at line 370 of file pageres.h.

375  {
376  if (blob_index < 0 || blob_index >= raw_choice->length())
377  return nullptr;

◆ RebuildBestState()

void WERD_RES::RebuildBestState ( )

Definition at line 804 of file pageres.cpp.

807  {
808  ASSERT_HOST(best_choice != nullptr);
809  delete rebuild_word;
810  rebuild_word = new TWERD;
811  if (seam_array.empty())
813  best_state.truncate(0);
814  int start = 0;
815  for (int i = 0; i < best_choice->length(); ++i) {
816  int length = best_choice->state(i);
817  best_state.push_back(length);
818  if (length > 1) {
820  start + length - 1);
821  }
822  TBLOB* blob = chopped_word->blobs[start];
823  rebuild_word->blobs.push_back(new TBLOB(*blob));
824  if (length > 1) {
826  start + length - 1);
827  }

◆ ReplaceBestChoice()

void WERD_RES::ReplaceBestChoice ( WERD_CHOICE * choice)

Definition at line 791 of file pageres.cpp.

794  {
795  best_choice = choice;
797  SetupBoxWord();
798  // Make up a fake reject map of the right length to keep the
799  // rejection pass happy.

◆ SetAllScriptPositions()

void WERD_RES::SetAllScriptPositions ( tesseract::ScriptPos  position)

Definition at line 861 of file pageres.cpp.

864  {
866  WERD_CHOICE_IT wc_it(&best_choices);

◆ SetScriptPositions()

void WERD_RES::SetScriptPositions ( )

Definition at line 854 of file pageres.cpp.

◆ SetupBasicsFromChoppedWord()

void WERD_RES::SetupBasicsFromChoppedWord ( const UNICHARSET & unicharset_in)

◆ SetupBlamerBundle()

void WERD_RES::SetupBlamerBundle ( )

Definition at line 389 of file pageres.cpp.

392  {
393  if (blamer_bundle != nullptr) {

◆ SetupBlobWidthsAndGaps()

void WERD_RES::SetupBlobWidthsAndGaps ( )

Definition at line 396 of file pageres.cpp.

399  {
401  blob_gaps.truncate(0);
402  int num_blobs = chopped_word->NumBlobs();
403  for (int b = 0; b < num_blobs; ++b) {
404  TBLOB *blob = chopped_word->blobs[b];
405  TBOX box = blob->bounding_box();
406  blob_widths.push_back(box.width());
407  if (b + 1 < num_blobs) {
409  chopped_word->blobs[b + 1]->bounding_box().left() - box.right());

◆ SetupBoxWord()

void WERD_RES::SetupBoxWord ( )

Definition at line 845 of file pageres.cpp.

848  {
849  delete box_word;

◆ SetupFake()

void WERD_RES::SetupFake ( const UNICHARSET & uch)

Definition at line 348 of file pageres.cpp.

351  {
352  ClearResults();
353  SetupWordScript(unicharset_in);
354  chopped_word = new TWERD;
355  rebuild_word = new TWERD;
358  int blob_count = word->cblob_list()->length();
359  if (blob_count > 0) {
360  auto** fake_choices = new BLOB_CHOICE*[blob_count];
361  // For non-text blocks, just pass any blobs through to the box_word
362  // and call the word failed with a fake classification.
363  C_BLOB_IT b_it(word->cblob_list());
364  int blob_id = 0;
365  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
366  TBOX box = b_it.data()->bounding_box();
367  box_word->InsertBox(box_word->length(), box);
368  fake_choices[blob_id++] = new BLOB_CHOICE;
369  }
370  FakeClassifyWord(blob_count, fake_choices);
371  delete [] fake_choices;
372  } else {
373  auto* word = new WERD_CHOICE(&unicharset_in);
374  word->make_bad();
376  // Ownership of word is taken by *this WERD_RES in LogNewCookedChoice.
377  LogNewCookedChoice(1, false, word);
378  }

◆ SetupForRecognition()

bool WERD_RES::SetupForRecognition ( const UNICHARSET &  unicharset_in,
tesseract::Tesseract *  tesseract,
Pix *  pix,
int  norm_mode,
const TBOX *  norm_box,
bool  numeric_mode,
bool  use_body_size,
bool  allow_detailed_fx,
ROW *  row,
const BLOCK *  block 
)

Definition at line 298 of file pageres.cpp.

308  {
309  auto norm_mode_hint =
310  static_cast<tesseract::OcrEngineMode>(norm_mode);
311  tesseract = tess;
312  POLY_BLOCK* pb = block != nullptr ? block->pdblk.poly_block() : nullptr;
313  if ((norm_mode_hint != tesseract::OEM_LSTM_ONLY &&
314  word->cblob_list()->empty()) ||
315  (pb != nullptr && !pb->IsText())) {
316  // Empty words occur when all the blobs have been moved to the rej_blobs
317  // list, which seems to occur frequently in junk.
318  SetupFake(unicharset_in);
319  word->set_flag(W_REP_CHAR, false);
320  return false;
321  }
322  ClearResults();
323  SetupWordScript(unicharset_in);
324  chopped_word = TWERD::PolygonalCopy(allow_detailed_fx, word);
325  float word_xheight = use_body_size && row != nullptr && row->body_size() > 0.0f
326  ? row->body_size() : x_height;
327  chopped_word->BLNormalize(block, row, pix, word->flag(W_INVERSE),
328  word_xheight, baseline_shift, numeric_mode,
329  norm_mode_hint, norm_box, &denorm);
330  blob_row = row;
331  SetupBasicsFromChoppedWord(unicharset_in);
333  int num_blobs = chopped_word->NumBlobs();
334  ratings = new MATRIX(num_blobs, kWordrecMaxNumJoinChunks);

◆ SetupWordScript()

void WERD_RES::SetupWordScript ( const UNICHARSET & unicharset_in)

Definition at line 380 of file pageres.cpp.

383  {
384  uch_set = &uch;
385  int script = uch.default_sid();
386  word->set_script_id(script);

◆ StatesAllValid()

bool WERD_RES::StatesAllValid ( )

Definition at line 454 of file pageres.cpp.

457  {
458  int ratings_dim = ratings->dimension();
459  if (raw_choice->TotalOfStates() != ratings_dim) {
460  tprintf("raw_choice has total of states = %d vs ratings dim of %d\n",
461  raw_choice->TotalOfStates(), ratings_dim);
462  return false;
463  }
464  WERD_CHOICE_IT it(&best_choices);
465  int index = 0;
466  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++index) {
467  WERD_CHOICE* choice = it.data();
468  if (choice->TotalOfStates() != ratings_dim) {
469  tprintf("Cooked #%d has total of states = %d vs ratings dim of %d\n",
470  index, choice->TotalOfStates(), ratings_dim);
471  return false;
472  }

◆ SymbolDirection()

UNICHARSET::Direction WERD_RES::SymbolDirection ( int  blob_index) const
inline

Definition at line 379 of file pageres.h.

384  {
385  if (best_choice == nullptr ||

◆ UnicharsInReadingOrder()

bool WERD_RES::UnicharsInReadingOrder ( ) const
inline

Definition at line 421 of file pageres.h.

426  {

Member Data Documentation

◆ baseline_shift

float WERD_RES::baseline_shift = 0.0f

Definition at line 312 of file pageres.h.

◆ best_choice

WERD_CHOICE* WERD_RES::best_choice = nullptr

Definition at line 235 of file pageres.h.

◆ best_choices

WERD_CHOICE_LIST WERD_RES::best_choices

Definition at line 243 of file pageres.h.

◆ best_state

GenericVector<int> WERD_RES::best_state

Definition at line 279 of file pageres.h.

◆ blamer_bundle

BlamerBundle* WERD_RES::blamer_bundle = nullptr

Definition at line 246 of file pageres.h.

◆ bln_boxes

tesseract::BoxWord* WERD_RES::bln_boxes = nullptr

Definition at line 189 of file pageres.h.

◆ blob_gaps

GenericVector<int> WERD_RES::blob_gaps

Definition at line 213 of file pageres.h.

◆ blob_row

ROW* WERD_RES::blob_row = nullptr

Definition at line 191 of file pageres.h.

◆ blob_widths

GenericVector<int> WERD_RES::blob_widths

Definition at line 210 of file pageres.h.

◆ box_word

tesseract::BoxWord* WERD_RES::box_word = nullptr

Definition at line 266 of file pageres.h.

◆ caps_height

float WERD_RES::caps_height = 0.0f

Definition at line 311 of file pageres.h.

◆ chopped_word

TWERD* WERD_RES::chopped_word = nullptr

Definition at line 206 of file pageres.h.

◆ combination

bool WERD_RES::combination = false

Definition at line 333 of file pageres.h.

◆ correct_text

GenericVector<STRING> WERD_RES::correct_text

Definition at line 283 of file pageres.h.

◆ CTC_symbol_choices

std::vector<std::vector<std::pair<const char*, float> > > WERD_RES::CTC_symbol_choices

Definition at line 220 of file pageres.h.

◆ denorm

DENORM WERD_RES::denorm

Definition at line 195 of file pageres.h.

◆ done

bool WERD_RES::done = false

Definition at line 299 of file pageres.h.

◆ end

int WERD_RES::end = 0

Definition at line 224 of file pageres.h.

◆ ep_choice

WERD_CHOICE* WERD_RES::ep_choice = nullptr

Definition at line 287 of file pageres.h.

◆ fontinfo

const FontInfo* WERD_RES::fontinfo = nullptr

Definition at line 303 of file pageres.h.

◆ fontinfo2

const FontInfo* WERD_RES::fontinfo2 = nullptr

Definition at line 304 of file pageres.h.

◆ fontinfo_id2_count

int8_t WERD_RES::fontinfo_id2_count = 0

Definition at line 306 of file pageres.h.

◆ fontinfo_id_count

int8_t WERD_RES::fontinfo_id_count = 0

Definition at line 305 of file pageres.h.

◆ guessed_caps_ht

bool WERD_RES::guessed_caps_ht = true

Definition at line 308 of file pageres.h.

◆ guessed_x_ht

bool WERD_RES::guessed_x_ht = true

Definition at line 307 of file pageres.h.

◆ leading_space

bool WERD_RES::leading_space = false

Definition at line 222 of file pageres.h.

◆ odd_size

bool WERD_RES::odd_size = false

Definition at line 301 of file pageres.h.

◆ part_of_combo

bool WERD_RES::part_of_combo = false

Definition at line 334 of file pageres.h.

◆ ratings

MATRIX* WERD_RES::ratings = nullptr

Definition at line 231 of file pageres.h.

◆ raw_choice

WERD_CHOICE* WERD_RES::raw_choice = nullptr

Definition at line 240 of file pageres.h.

◆ rebuild_word

TWERD* WERD_RES::rebuild_word = nullptr

Definition at line 260 of file pageres.h.

◆ reject_map

REJMAP WERD_RES::reject_map

Definition at line 288 of file pageres.h.

◆ reject_spaces

bool WERD_RES::reject_spaces = false

Definition at line 335 of file pageres.h.

◆ seam_array

GenericVector<SEAM*> WERD_RES::seam_array

Definition at line 208 of file pageres.h.

◆ segmented_timesteps

std::vector<std::vector<std::vector< std::pair<const char*, float> > > > WERD_RES::segmented_timesteps

Definition at line 218 of file pageres.h.

◆ small_caps

bool WERD_RES::small_caps = false

Definition at line 300 of file pageres.h.

◆ space_certainty

float WERD_RES::space_certainty = 0.0f

Definition at line 315 of file pageres.h.

◆ tess_accepted

bool WERD_RES::tess_accepted = false

Definition at line 297 of file pageres.h.

◆ tess_failed

bool WERD_RES::tess_failed = false

Definition at line 289 of file pageres.h.

◆ tess_would_adapt

bool WERD_RES::tess_would_adapt = false

Definition at line 298 of file pageres.h.

◆ tesseract

tesseract::Tesseract* WERD_RES::tesseract = nullptr

Definition at line 274 of file pageres.h.

◆ timesteps

std::vector<std::vector<std::pair<const char*, float> > > WERD_RES::timesteps

Definition at line 215 of file pageres.h.

◆ uch_set

const UNICHARSET* WERD_RES::uch_set = nullptr

Definition at line 197 of file pageres.h.

◆ unlv_crunch_mode

CRUNCH_MODE WERD_RES::unlv_crunch_mode = CR_NONE

Definition at line 309 of file pageres.h.

◆ word

WERD* WERD_RES::word = nullptr

Definition at line 180 of file pageres.h.

◆ x_height

float WERD_RES::x_height = 0.0f

Definition at line 310 of file pageres.h.


The documentation for this class was generated from the following files:
pageres.h
pageres.cpp
WERD_RES::done
bool done
Definition: pageres.h:299
UNICHARSET::get_direction
Direction get_direction(UNICHAR_ID unichar_id) const
Definition: unicharset.h:680
WERD_RES::SetupBoxWord
void SetupBoxWord()
Definition: pageres.cpp:845
WERD_CHOICE::unichar_string
const STRING & unichar_string() const
Definition: ratngs.h:529
GenericVector::delete_data_pointers
void delete_data_pointers()
Definition: genericvector.h:872
WERD_RES::FakeWordFromRatings
void FakeWordFromRatings(PermuterType permuter)
Definition: pageres.cpp:894
TWERD::MergeBlobs
void MergeBlobs(int start, int end)
Definition: blobs.cpp:870
MATRIX::ConsumeAndMakeBigger
MATRIX * ConsumeAndMakeBigger(int ind)
Definition: matrix.cpp:56
WERD_RES::box_word
tesseract::BoxWord * box_word
Definition: pageres.h:266
WERD_RES::blob_widths
GenericVector< int > blob_widths
Definition: pageres.h:210
STRING::add_str_int
void add_str_int(const char *str, int number)
Definition: strngs.cpp:370
WERD::flag
bool flag(WERD_FLAGS mask) const
Definition: werd.h:116
WERD::set_script_id
void set_script_id(int id)
Definition: werd.h:103
W_REP_CHAR
repeated character
Definition: werd.h:52
WERD_CHOICE::unichar_id
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:303
POLY_BLOCK::IsText
bool IsText() const
Definition: polyblk.h:62
WERD_RES::rebuild_word
TWERD * rebuild_word
Definition: pageres.h:260
WERD_CHOICE
Definition: ratngs.h:261
REJMAP::initialise
void initialise(int16_t length)
Definition: rejctmap.cpp:272
tesseract::BoxWord::CopyFromNormalized
static BoxWord * CopyFromNormalized(TWERD *tessword)
Definition: boxword.cpp:56
TWERD
Definition: blobs.h:416
ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:87
WERD_RES::LogNewRawChoice
bool LogNewRawChoice(WERD_CHOICE *word_choice)
Definition: pageres.cpp:600
WERD_RES::bln_boxes
tesseract::BoxWord * bln_boxes
Definition: pageres.h:189
GenericVector::insert
void insert(const T &t, int index)
Definition: genericvector.h:750
WERD_RES::odd_size
bool odd_size
Definition: pageres.h:301
WERD_RES::denorm
DENORM denorm
Definition: pageres.h:195
WERD_RES::GetBlobChoices
BLOB_CHOICE_LIST * GetBlobChoices(int index) const
Definition: pageres.cpp:755
BLOB_CHOICE::certainty
float certainty() const
Definition: ratngs.h:81
WERD_CHOICE::TotalOfStates
int TotalOfStates() const
Definition: ratngs.cpp:713
WERD_RES::BothSpaces
UNICHAR_ID BothSpaces(UNICHAR_ID id1, UNICHAR_ID id2)
Definition: pageres.cpp:1054
WERD_RES::ConditionalBlobMerge
bool ConditionalBlobMerge(std::function< UNICHAR_ID(UNICHAR_ID, UNICHAR_ID)> class_cb, std::function< bool(const TBOX &, const TBOX &)> box_cb)
Definition: pageres.cpp:935
FindMatchingChoice
BLOB_CHOICE * FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list)
Definition: ratngs.cpp:182
WERD_RES::unlv_crunch_mode
CRUNCH_MODE unlv_crunch_mode
Definition: pageres.h:309
MATRIX_COORD::Valid
bool Valid(const MATRIX &m) const
Definition: matrix.h:614
MATRIX
Definition: matrix.h:574
WERD_CHOICE::certainty
float certainty() const
Definition: ratngs.h:318
WERD_RES::combination
bool combination
Definition: pageres.h:333
STRING
Definition: strngs.h:45
WERD_RES::x_height
float x_height
Definition: pageres.h:310
WERD_RES::fontinfo
const FontInfo * fontinfo
Definition: pageres.h:303
tesseract::OEM_LSTM_ONLY
Definition: publictypes.h:267
MATRIX::IncreaseBandSize
void IncreaseBandSize(int bandwidth)
Definition: matrix.cpp:47
UNICHARSET::U_LEFT_TO_RIGHT
Definition: unicharset.h:157
WERD_RES::ratings
MATRIX * ratings
Definition: pageres.h:231
WERD_RES::best_state
GenericVector< int > best_state
Definition: pageres.h:279
WERD_CHOICE::kBadRating
static const float kBadRating
Definition: ratngs.h:263
WERD_CHOICE::state
int state(int index) const
Definition: ratngs.h:307
BLOB_CHOICE::unichar_id
UNICHAR_ID unichar_id() const
Definition: ratngs.h:75
TWERD::ComputeBoundingBoxes
void ComputeBoundingBoxes()
Definition: blobs.cpp:853
WERD_RES::SetupBasicsFromChoppedWord
void SetupBasicsFromChoppedWord(const UNICHARSET &unicharset_in)
Definition: pageres.cpp:339
WERD_RES::fontinfo_id_count
int8_t fontinfo_id_count
Definition: pageres.h:305
SEAM
Definition: seam.h:36
GenericVector::move
void move(GenericVector< T > *from)
Definition: genericvector.h:1087
WERD_RES::tess_failed
bool tess_failed
Definition: pageres.h:289
WERD_RES::uch_set
const UNICHARSET * uch_set
Definition: pageres.h:197
WERD_RES::ep_choice
WERD_CHOICE * ep_choice
Definition: pageres.h:287
WERD::cblob_list
C_BLOB_LIST * cblob_list()
Definition: werd.h:94
GENERIC_2D_ARRAY::delete_matrix_pointers
void delete_matrix_pointers()
Definition: matrix.h:454
WERD_RES::blamer_bundle
BlamerBundle * blamer_bundle
Definition: pageres.h:246
WERD_CHOICE::string_and_lengths
void string_and_lengths(STRING *word_str, STRING *word_lengths_str) const
Definition: ratngs.cpp:451
REJMAP::remove_pos
void remove_pos(int16_t pos)
Definition: rejctmap.cpp:308
kWordrecMaxNumJoinChunks
const int kWordrecMaxNumJoinChunks
Definition: pageres.cpp:52
GenericVector::push_back
int push_back(T object)
Definition: genericvector.h:799
BLOCK::pdblk
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:189
REJMAP::length
int32_t length() const
Definition: rejctmap.h:222
WERD_CHOICE::set_unichar_id
void set_unichar_id(UNICHAR_ID unichar_id, int index)
Definition: ratngs.h:347
SEAM::HasAnySplits
bool HasAnySplits() const
Definition: seam.h:59
tesseract::BoxWord::BlobBox
const TBOX & BlobBox(int index) const
Definition: boxword.h:83
WERD_RES::best_choice
WERD_CHOICE * best_choice
Definition: pageres.h:235
WERD_RES::LogNewCookedChoice
bool LogNewCookedChoice(int max_num_choices, bool debug, WERD_CHOICE *word_choice)
Definition: pageres.cpp:616
STRING::c_str
const char * c_str() const
Definition: strngs.cpp:192
UNICHARSET::get_enabled
bool get_enabled(UNICHAR_ID unichar_id) const
Definition: unicharset.h:868
SEAM::PrepareToInsertSeam
bool PrepareToInsertSeam(const GenericVector< SEAM * > &seams, const GenericVector< TBLOB * > &blobs, int insert_index, bool modify)
Definition: seam.cpp:74
PDBLK::poly_block
POLY_BLOCK * poly_block() const
Definition: pdblock.h:54
WERD_CHOICE::MatrixCoord
MATRIX_COORD MatrixCoord(int index) const
Definition: ratngs.cpp:304
WERD_RES::fontinfo2
const FontInfo * fontinfo2
Definition: pageres.h:304
WERD_RES::Clear
void Clear()
Definition: pageres.cpp:1090
WERD::set_flag
void set_flag(WERD_FLAGS mask, bool value)
Definition: werd.h:117
WERD_RES::ClearResults
void ClearResults()
Definition: pageres.cpp:1100
WERD_RES::baseline_shift
float baseline_shift
Definition: pageres.h:312
UNICHARSET::unichar_to_id
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:209
UNICHAR_SPACE
Definition: unicharset.h:34
GENERIC_2D_ARRAY::get
T get(ICOORD pos) const
Definition: matrix.h:227
TWERD::blobs
GenericVector< TBLOB * > blobs
Definition: blobs.h:457
GenericVector::empty
bool empty() const
Definition: genericvector.h:86
WERD_CHOICE::adjust_factor
float adjust_factor() const
Definition: ratngs.h:294
WERD_CHOICE::UpdateStateForSplit
void UpdateStateForSplit(int blob_position)
Definition: ratngs.cpp:701
TBOX::width
int16_t width() const
Definition: rect.h:114
WERD_RES::tess_accepted
bool tess_accepted
Definition: pageres.h:297
TWERD::PolygonalCopy
static TWERD * PolygonalCopy(bool allow_detailed_fx, WERD *src)
Definition: blobs.cpp:774
start_seam_list
void start_seam_list(TWERD *word, GenericVector< SEAM * > *seam_array)
Definition: seam.cpp:261
WERD_RES::raw_choice
WERD_CHOICE * raw_choice
Definition: pageres.h:240
SEAM::BreakPieces
static void BreakPieces(const GenericVector< SEAM * > &seams, const GenericVector< TBLOB * > &blobs, int first, int last)
Definition: seam.cpp:186
WERD_RES::chopped_word
TWERD * chopped_word
Definition: pageres.h:206
WERD_RES::reject_map
REJMAP reject_map
Definition: pageres.h:288
TWERD::BLNormalize
void BLNormalize(const BLOCK *block, const ROW *row, Pix *pix, bool inverse, float x_height, float baseline_shift, bool numeric_mode, tesseract::OcrEngineMode hint, const TBOX *norm_box, DENORM *word_denorm)
Definition: blobs.cpp:788
tesseract
Definition: baseapi.h:65
WERD_RES::CopySimpleFields
void CopySimpleFields(const WERD_RES &source)
Definition: pageres.cpp:247
ELIST_LINK::operator=
void operator=(const ELIST_LINK &)
Definition: elst.h:134
WERD_RES::best_choices
WERD_CHOICE_LIST best_choices
Definition: pageres.h:243
TOP_CHOICE_PERM
Definition: ratngs.h:233
WERD_RES::seam_array
GenericVector< SEAM * > seam_array
Definition: pageres.h:208
BLOB_CHOICE::rating
float rating() const
Definition: ratngs.h:78
UNICHAR_ID
int UNICHAR_ID
Definition: unichar.h:36
TBLOB::bounding_box
TBOX bounding_box() const
Definition: blobs.cpp:466
GenericVector::reserve
void reserve(int size)
Definition: genericvector.h:679
WERD_RES::caps_height
float caps_height
Definition: pageres.h:311
WERD_RES::tess_would_adapt
bool tess_would_adapt
Definition: pageres.h:298
tesseract::BoxWord::InsertBox
void InsertBox(int index, const TBOX &box)
Definition: boxword.cpp:148
W_INVERSE
white on black
Definition: werd.h:55
WERD_RES::SetupWordScript
void SetupWordScript(const UNICHARSET &unicharset_in)
Definition: pageres.cpp:380
WERD_RES::RebuildBestState
void RebuildBestState()
Definition: pageres.cpp:804
WERD_CHOICE::print
void print() const
Definition: ratngs.h:568
WERD_RES::SetupFake
void SetupFake(const UNICHARSET &uch)
Definition: pageres.cpp:348
WERD_CHOICE::length
int length() const
Definition: ratngs.h:291
count
int count(LIST var_list)
Definition: oldlist.cpp:79
BLOB_CHOICE
Definition: ratngs.h:49
tesseract::BoxWord::length
int length() const
Definition: boxword.h:82
MATRIX_COORD
Definition: matrix.h:604
TBLOB
Definition: blobs.h:282
WERD_RES::blob_row
ROW * blob_row
Definition: pageres.h:191
WERD_CHOICE::SetAllScriptPositions
void SetAllScriptPositions(tesseract::ScriptPos position)
Definition: ratngs.cpp:625
WERD_RES::SetupBlamerBundle
void SetupBlamerBundle()
Definition: pageres.cpp:389
WERD
Definition: werd.h:55
GenericVector::truncate
void truncate(int size)
Definition: genericvector.h:132
UNICHARSET::Direction
Direction
Definition: unicharset.h:156
UNICHARSET::contains_unichar
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:670
MATRIX_COORD::col
int col
Definition: matrix.h:632
WERD_RES::FakeClassifyWord
void FakeClassifyWord(int blob_count, BLOB_CHOICE **choices)
Definition: pageres.cpp:873
GenericVector::clear
void clear()
Definition: genericvector.h:857
WERD_RES::fontinfo_id2_count
int8_t fontinfo_id2_count
Definition: pageres.h:306
TBOX::right
int16_t right() const
Definition: rect.h:78
BlamerBundle::CopyResults
void CopyResults(const BlamerBundle &other)
Definition: blamer.h:211
WERD_RES::correct_text
GenericVector< STRING > correct_text
Definition: pageres.h:283
GENERIC_2D_ARRAY::put
void put(ICOORD pos, const T &thing)
Definition: matrix.h:219
WERD_RES::WERD_RES
WERD_RES()=default
WERD_RES::part_of_combo
bool part_of_combo
Definition: pageres.h:334
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
WERD_RES::blob_gaps
GenericVector< int > blob_gaps
Definition: pageres.h:213
POLY_BLOCK
Definition: polyblk.h:26
WERD_RES::ClearWordChoices
void ClearWordChoices()
Definition: pageres.cpp:1125
WERD_RES::reject_spaces
bool reject_spaces
Definition: pageres.h:335
MATRIX_COORD::row
int row
Definition: matrix.h:633
WERD_RES::word
WERD * word
Definition: pageres.h:180
WERD_RES::guessed_caps_ht
bool guessed_caps_ht
Definition: pageres.h:308
WERD_CHOICE::rating
float rating() const
Definition: ratngs.h:315
SEAM::JoinPieces
static void JoinPieces(const GenericVector< SEAM * > &seams, const GenericVector< TBLOB * > &blobs, int first, int last)
Definition: seam.cpp:208
WERD_RES::MergeAdjacentBlobs
void MergeAdjacentBlobs(int index)
Definition: pageres.cpp:969
WERD_CHOICE::remove_unichar_id
void remove_unichar_id(int index)
Definition: ratngs.h:472
UNICHARSET::id_to_unichar
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:290
tesseract::BoxWord::MergeBoxes
void MergeBoxes(int start, int end)
Definition: boxword.cpp:131
BlamerBundle
Definition: blamer.h:103
GenericVector::size
int size() const
Definition: genericvector.h:71
WERD_RES::guessed_x_ht
bool guessed_x_ht
Definition: pageres.h:307
WERD_CHOICE::set_permuter
void set_permuter(uint8_t perm)
Definition: ratngs.h:363
BLOB_CHOICE::set_unichar_id
void set_unichar_id(UNICHAR_ID newunichar_id)
Definition: ratngs.h:139
ROW::body_size
float body_size() const
Definition: ocrrow.h:72
UNICHARSET::size
int size() const
Definition: unicharset.h:341
WERD_RES::small_caps
bool small_caps
Definition: pageres.h:300
TWERD::NumBlobs
int NumBlobs() const
Definition: blobs.h:446
UNICHARSET::default_sid
int default_sid() const
Definition: unicharset.h:884
UNICHARSET::U_RIGHT_TO_LEFT
Definition: unicharset.h:158
WERD_RES::ClearRatings
void ClearRatings()
Definition: pageres.cpp:1133
BandTriMatrix::dimension
int dimension() const
Definition: matrix.h:532
TBOX
Definition: rect.h:33
tesseract::BoxWord
Definition: boxword.h:36