tesseract  4.0.0-1-g2a2b
WERD_RES Class Reference

#include <pageres.h>

Inheritance diagram for WERD_RES:
ELIST_LINK

Public Member Functions

 WERD_RES ()
 
 WERD_RES (WERD *the_word)
 
 WERD_RES (const WERD_RES &source)
 
 ~WERD_RES ()
 
const char * BestUTF8 (int blob_index, bool in_rtl_context) const
 
const char * RawUTF8 (int blob_index) const
 
UNICHARSET::Direction SymbolDirection (int blob_index) const
 
bool AnyRtlCharsInWord () const
 
bool AnyLtrCharsInWord () const
 
bool UnicharsInReadingOrder () const
 
void InitNonPointers ()
 
void InitPointers ()
 
void Clear ()
 
void ClearResults ()
 
void ClearWordChoices ()
 
void ClearRatings ()
 
WERD_RES & operator= (const WERD_RES &source)
 
void CopySimpleFields (const WERD_RES &source)
 
void InitForRetryRecognition (const WERD_RES &source)
 
bool SetupForRecognition (const UNICHARSET &unicharset_in, tesseract::Tesseract *tesseract, Pix *pix, int norm_mode, const TBOX *norm_box, bool numeric_mode, bool use_body_size, bool allow_detailed_fx, ROW *row, const BLOCK *block)
 
void SetupBasicsFromChoppedWord (const UNICHARSET &unicharset_in)
 
void SetupFake (const UNICHARSET &uch)
 
void SetupWordScript (const UNICHARSET &unicharset_in)
 
void SetupBlamerBundle ()
 
void SetupBlobWidthsAndGaps ()
 
void InsertSeam (int blob_number, SEAM *seam)
 
bool AlternativeChoiceAdjustmentsWorseThan (float threshold) const
 
bool IsAmbiguous ()
 
bool StatesAllValid ()
 
void DebugWordChoices (bool debug, const char *word_to_debug)
 
void DebugTopChoice (const char *msg) const
 
void FilterWordChoices (int debug_level)
 
void ComputeAdaptionThresholds (float certainty_scale, float min_rating, float max_rating, float rating_margin, float *thresholds)
 
bool LogNewRawChoice (WERD_CHOICE *word_choice)
 
bool LogNewCookedChoice (int max_num_choices, bool debug, WERD_CHOICE *word_choice)
 
void PrintBestChoices () const
 
int GetBlobsWidth (int start_blob, int last_blob)
 
int GetBlobsGap (int blob_index)
 
BLOB_CHOICE * GetBlobChoice (int index) const
 
BLOB_CHOICE_LIST * GetBlobChoices (int index) const
 
void ConsumeWordResults (WERD_RES *word)
 
void ReplaceBestChoice (WERD_CHOICE *choice)
 
void RebuildBestState ()
 
void CloneChoppedToRebuild ()
 
void SetupBoxWord ()
 
void SetScriptPositions ()
 
void SetAllScriptPositions (tesseract::ScriptPos position)
 
void FakeClassifyWord (int blob_count, BLOB_CHOICE **choices)
 
void FakeWordFromRatings (PermuterType permuter)
 
void BestChoiceToCorrectText ()
 
bool ConditionalBlobMerge (TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *class_cb, TessResultCallback2< bool, const TBOX &, const TBOX &> *box_cb)
 
void MergeAdjacentBlobs (int index)
 
UNICHAR_ID BothQuotes (UNICHAR_ID id1, UNICHAR_ID id2)
 
void fix_quotes ()
 
UNICHAR_ID BothHyphens (UNICHAR_ID id1, UNICHAR_ID id2)
 
bool HyphenBoxesOverlap (const TBOX &box1, const TBOX &box2)
 
void fix_hyphens ()
 
UNICHAR_ID BothSpaces (UNICHAR_ID id1, UNICHAR_ID id2)
 
void merge_tess_fails ()
 
void copy_on (WERD_RES *word_res)
 
bool PiecesAllNatural (int start, int count) const
 
- Public Member Functions inherited from ELIST_LINK
 ELIST_LINK ()
 
 ELIST_LINK (const ELIST_LINK &)
 
void operator= (const ELIST_LINK &)
 

Static Public Member Functions

static WERD_RES * deep_copy (const WERD_RES *src)
 

Public Attributes

WERD * word
 
tesseract::BoxWord * bln_boxes
 
ROW * blob_row
 
DENORM denorm
 
const UNICHARSET * uch_set
 
TWERD * chopped_word
 
GenericVector< SEAM * > seam_array
 
GenericVector< int > blob_widths
 
GenericVector< int > blob_gaps
 
std::vector< std::vector< std::pair< const char *, float > > > timesteps
 
MATRIX * ratings
 
WERD_CHOICE * best_choice
 
WERD_CHOICE * raw_choice
 
WERD_CHOICE_LIST best_choices
 
BlamerBundle * blamer_bundle
 
TWERD * rebuild_word
 
tesseract::BoxWord * box_word
 
GenericVector< int > best_state
 
GenericVector< STRING > correct_text
 
tesseract::Tesseract * tesseract
 
WERD_CHOICE * ep_choice
 
REJMAP reject_map
 
bool tess_failed
 
bool tess_accepted
 
bool tess_would_adapt
 
bool done
 
bool small_caps
 
bool odd_size
 
int8_t italic
 
int8_t bold
 
const FontInfo * fontinfo
 
const FontInfo * fontinfo2
 
int8_t fontinfo_id_count
 
int8_t fontinfo_id2_count
 
bool guessed_x_ht
 
bool guessed_caps_ht
 
CRUNCH_MODE unlv_crunch_mode
 
float x_height
 
float caps_height
 
float baseline_shift
 
float space_certainty
 
bool combination
 
bool part_of_combo
 
bool reject_spaces
 

Detailed Description

Definition at line 169 of file pageres.h.

Constructor & Destructor Documentation

◆ WERD_RES() [1/3]

WERD_RES::WERD_RES ( )
inline

Definition at line 338 of file pageres.h.

338  {
339  InitNonPointers();
340  InitPointers();
341  }
void InitNonPointers()
Definition: pageres.cpp:1100
void InitPointers()
Definition: pageres.cpp:1128

◆ WERD_RES() [2/3]

WERD_RES::WERD_RES ( WERD *  the_word)
inline

Definition at line 342 of file pageres.h.

342  {
343  InitNonPointers();
344  InitPointers();
345  word = the_word;
346  }
void InitNonPointers()
Definition: pageres.cpp:1100
void InitPointers()
Definition: pageres.cpp:1128
WERD * word
Definition: pageres.h:189

◆ WERD_RES() [3/3]

WERD_RES::WERD_RES ( const WERD_RES &  source)
inline

Definition at line 349 of file pageres.h.

349  : ELIST_LINK(source) {
350  InitPointers();
351  *this = source; // see operator=
352  }
void InitPointers()
Definition: pageres.cpp:1128
ELIST_LINK()
Definition: elst.h:92

◆ ~WERD_RES()

WERD_RES::~WERD_RES ( )

Definition at line 1096 of file pageres.cpp.

1096  {
1097  Clear();
1098 }
void Clear()
Definition: pageres.cpp:1143

Member Function Documentation

◆ AlternativeChoiceAdjustmentsWorseThan()

bool WERD_RES::AlternativeChoiceAdjustmentsWorseThan ( float  threshold) const

Definition at line 445 of file pageres.cpp.

445  {
446  // The choices are not changed by this iteration.
447  WERD_CHOICE_IT wc_it(const_cast<WERD_CHOICE_LIST*>(&best_choices));
448  for (wc_it.forward(); !wc_it.at_first(); wc_it.forward()) {
449  WERD_CHOICE* choice = wc_it.data();
450  if (choice->adjust_factor() <= threshold)
451  return false;
452  }
453  return true;
454 }
WERD_CHOICE_LIST best_choices
Definition: pageres.h:243
float adjust_factor() const
Definition: ratngs.h:306

◆ AnyLtrCharsInWord()

bool WERD_RES::AnyLtrCharsInWord ( ) const
inline

Definition at line 408 of file pageres.h.

408  {
409  if (uch_set == nullptr || best_choice == nullptr || best_choice->length() < 1)
410  return false;
411  for (int id = 0; id < best_choice->length(); id++) {
412  int unichar_id = best_choice->unichar_id(id);
413  if (unichar_id < 0 || unichar_id >= uch_set->size())
414  continue; // Ignore illegal chars.
415  UNICHARSET::Direction dir = uch_set->get_direction(unichar_id);
416  if (dir == UNICHARSET::U_LEFT_TO_RIGHT)
417  return true;
418  }
419  return false;
420  }
int size() const
Definition: unicharset.h:336
Direction get_direction(UNICHAR_ID unichar_id) const
Definition: unicharset.h:685
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:315
int length() const
Definition: ratngs.h:303
const UNICHARSET * uch_set
Definition: pageres.h:206
WERD_CHOICE * best_choice
Definition: pageres.h:235

◆ AnyRtlCharsInWord()

bool WERD_RES::AnyRtlCharsInWord ( ) const
inline

Definition at line 391 of file pageres.h.

391  {
392  if (uch_set == nullptr || best_choice == nullptr || best_choice->length() < 1)
393  return false;
394  for (int id = 0; id < best_choice->length(); id++) {
395  int unichar_id = best_choice->unichar_id(id);
396  if (unichar_id < 0 || unichar_id >= uch_set->size())
397  continue; // Ignore illegal chars.
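398  UNICHARSET::Direction dir =
399  uch_set->get_direction(unichar_id);
400  if (dir == UNICHARSET::U_RIGHT_TO_LEFT ||
401  dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC ||
402  dir == UNICHARSET::U_ARABIC_NUMBER)
403  return true;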
404  }
405  return false;
406  }
int size() const
Definition: unicharset.h:336
Direction get_direction(UNICHAR_ID unichar_id) const
Definition: unicharset.h:685
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:315
int length() const
Definition: ratngs.h:303
const UNICHARSET * uch_set
Definition: pageres.h:206
WERD_CHOICE * best_choice
Definition: pageres.h:235

◆ BestChoiceToCorrectText()

void WERD_RES::BestChoiceToCorrectText ( )

Definition at line 929 of file pageres.cpp.

929  {
930  correct_text.clear();
931  ASSERT_HOST(best_choice != nullptr);
932  for (int i = 0; i < best_choice->length(); ++i) {
933  UNICHAR_ID choice_id = best_choice->unichar_id(i);
934  const char* blob_choice = uch_set->id_to_unichar(choice_id);
935  correct_text.push_back(STRING(blob_choice));
936  }
937 }
int UNICHAR_ID
Definition: unichar.h:35
GenericVector< STRING > correct_text
Definition: pageres.h:275
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:315
int length() const
Definition: ratngs.h:303
int push_back(T object)
Definition: strngs.h:45
const UNICHARSET * uch_set
Definition: pageres.h:206
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:290
WERD_CHOICE * best_choice
Definition: pageres.h:235
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ BestUTF8()

const char* WERD_RES::BestUTF8 ( int  blob_index,
bool  in_rtl_context 
) const
inline

Definition at line 361 of file pageres.h.

361  {
362  if (blob_index < 0 || best_choice == nullptr ||
363  blob_index >= best_choice->length())
364  return nullptr;
365  UNICHAR_ID id = best_choice->unichar_id(blob_index);
366  if (id < 0 || id >= uch_set->size())
367  return nullptr;
368  UNICHAR_ID mirrored = uch_set->get_mirror(id);
369  if (in_rtl_context && mirrored > 0)
370  id = mirrored;
371  return uch_set->id_to_unichar_ext(id);
372  }
int UNICHAR_ID
Definition: unichar.h:35
int size() const
Definition: unicharset.h:336
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:315
int length() const
Definition: ratngs.h:303
const char * id_to_unichar_ext(UNICHAR_ID id) const
Definition: unicharset.cpp:298
UNICHAR_ID get_mirror(UNICHAR_ID unichar_id) const
Definition: unicharset.h:692
const UNICHARSET * uch_set
Definition: pageres.h:206
WERD_CHOICE * best_choice
Definition: pageres.h:235

◆ BothHyphens()

UNICHAR_ID WERD_RES::BothHyphens ( UNICHAR_ID  id1,
UNICHAR_ID  id2 
)

Definition at line 1036 of file pageres.cpp.

1036  {
1037  const char *ch = uch_set->id_to_unichar(id1);
1038  const char *next_ch = uch_set->id_to_unichar(id2);
1039  if (strlen(ch) == 1 && strlen(next_ch) == 1 &&
1040  (*ch == '-' || *ch == '~') && (*next_ch == '-' || *next_ch == '~'))
1041  return uch_set->unichar_to_id("-");
1042  return INVALID_UNICHAR_ID;
1043 }
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:209
const UNICHARSET * uch_set
Definition: pageres.h:206
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:290

◆ BothQuotes()

UNICHAR_ID WERD_RES::BothQuotes ( UNICHAR_ID  id1,
UNICHAR_ID  id2 
)

Definition at line 1014 of file pageres.cpp.

1014  {
1015  const char *ch = uch_set->id_to_unichar(id1);
1016  const char *next_ch = uch_set->id_to_unichar(id2);
1017  if (is_simple_quote(ch, strlen(ch)) &&
1018  is_simple_quote(next_ch, strlen(next_ch)))
1019  return uch_set->unichar_to_id("\"");
1020  return INVALID_UNICHAR_ID;
1021 }
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:209
const UNICHARSET * uch_set
Definition: pageres.h:206
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:290

◆ BothSpaces()

UNICHAR_ID WERD_RES::BothSpaces ( UNICHAR_ID  id1,
UNICHAR_ID  id2 
)

Definition at line 1065 of file pageres.cpp.

1065  {
1066  if (id1 == id2 && id1 == uch_set->unichar_to_id(" "))
1067  return id1;
1068  else
1069  return INVALID_UNICHAR_ID;
1070 }
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:209
const UNICHARSET * uch_set
Definition: pageres.h:206

◆ Clear()

void WERD_RES::Clear ( )

Definition at line 1143 of file pageres.cpp.

1143  {
1144  if (combination) {
1145  delete word;
1146  }
1147  word = nullptr;
1148  delete blamer_bundle;
1149  blamer_bundle = nullptr;
1150  ClearResults();
1151 }
bool combination
Definition: pageres.h:334
void ClearResults()
Definition: pageres.cpp:1153
BlamerBundle * blamer_bundle
Definition: pageres.h:246
WERD * word
Definition: pageres.h:189

◆ ClearRatings()

void WERD_RES::ClearRatings ( )

Definition at line 1186 of file pageres.cpp.

1186  {
1187  if (ratings != nullptr) {
1188  ratings->delete_matrix_pointers();
1189  delete ratings;
1190  ratings = nullptr;
1191  }
1192 }
void delete_matrix_pointers()
Definition: matrix.h:455
MATRIX * ratings
Definition: pageres.h:231

◆ ClearResults()

void WERD_RES::ClearResults ( )

Definition at line 1153 of file pageres.cpp.

1153  {
1154  done = false;
1155  fontinfo = nullptr;
1156  fontinfo2 = nullptr;
1157  fontinfo_id_count = 0;
1158  fontinfo_id2_count = 0;
1159  delete bln_boxes;
1160  bln_boxes = nullptr;
1161  blob_row = nullptr;
1162  delete chopped_word;
1163  chopped_word = nullptr;
1164  delete rebuild_word;
1165  rebuild_word = nullptr;
1166  delete box_word;
1167  box_word = nullptr;
1168  best_state.clear();
1169  correct_text.clear();
1170  seam_array.delete_data_pointers();
1171  seam_array.clear();
1172  blob_widths.clear();
1173  blob_gaps.clear();
1174  ClearRatings();
1175  ClearWordChoices();
1176  if (blamer_bundle != nullptr) blamer_bundle->ClearResults();
1177 }
TWERD * rebuild_word
Definition: pageres.h:260
tesseract::BoxWord * bln_boxes
Definition: pageres.h:198
GenericVector< int > blob_widths
Definition: pageres.h:219
const FontInfo * fontinfo
Definition: pageres.h:304
int8_t fontinfo_id2_count
Definition: pageres.h:307
GenericVector< STRING > correct_text
Definition: pageres.h:275
void ClearResults()
Definition: blamer.h:185
GenericVector< int > best_state
Definition: pageres.h:271
int8_t fontinfo_id_count
Definition: pageres.h:306
GenericVector< SEAM * > seam_array
Definition: pageres.h:217
GenericVector< int > blob_gaps
Definition: pageres.h:222
bool done
Definition: pageres.h:298
void delete_data_pointers()
void ClearRatings()
Definition: pageres.cpp:1186
BlamerBundle * blamer_bundle
Definition: pageres.h:246
void ClearWordChoices()
Definition: pageres.cpp:1178
ROW * blob_row
Definition: pageres.h:200
TWERD * chopped_word
Definition: pageres.h:215
tesseract::BoxWord * box_word
Definition: pageres.h:266
const FontInfo * fontinfo2
Definition: pageres.h:305

◆ ClearWordChoices()

void WERD_RES::ClearWordChoices ( )

Definition at line 1178 of file pageres.cpp.

1178  {
1179  best_choice = nullptr;
1180  delete raw_choice;
1181  raw_choice = nullptr;
1182  best_choices.clear();
1183  delete ep_choice;
1184  ep_choice = nullptr;
1185 }
WERD_CHOICE_LIST best_choices
Definition: pageres.h:243
WERD_CHOICE * ep_choice
Definition: pageres.h:286
WERD_CHOICE * raw_choice
Definition: pageres.h:240
WERD_CHOICE * best_choice
Definition: pageres.h:235

◆ CloneChoppedToRebuild()

void WERD_RES::CloneChoppedToRebuild ( )

Definition at line 841 of file pageres.cpp.

841  {
842  delete rebuild_word;
843  rebuild_word = new TWERD(*chopped_word);
844  SetupBoxWord();
845  int word_len = box_word->length();
846  best_state.reserve(word_len);
847  correct_text.reserve(word_len);
848  for (int i = 0; i < word_len; ++i) {
849  best_state.push_back(1);
850  correct_text.push_back(STRING(""));
851  }
852 }
TWERD * rebuild_word
Definition: pageres.h:260
Definition: blobs.h:402
void reserve(int size)
GenericVector< STRING > correct_text
Definition: pageres.h:275
void SetupBoxWord()
Definition: pageres.cpp:855
GenericVector< int > best_state
Definition: pageres.h:271
int push_back(T object)
Definition: strngs.h:45
int length() const
Definition: boxword.h:83
TWERD * chopped_word
Definition: pageres.h:215
tesseract::BoxWord * box_word
Definition: pageres.h:266

◆ ComputeAdaptionThresholds()

void WERD_RES::ComputeAdaptionThresholds ( float  certainty_scale,
float  min_rating,
float  max_rating,
float  rating_margin,
float *  thresholds 
)

Definition at line 567 of file pageres.cpp.

571  {
572  int chunk = 0;
573  int end_chunk = best_choice->state(0);
574  int end_raw_chunk = raw_choice->state(0);
575  int raw_blob = 0;
576  for (int i = 0; i < best_choice->length(); i++, thresholds++) {
577  float avg_rating = 0.0f;
578  int num_error_chunks = 0;
579 
580  // For each chunk in best choice blob i, count non-matching raw results.
581  while (chunk < end_chunk) {
582  if (chunk >= end_raw_chunk) {
583  ++raw_blob;
584  end_raw_chunk += raw_choice->state(raw_blob);
585  }
586  if (best_choice->unichar_id(i) !=
587  raw_choice->unichar_id(raw_blob)) {
588  avg_rating += raw_choice->certainty(raw_blob);
589  ++num_error_chunks;
590  }
591  ++chunk;
592  }
593 
594  if (num_error_chunks > 0) {
595  avg_rating /= num_error_chunks;
596  *thresholds = (avg_rating / -certainty_scale) * (1.0 - rating_margin);
597  } else {
598  *thresholds = max_rating;
599  }
600 
601  if (*thresholds > max_rating)
602  *thresholds = max_rating;
603  if (*thresholds < min_rating)
604  *thresholds = min_rating;
605  }
606 }
int state(int index) const
Definition: ratngs.h:319
float certainty() const
Definition: ratngs.h:330
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:315
int length() const
Definition: ratngs.h:303
WERD_CHOICE * raw_choice
Definition: pageres.h:240
WERD_CHOICE * best_choice
Definition: pageres.h:235

◆ ConditionalBlobMerge()

bool WERD_RES::ConditionalBlobMerge ( TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *  class_cb,
TessResultCallback2< bool, const TBOX &, const TBOX &> *  box_cb 
)

Definition at line 944 of file pageres.cpp.

946  {
947  ASSERT_HOST(best_choice->length() == 0 || ratings != nullptr);
948  bool modified = false;
949  for (int i = 0; i + 1 < best_choice->length(); ++i) {
950  UNICHAR_ID new_id = class_cb->Run(best_choice->unichar_id(i),
951  best_choice->unichar_id(i+1));
952  if (new_id != INVALID_UNICHAR_ID &&
953  (box_cb == nullptr || box_cb->Run(box_word->BlobBox(i),
954  box_word->BlobBox(i + 1)))) {
955  // Raw choice should not be fixed.
956  best_choice->set_unichar_id(new_id, i);
957  modified = true;
958  MergeAdjacentBlobs(i);
959  const MATRIX_COORD& coord = best_choice->MatrixCoord(i);
960  if (!coord.Valid(*ratings)) {
961  ratings->IncreaseBandSize(coord.row + 1 - coord.col);
962  }
963  BLOB_CHOICE_LIST* blob_choices = GetBlobChoices(i);
964  if (FindMatchingChoice(new_id, blob_choices) == nullptr) {
965  // Insert a fake result.
966  BLOB_CHOICE* blob_choice = new BLOB_CHOICE;
967  blob_choice->set_unichar_id(new_id);
968  BLOB_CHOICE_IT bc_it(blob_choices);
969  bc_it.add_before_then_move(blob_choice);
970  }
971  }
972  }
973  delete class_cb;
974  delete box_cb;
975  return modified;
976 }
void set_unichar_id(UNICHAR_ID unichar_id, int index)
Definition: ratngs.h:359
void MergeAdjacentBlobs(int index)
Definition: pageres.cpp:980
int UNICHAR_ID
Definition: unichar.h:35
MATRIX_COORD MatrixCoord(int index) const
Definition: ratngs.cpp:302
BLOB_CHOICE_LIST * GetBlobChoices(int index) const
Definition: pageres.cpp:765
const TBOX & BlobBox(int index) const
Definition: boxword.h:84
bool Valid(const MATRIX &m) const
Definition: matrix.h:615
void IncreaseBandSize(int bandwidth)
Definition: matrix.cpp:54
void set_unichar_id(UNICHAR_ID newunichar_id)
Definition: ratngs.h:145
virtual R Run(A1, A2)=0
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:315
int length() const
Definition: ratngs.h:303
MATRIX * ratings
Definition: pageres.h:231
BLOB_CHOICE * FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list)
Definition: ratngs.cpp:180
WERD_CHOICE * best_choice
Definition: pageres.h:235
tesseract::BoxWord * box_word
Definition: pageres.h:266
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ ConsumeWordResults()

void WERD_RES::ConsumeWordResults ( WERD_RES *  word)

Definition at line 771 of file pageres.cpp.

771  {
772  denorm = word->denorm;
773  blob_row = word->blob_row;
774  MovePointerData(&chopped_word, &word->chopped_word);
775  MovePointerData(&rebuild_word, &word->rebuild_word);
776  MovePointerData(&box_word, &word->box_word);
777  seam_array.delete_data_pointers();
778  seam_array = word->seam_array;
779  word->seam_array.clear();
780  best_state.move(&word->best_state);
781  correct_text.move(&word->correct_text);
782  blob_widths.move(&word->blob_widths);
783  blob_gaps.move(&word->blob_gaps);
784  if (ratings != nullptr) ratings->delete_matrix_pointers();
785  MovePointerData(&ratings, &word->ratings);
786  best_choice = word->best_choice;
787  MovePointerData(&raw_choice, &word->raw_choice);
788  best_choices.clear();
789  WERD_CHOICE_IT wc_it(&best_choices);
790  wc_it.add_list_after(&word->best_choices);
791  reject_map = word->reject_map;
792  if (word->blamer_bundle != nullptr) {
793  assert(blamer_bundle != nullptr);
794  blamer_bundle->CopyResults(*(word->blamer_bundle));
795  }
796  CopySimpleFields(*word);
797 }
WERD_CHOICE_LIST best_choices
Definition: pageres.h:243
TWERD * rebuild_word
Definition: pageres.h:260
void delete_matrix_pointers()
Definition: matrix.h:455
void CopyResults(const BlamerBundle &other)
Definition: blamer.h:206
GenericVector< int > blob_widths
Definition: pageres.h:219
void move(GenericVector< T > *from)
REJMAP reject_map
Definition: pageres.h:287
void CopySimpleFields(const WERD_RES &source)
Definition: pageres.cpp:255
GenericVector< STRING > correct_text
Definition: pageres.h:275
DENORM denorm
Definition: pageres.h:204
GenericVector< int > best_state
Definition: pageres.h:271
GenericVector< SEAM * > seam_array
Definition: pageres.h:217
GenericVector< int > blob_gaps
Definition: pageres.h:222
MATRIX * ratings
Definition: pageres.h:231
void delete_data_pointers()
BlamerBundle * blamer_bundle
Definition: pageres.h:246
ROW * blob_row
Definition: pageres.h:200
WERD_CHOICE * raw_choice
Definition: pageres.h:240
TWERD * chopped_word
Definition: pageres.h:215
WERD_CHOICE * best_choice
Definition: pageres.h:235
tesseract::BoxWord * box_word
Definition: pageres.h:266
WERD * word
Definition: pageres.h:189

◆ copy_on()

void WERD_RES::copy_on ( WERD_RES *  word_res)
inline

Definition at line 660 of file pageres.h.

660  { //from this word
661  word->set_flag(W_BOL, word->flag(W_BOL) || word_res->word->flag(W_BOL));
662  word->set_flag(W_EOL, word->flag(W_EOL) || word_res->word->flag(W_EOL));
663  word->copy_on(word_res->word);
664  }
Definition: werd.h:35
void set_flag(WERD_FLAGS mask, bool value)
Definition: werd.h:127
bool flag(WERD_FLAGS mask) const
Definition: werd.h:126
Definition: werd.h:34
void copy_on(WERD *other)
Definition: werd.cpp:233
WERD * word
Definition: pageres.h:189

◆ CopySimpleFields()

void WERD_RES::CopySimpleFields ( const WERD_RES &  source)

Definition at line 255 of file pageres.cpp.

255  {
256  tess_failed = source.tess_failed;
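257  tess_accepted = source.tess_accepted;
258  tess_would_adapt = source.tess_would_adapt;
259  done = source.done;
260  unlv_crunch_mode = source.unlv_crunch_mode;
261  small_caps = source.small_caps;
262  odd_size = source.odd_size;
263  italic = source.italic;
264  bold = source.bold;
265  fontinfo = source.fontinfo;
266  fontinfo2 = source.fontinfo2;
267  fontinfo_id_count = source.fontinfo_id_count;
268  fontinfo_id2_count = source.fontinfo_id2_count;
269  x_height = source.x_height;
270  caps_height = source.caps_height;
271  baseline_shift = source.baseline_shift;
272  guessed_x_ht = source.guessed_x_ht;
273  guessed_caps_ht = source.guessed_caps_ht;
274  reject_spaces = source.reject_spaces;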
275  uch_set = source.uch_set;
276  tesseract = source.tesseract;
277 }
bool tess_failed
Definition: pageres.h:288
bool guessed_x_ht
Definition: pageres.h:308
bool guessed_caps_ht
Definition: pageres.h:309
int8_t italic
Definition: pageres.h:301
bool odd_size
Definition: pageres.h:300
const FontInfo * fontinfo
Definition: pageres.h:304
float baseline_shift
Definition: pageres.h:313
int8_t fontinfo_id2_count
Definition: pageres.h:307
bool small_caps
Definition: pageres.h:299
bool tess_would_adapt
Definition: pageres.h:297
CRUNCH_MODE unlv_crunch_mode
Definition: pageres.h:310
int8_t bold
Definition: pageres.h:302
bool tess_accepted
Definition: pageres.h:296
int8_t fontinfo_id_count
Definition: pageres.h:306
float caps_height
Definition: pageres.h:312
bool done
Definition: pageres.h:298
bool reject_spaces
Definition: pageres.h:336
tesseract::Tesseract * tesseract
Definition: pageres.h:282
float x_height
Definition: pageres.h:311
const UNICHARSET * uch_set
Definition: pageres.h:206
const FontInfo * fontinfo2
Definition: pageres.h:305

◆ DebugTopChoice()

void WERD_RES::DebugTopChoice ( const char *  msg) const

Definition at line 505 of file pageres.cpp.

505  {
506  tprintf("Best choice: accepted=%d, adaptable=%d, done=%d : ",
507  tess_accepted, tess_would_adapt, done);
508  if (best_choice == nullptr)
509  tprintf("<Null choice>\n");
510  else
511  best_choice->print(msg);
512 }
void print() const
Definition: ratngs.h:580
bool tess_would_adapt
Definition: pageres.h:297
bool tess_accepted
Definition: pageres.h:296
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
bool done
Definition: pageres.h:298
WERD_CHOICE * best_choice
Definition: pageres.h:235

◆ DebugWordChoices()

void WERD_RES::DebugWordChoices ( bool  debug,
const char *  word_to_debug 
)

Definition at line 486 of file pageres.cpp.

486  {
487  if (debug ||
488  (word_to_debug != nullptr && *word_to_debug != '\0' && best_choice != nullptr &&
489  best_choice->unichar_string() == STRING(word_to_debug))) {
490  if (raw_choice != nullptr)
491  raw_choice->print("\nBest Raw Choice");
492 
493  WERD_CHOICE_IT it(&best_choices);
494  int index = 0;
495  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++index) {
496  WERD_CHOICE* choice = it.data();
497  STRING label;
498  label.add_str_int("\nCooked Choice #", index);
499  choice->print(label.string());
500  }
501  }
502 }
WERD_CHOICE_LIST best_choices
Definition: pageres.h:243
const char * string() const
Definition: strngs.cpp:196
void print() const
Definition: ratngs.h:580
void add_str_int(const char *str, int number)
Definition: strngs.cpp:379
Definition: strngs.h:45
const STRING & unichar_string() const
Definition: ratngs.h:541
WERD_CHOICE * raw_choice
Definition: pageres.h:240
WERD_CHOICE * best_choice
Definition: pageres.h:235

◆ deep_copy()

static WERD_RES* WERD_RES::deep_copy ( const WERD_RES *  src)
inlinestatic

Definition at line 649 of file pageres.h.

649  {
650  WERD_RES* result = new WERD_RES(*src);
651  // That didn't copy the ratings, but we want a copy if there is one to
652  // begin with.
653  if (src->ratings != nullptr)
654  result->ratings = src->ratings->DeepCopy();
655  return result;
656  }
WERD_RES()
Definition: pageres.h:338
MATRIX * DeepCopy() const
Definition: matrix.cpp:99
MATRIX * ratings
Definition: pageres.h:231

◆ FakeClassifyWord()

void WERD_RES::FakeClassifyWord ( int  blob_count,
BLOB_CHOICE **  choices 
)

Definition at line 883 of file pageres.cpp.

883  {
884  // Setup the WERD_RES.
885  ASSERT_HOST(box_word != nullptr);
886  ASSERT_HOST(blob_count == box_word->length());
887  ClearWordChoices();
888  ClearRatings();
889  ratings = new MATRIX(blob_count, 1);
890  for (int c = 0; c < blob_count; ++c) {
891  BLOB_CHOICE_LIST* choice_list = new BLOB_CHOICE_LIST;
892  BLOB_CHOICE_IT choice_it(choice_list);
893  choice_it.add_after_then_move(choices[c]);
894  ratings->put(c, c, choice_list);
895  }
896  FakeWordFromRatings(TOP_CHOICE_PERM);
897  reject_map.initialise(blob_count);
898  best_state.init_to_size(blob_count, 1);
899  done = true;
900 }
void FakeWordFromRatings(PermuterType permuter)
Definition: pageres.cpp:904
REJMAP reject_map
Definition: pageres.h:287
void init_to_size(int size, const T &t)
GenericVector< int > best_state
Definition: pageres.h:271
void put(ICOORD pos, const T &thing)
Definition: matrix.h:220
bool done
Definition: pageres.h:298
MATRIX * ratings
Definition: pageres.h:231
void ClearRatings()
Definition: pageres.cpp:1186
int length() const
Definition: boxword.h:83
void ClearWordChoices()
Definition: pageres.cpp:1178
Definition: matrix.h:575
tesseract::BoxWord * box_word
Definition: pageres.h:266
void initialise(int16_t length)
Definition: rejctmap.cpp:275
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ FakeWordFromRatings()

void WERD_RES::FakeWordFromRatings ( PermuterType  permuter)

Definition at line 904 of file pageres.cpp.

904  {
905  int num_blobs = ratings->dimension();
906  WERD_CHOICE* word_choice = new WERD_CHOICE(uch_set, num_blobs);
907  word_choice->set_permuter(permuter);
908  for (int b = 0; b < num_blobs; ++b) {
909  UNICHAR_ID unichar_id = UNICHAR_SPACE;
910  float rating = INT32_MAX;
911  float certainty = -INT32_MAX;
912  BLOB_CHOICE_LIST* choices = ratings->get(b, b);
913  if (choices != nullptr && !choices->empty()) {
914  BLOB_CHOICE_IT bc_it(choices);
915  BLOB_CHOICE* choice = bc_it.data();
916  unichar_id = choice->unichar_id();
917  rating = choice->rating();
918  certainty = choice->certainty();
919  }
920  word_choice->append_unichar_id_space_allocated(unichar_id, 1, rating,
921  certainty);
922  }
923  LogNewRawChoice(word_choice);
924  // Ownership of word_choice taken by word here.
925  LogNewCookedChoice(1, false, word_choice);
926 }
float certainty() const
Definition: ratngs.h:83
int UNICHAR_ID
Definition: unichar.h:35
void append_unichar_id_space_allocated(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty)
Definition: ratngs.h:452
bool LogNewRawChoice(WERD_CHOICE *word_choice)
Definition: pageres.cpp:610
int dimension() const
Definition: matrix.h:533
float rating() const
Definition: ratngs.h:80
MATRIX * ratings
Definition: pageres.h:231
const UNICHARSET * uch_set
Definition: pageres.h:206
UNICHAR_ID unichar_id() const
Definition: ratngs.h:77
bool LogNewCookedChoice(int max_num_choices, bool debug, WERD_CHOICE *word_choice)
Definition: pageres.cpp:626
T get(ICOORD pos) const
Definition: matrix.h:228
void set_permuter(uint8_t perm)
Definition: ratngs.h:375

◆ FilterWordChoices()

void WERD_RES::FilterWordChoices ( int  debug_level)

Definition at line 519 of file pageres.cpp.

519  {
520  if (best_choice == nullptr || best_choices.singleton())
521  return;
522 
523  if (debug_level >= 2)
524  best_choice->print("\nFiltering against best choice");
525  WERD_CHOICE_IT it(&best_choices);
526  int index = 0;
527  for (it.forward(); !it.at_first(); it.forward(), ++index) {
528  WERD_CHOICE* choice = it.data();
529  float threshold = StopperAmbigThreshold(best_choice->adjust_factor(),
530  choice->adjust_factor());
531  // i, j index the blob choice in choice, best_choice.
532  // chunk is an index into the chopped_word blobs (AKA chunks).
533  // Since the two words may use different segmentations of the chunks, we
534  // iterate over the chunks to find out whether a comparable blob
535  // classification is much worse than the best result.
536  int i = 0, j = 0, chunk = 0;
537  // Each iteration of the while deals with 1 chunk. On entry choice_chunk
538  // and best_chunk are the indices of the first chunk in the NEXT blob,
539  // i.e. we don't have to increment i, j while chunk < choice_chunk and
540  // best_chunk respectively.
541  int choice_chunk = choice->state(0), best_chunk = best_choice->state(0);
542  while (i < choice->length() && j < best_choice->length()) {
543  if (choice->unichar_id(i) != best_choice->unichar_id(j) &&
544  choice->certainty(i) - best_choice->certainty(j) < threshold) {
545  if (debug_level >= 2) {
546  choice->print("WorstCertaintyDiffWorseThan");
547  tprintf(
548  "i %d j %d Choice->Blob[i].Certainty %.4g"
549  " WorstOtherChoiceCertainty %g Threshold %g\n",
550  i, j, choice->certainty(i), best_choice->certainty(j), threshold);
551  tprintf("Discarding bad choice #%d\n", index);
552  }
553  delete it.extract();
554  break;
555  }
556  ++chunk;
557  // If needed, advance choice_chunk to keep up with chunk.
558  while (choice_chunk < chunk && ++i < choice->length())
559  choice_chunk += choice->state(i);
560  // If needed, advance best_chunk to keep up with chunk.
561  while (best_chunk < chunk && ++j < best_choice->length())
562  best_chunk += best_choice->state(j);
563  }
564  }
565 }
WERD_CHOICE_LIST best_choices
Definition: pageres.h:243
void print() const
Definition: ratngs.h:580
int state(int index) const
Definition: ratngs.h:319
float certainty() const
Definition: ratngs.h:330
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:315
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
float adjust_factor() const
Definition: ratngs.h:306
WERD_CHOICE * best_choice
Definition: pageres.h:235

◆ fix_hyphens()

void WERD_RES::fix_hyphens ( )

Definition at line 1053 of file pageres.cpp.

1053  {
1054  if (!uch_set->contains_unichar("-") ||
1056  return; // Don't create it if it is disallowed.
1057 
1061 }
UNICHAR_ID BothHyphens(UNICHAR_ID id1, UNICHAR_ID id2)
Definition: pageres.cpp:1036
bool HyphenBoxesOverlap(const TBOX &box1, const TBOX &box2)
Definition: pageres.cpp:1047
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:209
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:670
bool ConditionalBlobMerge(TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *class_cb, TessResultCallback2< bool, const TBOX &, const TBOX &> *box_cb)
Definition: pageres.cpp:944
bool get_enabled(UNICHAR_ID unichar_id) const
Definition: unicharset.h:873
const UNICHARSET * uch_set
Definition: pageres.h:206

◆ fix_quotes()

void WERD_RES::fix_quotes ( )

Definition at line 1024 of file pageres.cpp.

1024  {
1025  if (!uch_set->contains_unichar("\"") ||
1027  return; // Don't create it if it is disallowed.
1028 
1031  nullptr);
1032 }
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:209
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:670
bool ConditionalBlobMerge(TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *class_cb, TessResultCallback2< bool, const TBOX &, const TBOX &> *box_cb)
Definition: pageres.cpp:944
bool get_enabled(UNICHAR_ID unichar_id) const
Definition: unicharset.h:873
const UNICHARSET * uch_set
Definition: pageres.h:206
UNICHAR_ID BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2)
Definition: pageres.cpp:1014

◆ GetBlobChoice()

BLOB_CHOICE * WERD_RES::GetBlobChoice ( int  index) const

Definition at line 756 of file pageres.cpp.

756  {
757  if (index < 0 || index >= best_choice->length()) return nullptr;
758  BLOB_CHOICE_LIST* choices = GetBlobChoices(index);
759  return FindMatchingChoice(best_choice->unichar_id(index), choices);
760 }
BLOB_CHOICE_LIST * GetBlobChoices(int index) const
Definition: pageres.cpp:765
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:315
int length() const
Definition: ratngs.h:303
BLOB_CHOICE * FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list)
Definition: ratngs.cpp:180
WERD_CHOICE * best_choice
Definition: pageres.h:235

◆ GetBlobChoices()

BLOB_CHOICE_LIST * WERD_RES::GetBlobChoices ( int  index) const

Definition at line 765 of file pageres.cpp.

765  {
766  return best_choice->blob_choices(index, ratings);
767 }
MATRIX * ratings
Definition: pageres.h:231
WERD_CHOICE * best_choice
Definition: pageres.h:235
BLOB_CHOICE_LIST * blob_choices(int index, MATRIX *ratings) const
Definition: ratngs.cpp:290

◆ GetBlobsGap()

int WERD_RES::GetBlobsGap ( int  blob_index)

Definition at line 746 of file pageres.cpp.

746  {
747  if (blob_index < 0 || blob_index >= blob_gaps.size())
748  return 0;
749  return blob_gaps[blob_index];
750 }
int size() const
Definition: genericvector.h:71
GenericVector< int > blob_gaps
Definition: pageres.h:222

◆ GetBlobsWidth()

int WERD_RES::GetBlobsWidth ( int  start_blob,
int  last_blob 
)

Definition at line 736 of file pageres.cpp.

736  {
737  int result = 0;
738  for (int b = start_blob; b <= last_blob; ++b) {
739  result += blob_widths[b];
740  if (b < last_blob)
741  result += blob_gaps[b];
742  }
743  return result;
744 }
GenericVector< int > blob_widths
Definition: pageres.h:219
GenericVector< int > blob_gaps
Definition: pageres.h:222

◆ HyphenBoxesOverlap()

bool WERD_RES::HyphenBoxesOverlap ( const TBOX & box1,
const TBOX & box2 
)

Definition at line 1047 of file pageres.cpp.

1047  {
1048  return box1.right() >= box2.left();
1049 }
int16_t left() const
Definition: rect.h:72
int16_t right() const
Definition: rect.h:79

◆ InitForRetryRecognition()

void WERD_RES::InitForRetryRecognition ( const WERD_RES & source)

Definition at line 283 of file pageres.cpp.

283  {
284  word = source.word;
285  CopySimpleFields(source);
286  if (source.blamer_bundle != nullptr) {
287  blamer_bundle = new BlamerBundle();
289  }
290 }
void CopySimpleFields(const WERD_RES &source)
Definition: pageres.cpp:255
void CopyTruth(const BlamerBundle &other)
Definition: blamer.h:199
BlamerBundle * blamer_bundle
Definition: pageres.h:246
WERD * word
Definition: pageres.h:189

◆ InitNonPointers()

void WERD_RES::InitNonPointers ( )

Definition at line 1100 of file pageres.cpp.

1100  {
1101  tess_failed = false;
1102  tess_accepted = false;
1103  tess_would_adapt = false;
1104  done = false;
1106  small_caps = false;
1107  odd_size = false;
1108  italic = FALSE;
1109  bold = FALSE;
1110  // The fontinfos and tesseract count as non-pointers as they point to
1111  // data owned elsewhere.
1112  fontinfo = nullptr;
1113  fontinfo2 = nullptr;
1114  tesseract = nullptr;
1115  fontinfo_id_count = 0;
1116  fontinfo_id2_count = 0;
1117  x_height = 0.0;
1118  caps_height = 0.0;
1119  baseline_shift = 0.0f;
1120  space_certainty = 0.0f;
1121  guessed_x_ht = true;
1122  guessed_caps_ht = true;
1123  combination = false;
1124  part_of_combo = false;
1125  reject_spaces = false;
1126 }
float space_certainty
Definition: pageres.h:316
bool tess_failed
Definition: pageres.h:288
bool guessed_x_ht
Definition: pageres.h:308
bool guessed_caps_ht
Definition: pageres.h:309
int8_t italic
Definition: pageres.h:301
bool odd_size
Definition: pageres.h:300
const FontInfo * fontinfo
Definition: pageres.h:304
float baseline_shift
Definition: pageres.h:313
int8_t fontinfo_id2_count
Definition: pageres.h:307
bool small_caps
Definition: pageres.h:299
bool tess_would_adapt
Definition: pageres.h:297
CRUNCH_MODE unlv_crunch_mode
Definition: pageres.h:310
#define FALSE
Definition: capi.h:52
int8_t bold
Definition: pageres.h:302
bool tess_accepted
Definition: pageres.h:296
int8_t fontinfo_id_count
Definition: pageres.h:306
float caps_height
Definition: pageres.h:312
bool done
Definition: pageres.h:298
bool reject_spaces
Definition: pageres.h:336
bool combination
Definition: pageres.h:334
float x_height
Definition: pageres.h:311
bool part_of_combo
Definition: pageres.h:335
const FontInfo * fontinfo2
Definition: pageres.h:305

◆ InitPointers()

void WERD_RES::InitPointers ( )

Definition at line 1128 of file pageres.cpp.

1128  {
1129  word = nullptr;
1130  bln_boxes = nullptr;
1131  blob_row = nullptr;
1132  uch_set = nullptr;
1133  chopped_word = nullptr;
1134  rebuild_word = nullptr;
1135  box_word = nullptr;
1136  ratings = nullptr;
1137  best_choice = nullptr;
1138  raw_choice = nullptr;
1139  ep_choice = nullptr;
1140  blamer_bundle = nullptr;
1141 }
TWERD * rebuild_word
Definition: pageres.h:260
tesseract::BoxWord * bln_boxes
Definition: pageres.h:198
MATRIX * ratings
Definition: pageres.h:231
const UNICHARSET * uch_set
Definition: pageres.h:206
WERD_CHOICE * ep_choice
Definition: pageres.h:286
BlamerBundle * blamer_bundle
Definition: pageres.h:246
ROW * blob_row
Definition: pageres.h:200
WERD_CHOICE * raw_choice
Definition: pageres.h:240
TWERD * chopped_word
Definition: pageres.h:215
WERD_CHOICE * best_choice
Definition: pageres.h:235
tesseract::BoxWord * box_word
Definition: pageres.h:266
WERD * word
Definition: pageres.h:189

◆ InsertSeam()

void WERD_RES::InsertSeam ( int  blob_number,
SEAM * seam 
)

Definition at line 424 of file pageres.cpp.

424  {
425  // Insert the seam into the SEAMS array.
426  seam->PrepareToInsertSeam(seam_array, chopped_word->blobs, blob_number, true);
427  seam_array.insert(seam, blob_number);
428  if (ratings != nullptr) {
429  // Expand the ratings matrix.
430  ratings = ratings->ConsumeAndMakeBigger(blob_number);
431  // Fix all the segmentation states.
432  if (raw_choice != nullptr)
433  raw_choice->UpdateStateForSplit(blob_number);
434  WERD_CHOICE_IT wc_it(&best_choices);
435  for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) {
436  WERD_CHOICE* choice = wc_it.data();
437  choice->UpdateStateForSplit(blob_number);
438  }
440  }
441 }
WERD_CHOICE_LIST best_choices
Definition: pageres.h:243
void SetupBlobWidthsAndGaps()
Definition: pageres.cpp:406
MATRIX * ConsumeAndMakeBigger(int ind)
Definition: matrix.cpp:63
void UpdateStateForSplit(int blob_position)
Definition: ratngs.cpp:702
bool PrepareToInsertSeam(const GenericVector< SEAM *> &seams, const GenericVector< TBLOB *> &blobs, int insert_index, bool modify)
Definition: seam.cpp:82
void insert(const T &t, int index)
GenericVector< SEAM * > seam_array
Definition: pageres.h:217
GenericVector< TBLOB * > blobs
Definition: blobs.h:443
MATRIX * ratings
Definition: pageres.h:231
WERD_CHOICE * raw_choice
Definition: pageres.h:240
TWERD * chopped_word
Definition: pageres.h:215

◆ IsAmbiguous()

bool WERD_RES::IsAmbiguous ( )

Definition at line 458 of file pageres.cpp.

458  {
459  return !best_choices.singleton() || best_choice->dangerous_ambig_found();
460 }
WERD_CHOICE_LIST best_choices
Definition: pageres.h:243
bool dangerous_ambig_found() const
Definition: ratngs.h:363
WERD_CHOICE * best_choice
Definition: pageres.h:235

◆ LogNewCookedChoice()

bool WERD_RES::LogNewCookedChoice ( int  max_num_choices,
bool  debug,
WERD_CHOICE * word_choice 
)

Definition at line 626 of file pageres.cpp.

627  {
628  if (best_choice != nullptr) {
629  // Throw out obviously bad choices to save some work.
630  // TODO(rays) Get rid of this! This piece of code produces different
631  // results according to the order in which words are found, which is an
632  // undesirable behavior. It would be better to keep all the choices and
633  // prune them later when more information is available.
634  float max_certainty_delta =
635  StopperAmbigThreshold(best_choice->adjust_factor(),
636  word_choice->adjust_factor());
637  if (max_certainty_delta > -kStopperAmbiguityThresholdOffset)
638  max_certainty_delta = -kStopperAmbiguityThresholdOffset;
639  if (word_choice->certainty() - best_choice->certainty() <
640  max_certainty_delta) {
641  if (debug) {
642  STRING bad_string;
643  word_choice->string_and_lengths(&bad_string, nullptr);
644  tprintf("Discarding choice \"%s\" with an overly low certainty"
645  " %.3f vs best choice certainty %.3f (Threshold: %.3f)\n",
646  bad_string.string(), word_choice->certainty(),
648  max_certainty_delta + best_choice->certainty());
649  }
650  delete word_choice;
651  return false;
652  }
653  }
654 
655  // Insert in the list in order of increasing rating, but knock out worse
656  // string duplicates.
657  WERD_CHOICE_IT it(&best_choices);
658  const STRING& new_str = word_choice->unichar_string();
659  bool inserted = false;
660  int num_choices = 0;
661  if (!it.empty()) {
662  do {
663  WERD_CHOICE* choice = it.data();
664  if (choice->rating() > word_choice->rating() && !inserted) {
665  // Time to insert.
666  it.add_before_stay_put(word_choice);
667  inserted = true;
668  if (num_choices == 0)
669  best_choice = word_choice; // This is the new best.
670  ++num_choices;
671  }
672  if (choice->unichar_string() == new_str) {
673  if (inserted) {
674  // New is better.
675  delete it.extract();
676  } else {
677  // Old is better.
678  if (debug) {
679  tprintf("Discarding duplicate choice \"%s\", rating %g vs %g\n",
680  new_str.string(), word_choice->rating(), choice->rating());
681  }
682  delete word_choice;
683  return false;
684  }
685  } else {
686  ++num_choices;
687  if (num_choices > max_num_choices)
688  delete it.extract();
689  }
690  it.forward();
691  } while (!it.at_first());
692  }
693  if (!inserted && num_choices < max_num_choices) {
694  it.add_to_end(word_choice);
695  inserted = true;
696  if (num_choices == 0)
697  best_choice = word_choice; // This is the new best.
698  }
699  if (debug) {
700  if (inserted)
701  tprintf("New %s", best_choice == word_choice ? "Best" : "Secondary");
702  else
703  tprintf("Poor");
704  word_choice->print(" Word Choice");
705  }
706  if (!inserted) {
707  delete word_choice;
708  return false;
709  }
710  return true;
711 }
WERD_CHOICE_LIST best_choices
Definition: pageres.h:243
const char * string() const
Definition: strngs.cpp:196
void print() const
Definition: ratngs.h:580
float rating() const
Definition: ratngs.h:327
float certainty() const
Definition: ratngs.h:330
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
float adjust_factor() const
Definition: ratngs.h:306
void string_and_lengths(STRING *word_str, STRING *word_lengths_str) const
Definition: ratngs.cpp:449
Definition: strngs.h:45
const STRING & unichar_string() const
Definition: ratngs.h:541
WERD_CHOICE * best_choice
Definition: pageres.h:235

◆ LogNewRawChoice()

bool WERD_RES::LogNewRawChoice ( WERD_CHOICE * word_choice)

Definition at line 610 of file pageres.cpp.

610  {
611  if (raw_choice == nullptr || word_choice->rating() < raw_choice->rating()) {
612  delete raw_choice;
613  raw_choice = new WERD_CHOICE(*word_choice);
615  return true;
616  }
617  return false;
618 }
float rating() const
Definition: ratngs.h:327
WERD_CHOICE * raw_choice
Definition: pageres.h:240
void set_permuter(uint8_t perm)
Definition: ratngs.h:375

◆ merge_tess_fails()

void WERD_RES::merge_tess_fails ( )

Definition at line 1073 of file pageres.cpp.

1073  {
1075  NewPermanentTessCallback(this, &WERD_RES::BothSpaces), nullptr)) {
1076  int len = best_choice->length();
1077  ASSERT_HOST(reject_map.length() == len);
1078  ASSERT_HOST(box_word->length() == len);
1079  }
1080 }
REJMAP reject_map
Definition: pageres.h:287
int32_t length() const
Definition: rejctmap.h:223
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
UNICHAR_ID BothSpaces(UNICHAR_ID id1, UNICHAR_ID id2)
Definition: pageres.cpp:1065
bool ConditionalBlobMerge(TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *class_cb, TessResultCallback2< bool, const TBOX &, const TBOX &> *box_cb)
Definition: pageres.cpp:944
int length() const
Definition: ratngs.h:303
int length() const
Definition: boxword.h:83
WERD_CHOICE * best_choice
Definition: pageres.h:235
tesseract::BoxWord * box_word
Definition: pageres.h:266
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ MergeAdjacentBlobs()

void WERD_RES::MergeAdjacentBlobs ( int  index)

Definition at line 980 of file pageres.cpp.

980  {
981  if (reject_map.length() == best_choice->length())
982  reject_map.remove_pos(index);
983  best_choice->remove_unichar_id(index + 1);
984  rebuild_word->MergeBlobs(index, index + 2);
985  box_word->MergeBoxes(index, index + 2);
986  if (index + 1 < best_state.length()) {
987  best_state[index] += best_state[index + 1];
988  best_state.remove(index + 1);
989  }
990 }
TWERD * rebuild_word
Definition: pageres.h:260
void remove_unichar_id(int index)
Definition: ratngs.h:484
REJMAP reject_map
Definition: pageres.h:287
void MergeBoxes(int start, int end)
Definition: boxword.cpp:131
void remove(int index)
int32_t length() const
Definition: rejctmap.h:223
int length() const
Definition: genericvector.h:85
GenericVector< int > best_state
Definition: pageres.h:271
int length() const
Definition: ratngs.h:303
void MergeBlobs(int start, int end)
Definition: blobs.cpp:882
WERD_CHOICE * best_choice
Definition: pageres.h:235
tesseract::BoxWord * box_word
Definition: pageres.h:266
void remove_pos(int16_t pos)
Definition: rejctmap.cpp:311

◆ operator=()

WERD_RES & WERD_RES::operator= ( const WERD_RES & source)

Definition at line 192 of file pageres.cpp.

192  {
193  this->ELIST_LINK::operator=(source);
194  Clear();
195  if (source.combination) {
196  word = new WERD;
197  *word = *(source.word); // deep copy
198  } else {
199  word = source.word; // pt to same word
200  }
201  if (source.bln_boxes != nullptr)
202  bln_boxes = new tesseract::BoxWord(*source.bln_boxes);
203  if (source.chopped_word != nullptr)
204  chopped_word = new TWERD(*source.chopped_word);
205  if (source.rebuild_word != nullptr)
206  rebuild_word = new TWERD(*source.rebuild_word);
207  // TODO(rays) Do we ever need to copy the seam_array?
208  blob_row = source.blob_row;
209  denorm = source.denorm;
210  if (source.box_word != nullptr)
211  box_word = new tesseract::BoxWord(*source.box_word);
212  best_state = source.best_state;
213  correct_text = source.correct_text;
214  blob_widths = source.blob_widths;
215  blob_gaps = source.blob_gaps;
216  // None of the uses of operator= require the ratings matrix to be copied,
217  // so don't as it would be really slow.
218 
219  // Copy the cooked choices.
220  WERD_CHOICE_IT wc_it(const_cast<WERD_CHOICE_LIST*>(&source.best_choices));
221  WERD_CHOICE_IT wc_dest_it(&best_choices);
222  for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) {
223  const WERD_CHOICE *choice = wc_it.data();
224  wc_dest_it.add_after_then_move(new WERD_CHOICE(*choice));
225  }
226  if (!wc_dest_it.empty()) {
227  wc_dest_it.move_to_first();
228  best_choice = wc_dest_it.data();
229  } else {
230  best_choice = nullptr;
231  }
232 
233  if (source.raw_choice != nullptr) {
234  raw_choice = new WERD_CHOICE(*source.raw_choice);
235  } else {
236  raw_choice = nullptr;
237  }
238  if (source.ep_choice != nullptr) {
239  ep_choice = new WERD_CHOICE(*source.ep_choice);
240  } else {
241  ep_choice = nullptr;
242  }
243  reject_map = source.reject_map;
244  combination = source.combination;
245  part_of_combo = source.part_of_combo;
246  CopySimpleFields(source);
247  if (source.blamer_bundle != nullptr) {
248  blamer_bundle = new BlamerBundle(*(source.blamer_bundle));
249  }
250  return *this;
251 }
WERD_CHOICE_LIST best_choices
Definition: pageres.h:243
TWERD * rebuild_word
Definition: pageres.h:260
Definition: blobs.h:402
tesseract::BoxWord * bln_boxes
Definition: pageres.h:198
GenericVector< int > blob_widths
Definition: pageres.h:219
REJMAP reject_map
Definition: pageres.h:287
void CopySimpleFields(const WERD_RES &source)
Definition: pageres.cpp:255
void Clear()
Definition: pageres.cpp:1143
GenericVector< STRING > correct_text
Definition: pageres.h:275
DENORM denorm
Definition: pageres.h:204
void operator=(const ELIST_LINK &)
Definition: elst.h:101
GenericVector< int > best_state
Definition: pageres.h:271
Definition: werd.h:59
GenericVector< int > blob_gaps
Definition: pageres.h:222
bool combination
Definition: pageres.h:334
WERD_CHOICE * ep_choice
Definition: pageres.h:286
BlamerBundle * blamer_bundle
Definition: pageres.h:246
bool part_of_combo
Definition: pageres.h:335
ROW * blob_row
Definition: pageres.h:200
WERD_CHOICE * raw_choice
Definition: pageres.h:240
TWERD * chopped_word
Definition: pageres.h:215
WERD_CHOICE * best_choice
Definition: pageres.h:235
tesseract::BoxWord * box_word
Definition: pageres.h:266
WERD * word
Definition: pageres.h:189

◆ PiecesAllNatural()

bool WERD_RES::PiecesAllNatural ( int  start,
int  count 
) const

Definition at line 1084 of file pageres.cpp.

1084  {
1085  // all seams must have no splits.
1086  for (int index = start; index < start + count - 1; ++index) {
1087  if (index >= 0 && index < seam_array.size()) {
1088  SEAM* seam = seam_array[index];
1089  if (seam != nullptr && seam->HasAnySplits()) return false;
1090  }
1091  }
1092  return true;
1093 }
int size() const
Definition: genericvector.h:71
int count(LIST var_list)
Definition: oldlist.cpp:98
Definition: seam.h:44
GenericVector< SEAM * > seam_array
Definition: pageres.h:217
bool HasAnySplits() const
Definition: seam.h:67

◆ PrintBestChoices()

void WERD_RES::PrintBestChoices ( ) const

Definition at line 723 of file pageres.cpp.

723  {
724  STRING alternates_str;
725  WERD_CHOICE_IT it(const_cast<WERD_CHOICE_LIST*>(&best_choices));
726  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
727  if (!it.at_first()) alternates_str += "\", \"";
728  alternates_str += it.data()->unichar_string();
729  }
730  tprintf("Alternates for \"%s\": {\"%s\"}\n",
731  best_choice->unichar_string().string(), alternates_str.string());
732 }
WERD_CHOICE_LIST best_choices
Definition: pageres.h:243
const char * string() const
Definition: strngs.cpp:196
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
Definition: strngs.h:45
const STRING & unichar_string() const
Definition: ratngs.h:541
WERD_CHOICE * best_choice
Definition: pageres.h:235

◆ RawUTF8()

const char* WERD_RES::RawUTF8 ( int  blob_index) const
inline

Definition at line 374 of file pageres.h.

374  {
375  if (blob_index < 0 || blob_index >= raw_choice->length())
376  return nullptr;
377  UNICHAR_ID id = raw_choice->unichar_id(blob_index);
378  if (id < 0 || id >= uch_set->size())
379  return nullptr;
380  return uch_set->id_to_unichar(id);
381  }
int UNICHAR_ID
Definition: unichar.h:35
int size() const
Definition: unicharset.h:336
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:315
int length() const
Definition: ratngs.h:303
const UNICHARSET * uch_set
Definition: pageres.h:206
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:290
WERD_CHOICE * raw_choice
Definition: pageres.h:240

◆ RebuildBestState()

void WERD_RES::RebuildBestState ( )

Definition at line 814 of file pageres.cpp.

814  {
815  ASSERT_HOST(best_choice != nullptr);
816  delete rebuild_word;
817  rebuild_word = new TWERD;
818  if (seam_array.empty())
820  best_state.truncate(0);
821  int start = 0;
822  for (int i = 0; i < best_choice->length(); ++i) {
823  int length = best_choice->state(i);
824  best_state.push_back(length);
825  if (length > 1) {
827  start + length - 1);
828  }
829  TBLOB* blob = chopped_word->blobs[start];
830  rebuild_word->blobs.push_back(new TBLOB(*blob));
831  if (length > 1) {
833  start + length - 1);
834  }
835  start += length;
836  }
837 }
TWERD * rebuild_word
Definition: pageres.h:260
Definition: blobs.h:402
void start_seam_list(TWERD *word, GenericVector< SEAM *> *seam_array)
Definition: seam.cpp:269
int state(int index) const
Definition: ratngs.h:319
static void JoinPieces(const GenericVector< SEAM *> &seams, const GenericVector< TBLOB *> &blobs, int first, int last)
Definition: seam.cpp:216
static void BreakPieces(const GenericVector< SEAM *> &seams, const GenericVector< TBLOB *> &blobs, int first, int last)
Definition: seam.cpp:194
GenericVector< int > best_state
Definition: pageres.h:271
bool empty() const
Definition: genericvector.h:90
int length() const
Definition: ratngs.h:303
GenericVector< SEAM * > seam_array
Definition: pageres.h:217
int push_back(T object)
GenericVector< TBLOB * > blobs
Definition: blobs.h:443
void truncate(int size)
Definition: blobs.h:268
TWERD * chopped_word
Definition: pageres.h:215
WERD_CHOICE * best_choice
Definition: pageres.h:235
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ ReplaceBestChoice()

void WERD_RES::ReplaceBestChoice ( WERD_CHOICE * choice)

Definition at line 801 of file pageres.cpp.

801  {
802  best_choice = choice;
804  SetupBoxWord();
805  // Make up a fake reject map of the right length to keep the
806  // rejection pass happy.
810 }
REJMAP reject_map
Definition: pageres.h:287
void SetScriptPositions()
Definition: pageres.cpp:864
void SetupBoxWord()
Definition: pageres.cpp:855
bool tess_would_adapt
Definition: pageres.h:297
bool tess_accepted
Definition: pageres.h:296
int length() const
Definition: genericvector.h:85
GenericVector< int > best_state
Definition: pageres.h:271
bool done
Definition: pageres.h:298
void RebuildBestState()
Definition: pageres.cpp:814
WERD_CHOICE * best_choice
Definition: pageres.h:235
void initialise(int16_t length)
Definition: rejctmap.cpp:275

◆ SetAllScriptPositions()

void WERD_RES::SetAllScriptPositions ( tesseract::ScriptPos  position)

Definition at line 871 of file pageres.cpp.

871  {
873  WERD_CHOICE_IT wc_it(&best_choices);
874  for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward())
875  wc_it.data()->SetAllScriptPositions(position);
876 }
WERD_CHOICE_LIST best_choices
Definition: pageres.h:243
void SetAllScriptPositions(tesseract::ScriptPos position)
Definition: ratngs.cpp:626
WERD_CHOICE * raw_choice
Definition: pageres.h:240

◆ SetScriptPositions()

void WERD_RES::SetScriptPositions ( )

Definition at line 864 of file pageres.cpp.

864  {
866 }
bool small_caps
Definition: pageres.h:299
void SetScriptPositions(bool small_caps, TWERD *word, int debug=0)
Definition: ratngs.cpp:550
TWERD * chopped_word
Definition: pageres.h:215
WERD_CHOICE * best_choice
Definition: pageres.h:235

◆ SetupBasicsFromChoppedWord()

void WERD_RES::SetupBasicsFromChoppedWord ( const UNICHARSET & unicharset_in)

Definition at line 349 of file pageres.cpp.

349  {
354 }
void SetupBlobWidthsAndGaps()
Definition: pageres.cpp:406
tesseract::BoxWord * bln_boxes
Definition: pageres.h:198
void start_seam_list(TWERD *word, GenericVector< SEAM *> *seam_array)
Definition: seam.cpp:269
static BoxWord * CopyFromNormalized(TWERD *tessword)
Definition: boxword.cpp:56
GenericVector< SEAM * > seam_array
Definition: pageres.h:217
void ClearWordChoices()
Definition: pageres.cpp:1178
TWERD * chopped_word
Definition: pageres.h:215

◆ SetupBlamerBundle()

void WERD_RES::SetupBlamerBundle ( )

Definition at line 399 of file pageres.cpp.

399  {
400  if (blamer_bundle != nullptr) {
402  }
403 }
DENORM denorm
Definition: pageres.h:204
BlamerBundle * blamer_bundle
Definition: pageres.h:246
void SetupNormTruthWord(const DENORM &denorm)
Definition: blamer.cpp:150

◆ SetupBlobWidthsAndGaps()

void WERD_RES::SetupBlobWidthsAndGaps ( )

Definition at line 406 of file pageres.cpp.

406  {
408  blob_gaps.truncate(0);
409  int num_blobs = chopped_word->NumBlobs();
410  for (int b = 0; b < num_blobs; ++b) {
411  TBLOB *blob = chopped_word->blobs[b];
412  TBOX box = blob->bounding_box();
413  blob_widths.push_back(box.width());
414  if (b + 1 < num_blobs) {
416  chopped_word->blobs[b + 1]->bounding_box().left() - box.right());
417  }
418  }
419 }
GenericVector< int > blob_widths
Definition: pageres.h:219
Definition: rect.h:34
int NumBlobs() const
Definition: blobs.h:432
int16_t width() const
Definition: rect.h:115
TBOX bounding_box() const
Definition: blobs.cpp:478
GenericVector< int > blob_gaps
Definition: pageres.h:222
int push_back(T object)
GenericVector< TBLOB * > blobs
Definition: blobs.h:443
int16_t right() const
Definition: rect.h:79
void truncate(int size)
Definition: blobs.h:268
TWERD * chopped_word
Definition: pageres.h:215

◆ SetupBoxWord()

void WERD_RES::SetupBoxWord ( )

Definition at line 855 of file pageres.cpp.

855  {
856  delete box_word;
860 }
TWERD * rebuild_word
Definition: pageres.h:260
void ClipToOriginalWord(const BLOCK *block, WERD *original_word)
Definition: boxword.cpp:92
const BLOCK * block() const
Definition: normalis.h:273
DENORM denorm
Definition: pageres.h:204
static BoxWord * CopyFromNormalized(TWERD *tessword)
Definition: boxword.cpp:56
void ComputeBoundingBoxes()
Definition: blobs.cpp:865
tesseract::BoxWord * box_word
Definition: pageres.h:266
WERD * word
Definition: pageres.h:189

◆ SetupFake()

void WERD_RES::SetupFake ( const UNICHARSET & uch)

Definition at line 358 of file pageres.cpp.

358  {
359  ClearResults();
360  SetupWordScript(unicharset_in);
361  chopped_word = new TWERD;
362  rebuild_word = new TWERD;
365  int blob_count = word->cblob_list()->length();
366  if (blob_count > 0) {
367  BLOB_CHOICE** fake_choices = new BLOB_CHOICE*[blob_count];
368  // For non-text blocks, just pass any blobs through to the box_word
369  // and call the word failed with a fake classification.
370  C_BLOB_IT b_it(word->cblob_list());
371  int blob_id = 0;
372  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
373  TBOX box = b_it.data()->bounding_box();
374  box_word->InsertBox(box_word->length(), box);
375  fake_choices[blob_id++] = new BLOB_CHOICE;
376  }
377  FakeClassifyWord(blob_count, fake_choices);
378  delete [] fake_choices;
379  } else {
380  WERD_CHOICE* word = new WERD_CHOICE(&unicharset_in);
381  word->make_bad();
383  // Ownership of word is taken by *this WERD_RES in LogNewCookedChoice.
384  LogNewCookedChoice(1, false, word);
385  }
386  tess_failed = true;
387  done = true;
388 }
bool tess_failed
Definition: pageres.h:288
TWERD * rebuild_word
Definition: pageres.h:260
Definition: blobs.h:402
tesseract::BoxWord * bln_boxes
Definition: pageres.h:198
Definition: rect.h:34
void FakeClassifyWord(int blob_count, BLOB_CHOICE **choices)
Definition: pageres.cpp:883
bool LogNewRawChoice(WERD_CHOICE *word_choice)
Definition: pageres.cpp:610
C_BLOB_LIST * cblob_list()
Definition: werd.h:98
bool done
Definition: pageres.h:298
void SetupWordScript(const UNICHARSET &unicharset_in)
Definition: pageres.cpp:390
void ClearResults()
Definition: pageres.cpp:1153
int length() const
Definition: boxword.h:83
TWERD * chopped_word
Definition: pageres.h:215
bool LogNewCookedChoice(int max_num_choices, bool debug, WERD_CHOICE *word_choice)
Definition: pageres.cpp:626
tesseract::BoxWord * box_word
Definition: pageres.h:266
void InsertBox(int index, const TBOX &box)
Definition: boxword.cpp:148
WERD * word
Definition: pageres.h:189

◆ SetupForRecognition()

bool WERD_RES::SetupForRecognition ( const UNICHARSET & unicharset_in,
tesseract::Tesseract * tesseract,
Pix *  pix,
int  norm_mode,
const TBOX * norm_box,
bool  numeric_mode,
bool  use_body_size,
bool  allow_detailed_fx,
ROW * row,
const BLOCK * block 
)

Definition at line 308 of file pageres.cpp.

315  {
316  tesseract::OcrEngineMode norm_mode_hint =
317  static_cast<tesseract::OcrEngineMode>(norm_mode);
318  tesseract = tess;
319  POLY_BLOCK* pb = block != nullptr ? block->pdblk.poly_block() : nullptr;
320  if ((norm_mode_hint != tesseract::OEM_LSTM_ONLY &&
321  word->cblob_list()->empty()) ||
322  (pb != nullptr && !pb->IsText())) {
323  // Empty words occur when all the blobs have been moved to the rej_blobs
324  // list, which seems to occur frequently in junk.
325  SetupFake(unicharset_in);
326  word->set_flag(W_REP_CHAR, false);
327  return false;
328  }
329  ClearResults();
330  SetupWordScript(unicharset_in);
331  chopped_word = TWERD::PolygonalCopy(allow_detailed_fx, word);
332  float word_xheight = use_body_size && row != nullptr && row->body_size() > 0.0f
333  ? row->body_size() : x_height;
334  chopped_word->BLNormalize(block, row, pix, word->flag(W_INVERSE),
335  word_xheight, baseline_shift, numeric_mode,
336  norm_mode_hint, norm_box, &denorm);
337  blob_row = row;
338  SetupBasicsFromChoppedWord(unicharset_in);
340  int num_blobs = chopped_word->NumBlobs();
341  ratings = new MATRIX(num_blobs, kWordrecMaxNumJoinChunks);
342  tess_failed = false;
343  return true;
344 }
void BLNormalize(const BLOCK *block, const ROW *row, Pix *pix, bool inverse, float x_height, float baseline_shift, bool numeric_mode, tesseract::OcrEngineMode hint, const TBOX *norm_box, DENORM *word_denorm)
Definition: blobs.cpp:800
Definition: werd.h:43
bool tess_failed
Definition: pageres.h:288
const int kWordrecMaxNumJoinChunks
Definition: pageres.cpp:55
int NumBlobs() const
Definition: blobs.h:432
void set_flag(WERD_FLAGS mask, bool value)
Definition: werd.h:127
float body_size() const
Definition: ocrrow.h:73
float baseline_shift
Definition: pageres.h:313
void SetupBlamerBundle()
Definition: pageres.cpp:399
bool flag(WERD_FLAGS mask) const
Definition: werd.h:126
DENORM denorm
Definition: pageres.h:204
POLY_BLOCK * poly_block() const
Definition: pdblock.h:56
bool IsText() const
Definition: polyblk.h:49
C_BLOB_LIST * cblob_list()
Definition: werd.h:98
void SetupFake(const UNICHARSET &uch)
Definition: pageres.cpp:358
void SetupWordScript(const UNICHARSET &unicharset_in)
Definition: pageres.cpp:390
float x_height
Definition: pageres.h:311
void ClearResults()
Definition: pageres.cpp:1153
void SetupBasicsFromChoppedWord(const UNICHARSET &unicharset_in)
Definition: pageres.cpp:349
MATRIX * ratings
Definition: pageres.h:231
ROW * blob_row
Definition: pageres.h:200
Definition: matrix.h:575
static TWERD * PolygonalCopy(bool allow_detailed_fx, WERD *src)
Definition: blobs.cpp:786
TWERD * chopped_word
Definition: pageres.h:215
PDBLK pdblk
Definition: ocrblock.h:192
WERD * word
Definition: pageres.h:189

◆ SetupWordScript()

void WERD_RES::SetupWordScript ( const UNICHARSET & unicharset_in)

Definition at line 390 of file pageres.cpp.

390  {
391  uch_set = &uch;
392  int script = uch.default_sid();
393  word->set_script_id(script);
394  word->set_flag(W_SCRIPT_HAS_XHEIGHT, uch.script_has_xheight());
395  word->set_flag(W_SCRIPT_IS_LATIN, script == uch.latin_sid());
396 }
void set_flag(WERD_FLAGS mask, bool value)
Definition: werd.h:127
void set_script_id(int id)
Definition: werd.h:111
int default_sid() const
Definition: unicharset.h:888
const UNICHARSET * uch_set
Definition: pageres.h:206
WERD * word
Definition: pageres.h:189

◆ StatesAllValid()

bool WERD_RES::StatesAllValid ( )

Definition at line 464 of file pageres.cpp.

464  {
465  int ratings_dim = ratings->dimension();
466  if (raw_choice->TotalOfStates() != ratings_dim) {
467  tprintf("raw_choice has total of states = %d vs ratings dim of %d\n",
468  raw_choice->TotalOfStates(), ratings_dim);
469  return false;
470  }
471  WERD_CHOICE_IT it(&best_choices);
472  int index = 0;
473  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++index) {
474  WERD_CHOICE* choice = it.data();
475  if (choice->TotalOfStates() != ratings_dim) {
476  tprintf("Cooked #%d has total of states = %d vs ratings dim of %d\n",
477  index, choice->TotalOfStates(), ratings_dim);
478  return false;
479  }
480  }
481  return true;
482 }
WERD_CHOICE_LIST best_choices
Definition: pageres.h:243
int dimension() const
Definition: matrix.h:533
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
MATRIX * ratings
Definition: pageres.h:231
WERD_CHOICE * raw_choice
Definition: pageres.h:240
int TotalOfStates() const
Definition: ratngs.cpp:714

◆ SymbolDirection()

UNICHARSET::Direction WERD_RES::SymbolDirection ( int  blob_index) const
inline

Definition at line 383 of file pageres.h.

383  {
384  if (best_choice == nullptr ||
385  blob_index >= best_choice->length() ||
386  blob_index < 0)
387  return UNICHARSET::U_OTHER_NEUTRAL;
388  return uch_set->get_direction(best_choice->unichar_id(blob_index));
389  }
Direction get_direction(UNICHAR_ID unichar_id) const
Definition: unicharset.h:685
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:315
int length() const
Definition: ratngs.h:303
const UNICHARSET * uch_set
Definition: pageres.h:206
WERD_CHOICE * best_choice
Definition: pageres.h:235

◆ UnicharsInReadingOrder()

bool WERD_RES::UnicharsInReadingOrder ( ) const
inline

Definition at line 425 of file pageres.h.

425  {
426  return best_choice->unichars_in_script_order();
427  }
bool unichars_in_script_order() const
Definition: ratngs.h:535
WERD_CHOICE * best_choice
Definition: pageres.h:235

Member Data Documentation

◆ baseline_shift

float WERD_RES::baseline_shift

Definition at line 313 of file pageres.h.

◆ best_choice

WERD_CHOICE* WERD_RES::best_choice

Definition at line 235 of file pageres.h.

◆ best_choices

WERD_CHOICE_LIST WERD_RES::best_choices

Definition at line 243 of file pageres.h.

◆ best_state

GenericVector<int> WERD_RES::best_state

Definition at line 271 of file pageres.h.

◆ blamer_bundle

BlamerBundle* WERD_RES::blamer_bundle

Definition at line 246 of file pageres.h.

◆ bln_boxes

tesseract::BoxWord* WERD_RES::bln_boxes

Definition at line 198 of file pageres.h.

◆ blob_gaps

GenericVector<int> WERD_RES::blob_gaps

Definition at line 222 of file pageres.h.

◆ blob_row

ROW* WERD_RES::blob_row

Definition at line 200 of file pageres.h.

◆ blob_widths

GenericVector<int> WERD_RES::blob_widths

Definition at line 219 of file pageres.h.

◆ bold

int8_t WERD_RES::bold

Definition at line 302 of file pageres.h.

◆ box_word

tesseract::BoxWord* WERD_RES::box_word

Definition at line 266 of file pageres.h.

◆ caps_height

float WERD_RES::caps_height

Definition at line 312 of file pageres.h.

◆ chopped_word

TWERD* WERD_RES::chopped_word

Definition at line 215 of file pageres.h.

◆ combination

bool WERD_RES::combination

Definition at line 334 of file pageres.h.

◆ correct_text

GenericVector<STRING> WERD_RES::correct_text

Definition at line 275 of file pageres.h.

◆ denorm

DENORM WERD_RES::denorm

Definition at line 204 of file pageres.h.

◆ done

bool WERD_RES::done

Definition at line 298 of file pageres.h.

◆ ep_choice

WERD_CHOICE* WERD_RES::ep_choice

Definition at line 286 of file pageres.h.

◆ fontinfo

const FontInfo* WERD_RES::fontinfo

Definition at line 304 of file pageres.h.

◆ fontinfo2

const FontInfo* WERD_RES::fontinfo2

Definition at line 305 of file pageres.h.

◆ fontinfo_id2_count

int8_t WERD_RES::fontinfo_id2_count

Definition at line 307 of file pageres.h.

◆ fontinfo_id_count

int8_t WERD_RES::fontinfo_id_count

Definition at line 306 of file pageres.h.

◆ guessed_caps_ht

bool WERD_RES::guessed_caps_ht

Definition at line 309 of file pageres.h.

◆ guessed_x_ht

bool WERD_RES::guessed_x_ht

Definition at line 308 of file pageres.h.

◆ italic

int8_t WERD_RES::italic

Definition at line 301 of file pageres.h.

◆ odd_size

bool WERD_RES::odd_size

Definition at line 300 of file pageres.h.

◆ part_of_combo

bool WERD_RES::part_of_combo

Definition at line 335 of file pageres.h.

◆ ratings

MATRIX* WERD_RES::ratings

Definition at line 231 of file pageres.h.

◆ raw_choice

WERD_CHOICE* WERD_RES::raw_choice

Definition at line 240 of file pageres.h.

◆ rebuild_word

TWERD* WERD_RES::rebuild_word

Definition at line 260 of file pageres.h.

◆ reject_map

REJMAP WERD_RES::reject_map

Definition at line 287 of file pageres.h.

◆ reject_spaces

bool WERD_RES::reject_spaces

Definition at line 336 of file pageres.h.

◆ seam_array

GenericVector<SEAM*> WERD_RES::seam_array

Definition at line 217 of file pageres.h.

◆ small_caps

bool WERD_RES::small_caps

Definition at line 299 of file pageres.h.

◆ space_certainty

float WERD_RES::space_certainty

Definition at line 316 of file pageres.h.

◆ tess_accepted

bool WERD_RES::tess_accepted

Definition at line 296 of file pageres.h.

◆ tess_failed

bool WERD_RES::tess_failed

Definition at line 288 of file pageres.h.

◆ tess_would_adapt

bool WERD_RES::tess_would_adapt

Definition at line 297 of file pageres.h.

◆ tesseract

tesseract::Tesseract* WERD_RES::tesseract

Definition at line 282 of file pageres.h.

◆ timesteps

std::vector<std::vector<std::pair<const char*, float> > > WERD_RES::timesteps

Definition at line 224 of file pageres.h.

◆ uch_set

const UNICHARSET* WERD_RES::uch_set

Definition at line 206 of file pageres.h.

◆ unlv_crunch_mode

CRUNCH_MODE WERD_RES::unlv_crunch_mode

Definition at line 310 of file pageres.h.

◆ word

WERD* WERD_RES::word

Definition at line 189 of file pageres.h.

◆ x_height

float WERD_RES::x_height

Definition at line 311 of file pageres.h.


The documentation for this class was generated from the following files: