tesseract  5.0.0-alpha-619-ge9db
tesseract::ViterbiStateEntry Struct Reference

#include <lm_state.h>

Inheritance diagram for tesseract::ViterbiStateEntry:
ELIST_LINK

Public Member Functions

 ViterbiStateEntry (ViterbiStateEntry *pe, BLOB_CHOICE *b, float c, float ol, const LMConsistencyInfo &ci, const AssociateStats &as, LanguageModelFlagsType tcf, LanguageModelDawgInfo *d, LanguageModelNgramInfo *n, const char *debug_uch)
 
 ~ViterbiStateEntry ()
 
bool Consistent () const
 
bool HasAlnumChoice (const UNICHARSET &unicharset)
 
void Print (const char *msg) const
 
- Public Member Functions inherited from ELIST_LINK
 ELIST_LINK ()
 
 ELIST_LINK (const ELIST_LINK &)
 
void operator= (const ELIST_LINK &)
 

Static Public Member Functions

static int Compare (const void *e1, const void *e2)
 

Public Attributes

BLOB_CHOICE * curr_b
 Pointers to BLOB_CHOICE and parent ViterbiStateEntry (not owned by this). More...
 
ViterbiStateEntry * parent_vse
 
ViterbiStateEntry * competing_vse
 
LanguageModelDawgInfo * dawg_info
 
LanguageModelNgramInfo * ngram_info
 
STRING * debug_str
 
float cost
 
float ratings_sum
 sum of ratings of character on the path More...
 
float min_certainty
 minimum certainty on the path More...
 
int adapted
 number of BLOB_CHOICES from adapted templates More...
 
int length
 number of characters on the path More...
 
float outline_length
 length of the outline so far More...
 
LMConsistencyInfo consistency_info
 path consistency info More...
 
AssociateStats associate_stats
 character widths/gaps/seams More...
 
LanguageModelFlagsType top_choice_flags
 
bool updated
 set to true if the entry has just been created/updated More...
 

Detailed Description

Struct for storing the information about a path in the segmentation graph explored by Viterbi search.

Definition at line 91 of file lm_state.h.

Constructor & Destructor Documentation

◆ ViterbiStateEntry()

tesseract::ViterbiStateEntry::ViterbiStateEntry ( ViterbiStateEntry *  pe,
BLOB_CHOICE *  b,
float  c,
float  ol,
const LMConsistencyInfo &  ci,
const AssociateStats &  as,
LanguageModelFlagsType  tcf,
LanguageModelDawgInfo *  d,
LanguageModelNgramInfo *  n,
const char *  debug_uch 
)
inline

Definition at line 92 of file lm_state.h.

100  : curr_b(b), parent_vse(pe), competing_vse(nullptr),
101  dawg_info(d), ngram_info(n),
102  cost(c),
103  ratings_sum(b->rating()),
104  min_certainty(b->certainty()),
105  adapted(b->IsAdapted()),
106  length(1),
107  outline_length(ol),
108  consistency_info(ci),
109  associate_stats(as),
110  top_choice_flags(tcf),
111  updated(true) {
112  debug_str = (debug_uch == nullptr) ? nullptr : new STRING();
113  if (pe != nullptr) {
114  ratings_sum += pe->ratings_sum;
115  if (pe->min_certainty < min_certainty) {
116  min_certainty = pe->min_certainty;
117  }
118  adapted += pe->adapted;
119  length += pe->length;
120  outline_length += pe->outline_length;
121  if (debug_uch != nullptr) *debug_str += *(pe->debug_str);
122  }
123  if (debug_str != nullptr && debug_uch != nullptr) *debug_str += debug_uch;
124  }

◆ ~ViterbiStateEntry()

tesseract::ViterbiStateEntry::~ViterbiStateEntry ( )
inline

Definition at line 125 of file lm_state.h.

125  {
126  delete dawg_info;
127  delete ngram_info;
128  delete debug_str;
129  }

Member Function Documentation

◆ Compare()

static int tesseract::ViterbiStateEntry::Compare ( const void *  e1,
const void *  e2 
)
inline static

Comparator function for sorting ViterbiStateEntry_LISTs in non-decreasing order of costs (it returns -1 when the first entry has the lower cost, so lower-cost entries sort first).

Definition at line 132 of file lm_state.h.

132  {
133  const ViterbiStateEntry *ve1 =
134  *static_cast<const ViterbiStateEntry *const *>(e1);
135  const ViterbiStateEntry *ve2 =
136  *static_cast<const ViterbiStateEntry *const *>(e2);
137  return (ve1->cost < ve2->cost) ? -1 : 1;
138  }

◆ Consistent()

bool tesseract::ViterbiStateEntry::Consistent ( ) const
inline

Definition at line 139 of file lm_state.h.

139  {
140  if (dawg_info != nullptr && consistency_info.NumInconsistentCase() == 0) {
141  return true;
142  }
143  return consistency_info.Consistent();
144  }

◆ HasAlnumChoice()

bool tesseract::ViterbiStateEntry::HasAlnumChoice ( const UNICHARSET &  unicharset)
inline

Returns true if this VSE has an alphanumeric character as its classifier result.

Definition at line 147 of file lm_state.h.

147  {
148  if (curr_b == nullptr) return false;
149  UNICHAR_ID unichar_id = curr_b->unichar_id();
150  if (unicharset.get_isalpha(unichar_id) ||
151  unicharset.get_isdigit(unichar_id))
152  return true;
153  return false;
154  }

◆ Print()

void tesseract::ViterbiStateEntry::Print ( const char *  msg) const

Definition at line 26 of file lm_state.cpp.

26  {
27  tprintf("%s ViterbiStateEntry", msg);
28  if (updated) tprintf("(NEW)");
29  if (this->debug_str != nullptr) {
30  tprintf(" str=%s", this->debug_str->c_str());
31  }
32  tprintf(" with ratings_sum=%.4f length=%d cost=%.6f",
33  this->ratings_sum, this->length, this->cost);
34  if (this->top_choice_flags) {
35  tprintf(" top_choice_flags=0x%x", this->top_choice_flags);
36  }
37  if (!this->Consistent()) {
38  tprintf(" inconsistent=(punc %d case %d chartype %d script %d font %d)",
44  }
45  if (this->dawg_info) tprintf(" permuter=%d", this->dawg_info->permuter);
46  if (this->ngram_info) {
47  tprintf(" ngram_cl_cost=%g context=%s ngram pruned=%d",
49  this->ngram_info->context.c_str(),
50  this->ngram_info->pruned);
51  }
52  if (this->associate_stats.shape_cost > 0.0f) {
53  tprintf(" shape_cost=%g", this->associate_stats.shape_cost);
54  }
55  tprintf(" %s",
56  XHeightConsistencyEnumName[this->consistency_info.xht_decision]);
57 
58  tprintf("\n");
59 }

Member Data Documentation

◆ adapted

int tesseract::ViterbiStateEntry::adapted

number of BLOB_CHOICES from adapted templates

Definition at line 184 of file lm_state.h.

◆ associate_stats

AssociateStats tesseract::ViterbiStateEntry::associate_stats

character widths/gaps/seams

Definition at line 188 of file lm_state.h.

◆ competing_vse

ViterbiStateEntry* tesseract::ViterbiStateEntry::competing_vse

Pointer to a case-competing ViterbiStateEntry in the same list that represents a path ending in the same letter of the opposite case.

Definition at line 162 of file lm_state.h.

◆ consistency_info

LMConsistencyInfo tesseract::ViterbiStateEntry::consistency_info

path consistency info

Definition at line 187 of file lm_state.h.

◆ cost

float tesseract::ViterbiStateEntry::cost

The cost is an adjusted ratings sum, that is adjusted by all the language model components that use Viterbi search.

Definition at line 178 of file lm_state.h.

◆ curr_b

BLOB_CHOICE* tesseract::ViterbiStateEntry::curr_b

Pointers to BLOB_CHOICE and parent ViterbiStateEntry (not owned by this).

Definition at line 158 of file lm_state.h.

◆ dawg_info

LanguageModelDawgInfo* tesseract::ViterbiStateEntry::dawg_info

Extra information maintained by Dawg language model component (owned by ViterbiStateEntry).

Definition at line 166 of file lm_state.h.

◆ debug_str

STRING* tesseract::ViterbiStateEntry::debug_str

UTF8 string representing the path corresponding to this vse. Populated only when language_model_debug_level > 0.

Definition at line 174 of file lm_state.h.

◆ length

int tesseract::ViterbiStateEntry::length

number of characters on the path

Definition at line 185 of file lm_state.h.

◆ min_certainty

float tesseract::ViterbiStateEntry::min_certainty

minimum certainty on the path

Definition at line 183 of file lm_state.h.

◆ ngram_info

LanguageModelNgramInfo* tesseract::ViterbiStateEntry::ngram_info

Extra information maintained by Ngram language model component (owned by ViterbiStateEntry).

Definition at line 170 of file lm_state.h.

◆ outline_length

float tesseract::ViterbiStateEntry::outline_length

length of the outline so far

Definition at line 186 of file lm_state.h.

◆ parent_vse

ViterbiStateEntry* tesseract::ViterbiStateEntry::parent_vse

Definition at line 159 of file lm_state.h.

◆ ratings_sum

float tesseract::ViterbiStateEntry::ratings_sum

sum of ratings of character on the path

Various information about the characters on the path represented by this ViterbiStateEntry.

Definition at line 182 of file lm_state.h.

◆ top_choice_flags

LanguageModelFlagsType tesseract::ViterbiStateEntry::top_choice_flags

Flags for marking the entry as a top choice path (with the smallest rating or lower/upper case letters).

Definition at line 192 of file lm_state.h.

◆ updated

bool tesseract::ViterbiStateEntry::updated

set to true if the entry has just been created/updated

Definition at line 194 of file lm_state.h.


The documentation for this struct was generated from the following files:
lm_state.h
lm_state.cpp
tesseract::ViterbiStateEntry::top_choice_flags
LanguageModelFlagsType top_choice_flags
Definition: lm_state.h:192
tesseract::ViterbiStateEntry::dawg_info
LanguageModelDawgInfo * dawg_info
Definition: lm_state.h:166
tesseract::ViterbiStateEntry::length
int length
number of characters on the path
Definition: lm_state.h:185
tesseract::LanguageModelNgramInfo::pruned
bool pruned
Definition: lm_state.h:82
tesseract::ViterbiStateEntry::outline_length
float outline_length
length of the outline so far
Definition: lm_state.h:186
tesseract::ViterbiStateEntry::Consistent
bool Consistent() const
Definition: lm_state.h:139
UNICHARSET::get_isdigit
bool get_isdigit(UNICHAR_ID unichar_id) const
Definition: unicharset.h:502
UNICHARSET::get_isalpha
bool get_isalpha(UNICHAR_ID unichar_id) const
Definition: unicharset.h:481
BLOB_CHOICE::certainty
float certainty() const
Definition: ratngs.h:81
tesseract::LMConsistencyInfo::inconsistent_script
bool inconsistent_script
Definition: lm_consistency.h:136
STRING
Definition: strngs.h:45
tesseract::ViterbiStateEntry::cost
float cost
Definition: lm_state.h:178
tesseract::LMConsistencyInfo::Consistent
bool Consistent() const
Definition: lm_consistency.h:94
tesseract::ViterbiStateEntry::debug_str
STRING * debug_str
Definition: lm_state.h:174
tesseract::ViterbiStateEntry::competing_vse
ViterbiStateEntry * competing_vse
Definition: lm_state.h:162
BLOB_CHOICE::unichar_id
UNICHAR_ID unichar_id() const
Definition: ratngs.h:75
tesseract::LMConsistencyInfo::xht_decision
XHeightConsistencyEnum xht_decision
Definition: lm_consistency.h:123
tesseract::ViterbiStateEntry::ViterbiStateEntry
ViterbiStateEntry(ViterbiStateEntry *pe, BLOB_CHOICE *b, float c, float ol, const LMConsistencyInfo &ci, const AssociateStats &as, LanguageModelFlagsType tcf, LanguageModelDawgInfo *d, LanguageModelNgramInfo *n, const char *debug_uch)
Definition: lm_state.h:92
STRING::c_str
const char * c_str() const
Definition: strngs.cpp:192
tesseract::ViterbiStateEntry::ngram_info
LanguageModelNgramInfo * ngram_info
Definition: lm_state.h:170
tesseract::LMConsistencyInfo::NumInconsistentCase
int NumInconsistentCase() const
Definition: lm_consistency.h:87
tesseract::ViterbiStateEntry::associate_stats
AssociateStats associate_stats
character widths/gaps/seams
Definition: lm_state.h:188
tesseract::ViterbiStateEntry::curr_b
BLOB_CHOICE * curr_b
Pointers to BLOB_CHOICE and parent ViterbiStateEntry (not owned by this).
Definition: lm_state.h:158
tesseract::LanguageModelNgramInfo::ngram_and_classifier_cost
float ngram_and_classifier_cost
-[ ln(P_classifier(path)) + scale_factor * ln(P_ngram_model(path)) ]
Definition: lm_state.h:86
tesseract::ViterbiStateEntry::consistency_info
LMConsistencyInfo consistency_info
path consistency info
Definition: lm_state.h:187
tesseract::ViterbiStateEntry::updated
bool updated
set to true if the entry has just been created/updated
Definition: lm_state.h:194
BLOB_CHOICE::rating
float rating() const
Definition: ratngs.h:78
tesseract::ViterbiStateEntry::parent_vse
ViterbiStateEntry * parent_vse
Definition: lm_state.h:159
UNICHAR_ID
int UNICHAR_ID
Definition: unichar.h:36
tesseract::LanguageModelDawgInfo::permuter
PermuterType permuter
Definition: lm_state.h:65
tesseract::ViterbiStateEntry::adapted
int adapted
number of BLOB_CHOICES from adapted templates
Definition: lm_state.h:184
tesseract::ViterbiStateEntry::min_certainty
float min_certainty
minimum certainty on the path
Definition: lm_state.h:183
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
tesseract::ViterbiStateEntry::ratings_sum
float ratings_sum
sum of ratings of character on the path
Definition: lm_state.h:182
BLOB_CHOICE::IsAdapted
bool IsAdapted() const
Definition: ratngs.h:130
tesseract::LMConsistencyInfo::NumInconsistentChartype
int NumInconsistentChartype() const
Definition: lm_consistency.h:90
tesseract::AssociateStats::shape_cost
float shape_cost
Definition: associate.h:52
tesseract::LMConsistencyInfo::inconsistent_font
bool inconsistent_font
Definition: lm_consistency.h:137
tesseract::LMConsistencyInfo::NumInconsistentPunc
int NumInconsistentPunc() const
Definition: lm_consistency.h:84