22 #ifndef TESSERACT_WORDREC_LANGUAGE_MODEL_H_    23 #define TESSERACT_WORDREC_LANGUAGE_MODEL_H_    78                    bool fixed_pitch, 
float max_char_wh_ratio,
    79                    float rating_cert_scale);
    95       int curr_col, 
int curr_row,
    96       BLOB_CHOICE_LIST *curr_list,
   119       return 1.0f / (1.0f + exp(10.0f * cert));
   121       return (-1.0f / cert);
   126     if (num_problems == 0) 
return 0.0f;
   127     if (num_problems == 1) 
return penalty;
   129                        static_cast<float>(num_problems-1)));
   139     if (dawg_info != 
nullptr) {
   184       bool just_classified, 
bool mixed_alnum,
   187       ViterbiStateEntry_IT* vse_it,
   215                                           int curr_col, 
int curr_row,
   227       const char *unichar, 
float certainty, 
float denom,
   228       int curr_col, 
int curr_row, 
float outline_length,
   239                          const char *context, 
int *unichar_step_len,
   240                          bool *found_small_prob, 
float *ngram_prob);
   282                                     float max_char_wh_ratio,
   289         (parent_vse != 
nullptr) ? parent_vse->
length : 0,
   319              "Turn on/off the use of character ngram model");
   321             "Maximum order of the character ngram model");
   323             "Maximum number of prunable (those for which PrunablePath() is"   324             " true) entries in each viterbi list recorded in BLOB_CHOICEs");
   326             "Maximum size of viterbi lists recorded in BLOB_CHOICEs");
   328                "To avoid overly small denominators use this as the floor"   329                " of the probability returned by the ngram model");
   331                "Average classifier score of a non-matching unichar");
   333              "Use only the first UTF8 step of the given string"   334              " when computing log probabilities");
   336                "Strength of the character ngram model relative to the"   337                " character classifier ");
   339                "Factor to bring log-probs into the same range as ratings"   340                " when multiplied by outline length ");
   342              "Words are delimited by space");
   344             "Minimum length of compound words");
   347                "Penalty for words not in the frequent word dictionary");
   349                "Penalty for non-dictionary words");
   351                "Penalty for inconsistent punctuation");
   353                "Penalty for inconsistent case");
   355                "Penalty for inconsistent script");
   357                "Penalty for inconsistent character type");
   359                "Penalty for inconsistent font");
   361                "Penalty for inconsistent spacing");
   365              "Use sigmoidal score for certainty");
   427 #endif  // TESSERACT_WORDREC_LANGUAGE_MODEL_H_ 
void UpdateBestChoice(ViterbiStateEntry *vse, LMPainPoints *pain_points, WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
 
static void ComputeStats(int col, int row, const AssociateStats *parent_stats, int parent_path_length, bool fixed_pitch, float max_char_wh_ratio, WERD_RES *word_res, bool debug, AssociateStats *stats)
 
#define INT_VAR_H(name, val, comment)
 
void FillConsistencyInfo(int curr_col, bool word_end, BLOB_CHOICE *b, ViterbiStateEntry *parent_vse, WERD_RES *word_res, LMConsistencyInfo *consistency_info)
 
float CertaintyScore(float cert)
 
bool acceptable_choice_found_
 
int language_model_viterbi_list_max_num_prunable
 
AssociateStats associate_stats
 
LanguageModelDawgInfo * GenerateDawgInfo(bool word_end, int curr_col, int curr_row, const BLOB_CHOICE &b, const ViterbiStateEntry *parent_vse)
 
int NumInconsistentSpaces() const
 
int prev_word_unichar_step_len_
 
bool language_model_ngram_space_delimited_language
 
LanguageModelNgramInfo * GenerateNgramInfo(const char *unichar, float certainty, float denom, int curr_col, int curr_row, float outline_length, const ViterbiStateEntry *parent_vse)
 
#define BOOL_VAR_H(name, val, comment)
 
static const LanguageModelFlagsType kXhtConsistentFlag
 
bool correct_segmentation_explored_
 
#define double_VAR_H(name, val, comment)
 
int SetTopParentLowerUpperDigit(LanguageModelState *parent_node) const
 
int language_model_min_compound_length
 
double language_model_penalty_chartype
 
Struct to store information maintained by various language model components. 
 
void InitForWord(const WERD_CHOICE *prev_word, bool fixed_pitch, float max_char_wh_ratio, float rating_cert_scale)
 
double language_model_penalty_case
 
void ComputeAssociateStats(int col, int row, float max_char_wh_ratio, ViterbiStateEntry *parent_vse, WERD_RES *word_res, AssociateStats *associate_stats)
 
double language_model_penalty_script
 
static void ExtractFeaturesFromPath(const ViterbiStateEntry &vse, float features[])
 
double language_model_ngram_scale_factor
 
int NumInconsistentCase() const
 
bool AddViterbiStateEntry(LanguageModelFlagsType top_choice_flags, float denom, bool word_end, int curr_col, int curr_row, BLOB_CHOICE *b, LanguageModelState *curr_state, ViterbiStateEntry *parent_vse, LMPainPoints *pain_points, WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
 
static const LanguageModelFlagsType kSmallestRatingFlag
 
static const LanguageModelFlagsType kDigitFlag
 
LanguageModelNgramInfo * ngram_info
 
double language_model_penalty_non_freq_dict_word
 
float ComputeAdjustedPathCost(ViterbiStateEntry *vse)
 
unsigned char LanguageModelFlagsType
Used for expressing various language model flags. 
 
double language_model_ngram_small_prob
 
float ComputeNgramCost(const char *unichar, float certainty, float denom, const char *context, int *unichar_step_len, bool *found_small_prob, float *ngram_prob)
 
void SetAcceptableChoiceFound(bool val)
 
static const LanguageModelFlagsType kUpperCaseFlag
 
float ComputeAdjustment(int num_problems, float penalty)
 
ParamsModel params_model_
 
int NumInconsistentPunc() const
 
float ComputeConsistencyAdjustment(const LanguageModelDawgInfo *dawg_info, const LMConsistencyInfo &consistency_info)
 
double language_model_penalty_spacing
 
DawgPositionVector beginning_active_dawgs_
 
bool GetTopLowerUpperDigit(BLOB_CHOICE_LIST *curr_list, BLOB_CHOICE **first_lower, BLOB_CHOICE **first_upper, BLOB_CHOICE **first_digit) const
 
bool AcceptableChoiceFound()
 
int language_model_ngram_order
 
float ComputeDenom(BLOB_CHOICE_LIST *curr_list)
 
double language_model_penalty_font
 
double language_model_ngram_rating_factor
 
bool language_model_ngram_on
 
bool AcceptablePath(const ViterbiStateEntry &vse)
 
double language_model_penalty_increment
 
void GenerateTopChoiceInfo(ViterbiStateEntry *new_vse, const ViterbiStateEntry *parent_vse, LanguageModelState *lms)
 
Bundle together all the things pertaining to the best choice/state. 
 
bool PrunablePath(const ViterbiStateEntry &vse)
 
double language_model_penalty_punc
 
int wordrec_display_segmentations
 
int language_model_debug_level
 
bool language_model_ngram_use_only_first_uft8_step
 
ViterbiStateEntry * GetNextParentVSE(bool just_classified, bool mixed_alnum, const BLOB_CHOICE *bc, LanguageModelFlagsType blob_choice_flags, const UNICHARSET &unicharset, WERD_RES *word_res, ViterbiStateEntry_IT *vse_it, LanguageModelFlagsType *top_choice_flags) const
 
ParamsModel & getParamsModel()
 
LanguageModelFlagsType top_choice_flags
 
const UnicityTable< FontInfo > * fontinfo_table_
 
DawgPositionVector very_beginning_active_dawgs_
 
double language_model_penalty_non_dict_word
 
static const LanguageModelFlagsType kLowerCaseFlag
 
double language_model_ngram_nonmatch_score
 
int NumInconsistentChartype() const
 
bool UpdateState(bool just_classified, int curr_col, int curr_row, BLOB_CHOICE_LIST *curr_list, LanguageModelState *parent_node, LMPainPoints *pain_points, WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
 
int language_model_viterbi_list_max_size
 
WERD_CHOICE * ConstructWord(ViterbiStateEntry *vse, WERD_RES *word_res, DANGERR *fixpt, BlamerBundle *blamer_bundle, bool *truth_path)
 
LanguageModel(const UnicityTable< FontInfo > *fontinfo_table, Dict *dict)
 
static const float kMaxAvgNgramCost
 
bool language_model_use_sigmoidal_certainty
 
LanguageModelDawgInfo * dawg_info