tesseract
4.0.0-1-g2a2b
|
#include <dict.h>
Public Member Functions | |
Dict (CCUtil *image_ptr) | |
~Dict () | |
const CCUtil * | getCCUtil () const |
CCUtil * | getCCUtil () |
const UNICHARSET & | getUnicharset () const |
UNICHARSET & | getUnicharset () |
const UnicharAmbigs & | getUnicharAmbigs () const |
bool | compound_marker (UNICHAR_ID unichar_id) |
bool | is_apostrophe (UNICHAR_ID unichar_id) |
bool | hyphenated () const |
Returns true if we've recorded the beginning of a hyphenated word. More... | |
int | hyphen_base_size () const |
Size of the base word (the part on the line before) of a hyphenated word. More... | |
void | copy_hyphen_info (WERD_CHOICE *word) const |
bool | has_hyphen_end (UNICHAR_ID unichar_id, bool first_pos) const |
Check whether the word has a hyphen at the end. More... | |
bool | has_hyphen_end (const WERD_CHOICE &word) const |
Same as above, but check the unichar at the end of the word. More... | |
void | reset_hyphen_vars (bool last_word_on_line) |
void | set_hyphen_word (const WERD_CHOICE &word, const DawgPositionVector &active_dawgs) |
void | update_best_choice (const WERD_CHOICE &word, WERD_CHOICE *best_choice) |
void | init_active_dawgs (DawgPositionVector *active_dawgs, bool ambigs_mode) const |
void | default_dawgs (DawgPositionVector *anylength_dawgs, bool suppress_patterns) const |
bool | NoDangerousAmbig (WERD_CHOICE *BestChoice, DANGERR *fixpt, bool fix_replaceable, MATRIX *ratings) |
void | ReplaceAmbig (int wrong_ngram_begin_index, int wrong_ngram_size, UNICHAR_ID correct_ngram_id, WERD_CHOICE *werd_choice, MATRIX *ratings) |
int | LengthOfShortestAlphaRun (const WERD_CHOICE &WordChoice) const |
Returns the length of the shortest alpha run in WordChoice. More... | |
int | UniformCertainties (const WERD_CHOICE &word) |
bool | AcceptableChoice (const WERD_CHOICE &best_choice, XHeightConsistencyEnum xheight_consistency) |
Returns true if the given best_choice is good enough to stop. More... | |
bool | AcceptableResult (WERD_RES *word) const |
void | EndDangerousAmbigs () |
void | DebugWordChoices () |
Prints the current choices for this word to stdout. More... | |
void | SettupStopperPass1 () |
Sets up stopper variables in preparation for the first pass. More... | |
void | SettupStopperPass2 () |
Sets up stopper variables in preparation for the second pass. More... | |
int | case_ok (const WERD_CHOICE &word, const UNICHARSET &unicharset) const |
Check a string to see if it matches a set of lexical rules. More... | |
bool | absolute_garbage (const WERD_CHOICE &word, const UNICHARSET &unicharset) |
void | SetupForLoad (DawgCache *dawg_cache) |
void | Load (const STRING &lang, TessdataManager *data_file) |
void | LoadLSTM (const STRING &lang, TessdataManager *data_file) |
bool | FinishLoad () |
void | End () |
void | ResetDocumentDictionary () |
int | def_letter_is_okay (void *void_dawg_args, const UNICHARSET &unicharset, UNICHAR_ID unichar_id, bool word_end) const |
int | LetterIsOkay (void *void_dawg_args, const UNICHARSET &unicharset, UNICHAR_ID unichar_id, bool word_end) const |
Calls letter_is_okay_ member function. More... | |
double | ProbabilityInContext (const char *context, int context_bytes, const char *character, int character_bytes) |
Calls probability_in_context_ member function. More... | |
double | def_probability_in_context (const char *lang, const char *context, int context_bytes, const char *character, int character_bytes) |
Default (no-op) implementation of probability in context function. More... | |
double | ngram_probability_in_context (const char *lang, const char *context, int context_bytes, const char *character, int character_bytes) |
float | ParamsModelClassify (const char *lang, void *path) |
float | CallParamsModelClassify (void *path) |
void | SetWildcardID (UNICHAR_ID id) |
UNICHAR_ID | WildcardID () const |
int | NumDawgs () const |
Return the number of dawgs in the dawgs_ vector. More... | |
const Dawg * | GetDawg (int index) const |
Return i-th dawg pointer recorded in the dawgs_ vector. More... | |
const Dawg * | GetPuncDawg () const |
Return the pointer to the punctuation dawg. More... | |
const Dawg * | GetUnambigDawg () const |
Return the pointer to the unambiguous words dawg. More... | |
UNICHAR_ID | char_for_dawg (const UNICHARSET &unicharset, UNICHAR_ID ch, const Dawg *dawg) const |
void | ProcessPatternEdges (const Dawg *dawg, const DawgPosition &info, UNICHAR_ID unichar_id, bool word_end, DawgArgs *dawg_args, PermuterType *current_permuter) const |
int | valid_word (const WERD_CHOICE &word, bool numbers_ok) const |
int | valid_word (const WERD_CHOICE &word) const |
int | valid_word_or_number (const WERD_CHOICE &word) const |
int | valid_word (const char *string) const |
This function is used by api/tesseract_cube_combiner.cpp. More... | |
bool | valid_bigram (const WERD_CHOICE &word1, const WERD_CHOICE &word2) const |
bool | valid_punctuation (const WERD_CHOICE &word) |
int | good_choice (const WERD_CHOICE &choice) |
Returns true if a good answer is found for the unknown blob rating. More... | |
void | add_document_word (const WERD_CHOICE &best_choice) |
Adds a word found on this document to the document specific dictionary. More... | |
void | adjust_word (WERD_CHOICE *word, bool nonword, XHeightConsistencyEnum xheight_consistency, float additional_adjust, bool modify_rating, bool debug) |
Adjusts the rating of the given word. More... | |
void | SetWordsegRatingAdjustFactor (float f) |
Set wordseg_rating_adjust_factor_ to the given value. More... | |
bool | IsSpaceDelimitedLang () const |
Returns true if the language is space-delimited (not CJ, or T). More... | |
go_deeper_dawg_fxn | |
If the choice being composed so far could be a dictionary word keep exploring choices. | |
WERD_CHOICE * | dawg_permute_and_select (const BLOB_CHOICE_LIST_VECTOR &char_choices, float rating_limit) |
void | go_deeper_dawg_fxn (const char *debug, const BLOB_CHOICE_LIST_VECTOR &char_choices, int char_choice_index, const CHAR_FRAGMENT_INFO *prev_char_frag_info, bool word_ending, WERD_CHOICE *word, float certainties[], float *limit, WERD_CHOICE *best_choice, int *attempts_left, void *void_more_args) |
void | permute_choices (const char *debug, const BLOB_CHOICE_LIST_VECTOR &char_choices, int char_choice_index, const CHAR_FRAGMENT_INFO *prev_char_frag_info, WERD_CHOICE *word, float certainties[], float *limit, WERD_CHOICE *best_choice, int *attempts_left, void *more_args) |
void | append_choices (const char *debug, const BLOB_CHOICE_LIST_VECTOR &char_choices, const BLOB_CHOICE &blob_choice, int char_choice_index, const CHAR_FRAGMENT_INFO *prev_char_frag_info, WERD_CHOICE *word, float certainties[], float *limit, WERD_CHOICE *best_choice, int *attempts_left, void *more_args) |
fragment_state | |
Given the current char choice and information about previously seen fragments, determines whether adjacent character fragments are present and whether they can be concatenated. The given prev_char_frag_info contains:
The output char_frag_info is filled in as follows:
| |
bool | fragment_state_okay (UNICHAR_ID curr_unichar_id, float curr_rating, float curr_certainty, const CHAR_FRAGMENT_INFO *prev_char_frag_info, const char *debug, int word_ending, CHAR_FRAGMENT_INFO *char_frag_info) |
Static Public Member Functions | |
static DawgCache * | GlobalDawgCache () |
static NODE_REF | GetStartingNode (const Dawg *dawg, EDGE_REF edge_ref) |
Returns the appropriate next node given the EDGE_REF. More... | |
static bool | valid_word_permuter (uint8_t perm, bool numbers_ok) |
Check all the DAWGs to see if this word is in any of them. More... | |
Public Attributes | |
void(Dict::* | go_deeper_fxn_ )(const char *debug, const BLOB_CHOICE_LIST_VECTOR &char_choices, int char_choice_index, const CHAR_FRAGMENT_INFO *prev_char_frag_info, bool word_ending, WERD_CHOICE *word, float certainties[], float *limit, WERD_CHOICE *best_choice, int *attempts_left, void *void_more_args) |
Pointer to go_deeper function. More... | |
int(Dict::* | letter_is_okay_ )(void *void_dawg_args, const UNICHARSET &unicharset, UNICHAR_ID unichar_id, bool word_end) const |
double(Dict::* | probability_in_context_ )(const char *lang, const char *context, int context_bytes, const char *character, int character_bytes) |
Probability in context function used by the ngram permuter. More... | |
float(Dict::* | params_model_classify_ )(const char *lang, void *path) |
char * | user_words_file = "" |
char * | user_words_suffix = "" |
char * | user_patterns_file = "" |
char * | user_patterns_suffix = "" |
bool | load_system_dawg = true |
bool | load_freq_dawg = true |
bool | load_unambig_dawg = true |
bool | load_punc_dawg = true |
bool | load_number_dawg = true |
bool | load_bigram_dawg = true |
double | xheight_penalty_subscripts = 0.125 |
double | xheight_penalty_inconsistent = 0.25 |
double | segment_penalty_dict_frequent_word = 1.0 |
double | segment_penalty_dict_case_ok = 1.1 |
double | segment_penalty_dict_case_bad = 1.3125 |
double | segment_penalty_dict_nonword = 1.25 |
double | segment_penalty_garbage = 1.50 |
char * | output_ambig_words_file = "" |
int | dawg_debug_level = 0 |
int | hyphen_debug_level = 0 |
int | max_viterbi_list_size = 10 |
bool | use_only_first_uft8_step = false |
double | certainty_scale = 20.0 |
double | stopper_nondict_certainty_base = -2.50 |
double | stopper_phase2_certainty_rejection_offset = 1.0 |
int | stopper_smallword_size = 2 |
double | stopper_certainty_per_char = -0.50 |
double | stopper_allowable_character_badness = 3.0 |
int | stopper_debug_level = 0 |
bool | stopper_no_acceptable_choices = false |
int | tessedit_truncate_wordchoice_log = 10 |
char * | word_to_debug = "" |
char * | word_to_debug_lengths = "" |
int | fragments_debug = 0 |
bool | segment_nonalphabetic_script = false |
bool | save_doc_words = 0 |
double | doc_dict_pending_threshold = 0.0 |
double | doc_dict_certainty_threshold = -2.25 |
int | max_permuter_attempts = 10000 |
tesseract::Dict::Dict | ( | CCUtil * | image_ptr | ) |
Definition at line 30 of file dict.cpp.
bool tesseract::Dict::absolute_garbage | ( | const WERD_CHOICE & | word, |
const UNICHARSET & | unicharset | ||
) |
Returns true if the word looks like absolute garbage (e.g. an image mistakenly recognized as text).
Definition at line 70 of file context.cpp.
bool tesseract::Dict::AcceptableChoice | ( | const WERD_CHOICE & | best_choice, |
XHeightConsistencyEnum | xheight_consistency | ||
) |
Returns true if the given best_choice is good enough to stop.
Definition at line 41 of file stopper.cpp.
bool tesseract::Dict::AcceptableResult | ( | WERD_RES * | word | ) | const |
Returns false if the best choice for the current word is questionable and should be tried again on the second pass or should be flagged to the user.
Definition at line 101 of file stopper.cpp.
void tesseract::Dict::add_document_word | ( | const WERD_CHOICE & | best_choice | ) |
Adds a word found on this document to the document specific dictionary.
Definition at line 613 of file dict.cpp.
void tesseract::Dict::adjust_word | ( | WERD_CHOICE * | word, |
bool | nonword, | ||
XHeightConsistencyEnum | xheight_consistency, | ||
float | additional_adjust, | ||
bool | modify_rating, | ||
bool | debug | ||
) |
Adjusts the rating of the given word.
Definition at line 673 of file dict.cpp.
void tesseract::Dict::append_choices | ( | const char * | debug, |
const BLOB_CHOICE_LIST_VECTOR & | char_choices, | ||
const BLOB_CHOICE & | blob_choice, | ||
int | char_choice_index, | ||
const CHAR_FRAGMENT_INFO * | prev_char_frag_info, | ||
WERD_CHOICE * | word, | ||
float | certainties[], | ||
float * | limit, | ||
WERD_CHOICE * | best_choice, | ||
int * | attempts_left, | ||
void * | more_args | ||
) |
append_choices
Checks to see whether or not the next choice is worth appending to the word being generated. If so then keeps going deeper into the word.
This function assumes that Dict::go_deeper_fxn_ is set.
Definition at line 245 of file permdawg.cpp.
|
inline |
Definition at line 406 of file dict.h.
int tesseract::Dict::case_ok | ( | const WERD_CHOICE & | word, |
const UNICHARSET & | unicharset | ||
) | const |
Check a string to see if it matches a set of lexical rules.
Definition at line 52 of file context.cpp.
|
inline |
|
inline |
Definition at line 109 of file dict.h.
|
inline |
If this word is hyphenated copy the base word (the part on the line before) of a hyphenated word into the given word. This function assumes that word is not nullptr.
Definition at line 137 of file dict.h.
WERD_CHOICE * tesseract::Dict::dawg_permute_and_select | ( | const BLOB_CHOICE_LIST_VECTOR & | char_choices, |
float | rating_limit | ||
) |
Recursively explore all the possible character combinations in the given char_choices. Use go_deeper_dawg_fxn() to explore all the dawgs in the dawgs_ vector in parallel and discard invalid words.
Allocate and return a WERD_CHOICE with the best valid word found.
dawg_permute_and_select
Recursively explore all the possible character combinations in the given char_choices. Use go_deeper_dawg_fxn() to search all the dawgs in the dawgs_ vector in parallel and discard invalid words.
Allocate and return a WERD_CHOICE with the best valid word found.
Definition at line 174 of file permdawg.cpp.
void tesseract::Dict::DebugWordChoices | ( | ) |
Prints the current choices for this word to stdout.
int tesseract::Dict::def_letter_is_okay | ( | void * | void_dawg_args, |
const UNICHARSET & | unicharset, | ||
UNICHAR_ID | unichar_id, | ||
bool | word_end | ||
) | const |
Returns the maximal permuter code (from ccstruct/ratngs.h) if in light of the current state the letter at word_index in the given word is allowed according to at least one of the dawgs in dawgs_, otherwise returns NO_PERM.
The state is described by void_dawg_args, which are interpreted as DawgArgs and contain relevant active dawg positions. Each entry in the active_dawgs vector contains an index into the dawgs_ vector and an EDGE_REF that indicates the last edge followed in the dawg. It also may contain a position in the punctuation dawg which describes surrounding punctuation (see struct DawgPosition).
Input: At word_index 0, dawg_args->active_dawgs should contain an entry for each dawg that may start at the beginning of a word, with punc_ref and edge_ref initialized to NO_EDGE. Since the punctuation dawg includes the empty pattern " " (meaning anything without surrounding punctuation), having a single entry for the punctuation dawg will cover all dawgs reachable therefrom – that includes all number and word dawgs. The only dawg not reachable from the punctuation dawg is the pattern dawg. If hyphen state needs to be applied, the initial dawg_args->active_dawgs can be copied from the saved hyphen state (maintained by Dict). For word_index > 0 the corresponding state (active_dawgs and punc position) can be obtained from dawg_args->updated_dawgs passed to def_letter_is_okay for word_index-1. Note: the function assumes that the active_dawgs and updated_dawgs member variables of dawg_args are not nullptr.
Output: The function fills in dawg_args->updated_dawgs vector with the entries for dawgs that contain the word up to the letter at word_index.
Definition at line 367 of file dict.cpp.
|
inline |
Default (no-op) implementation of probability in context function.
Definition at line 386 of file dict.h.
void tesseract::Dict::default_dawgs | ( | DawgPositionVector * | anylength_dawgs, |
bool | suppress_patterns | ||
) | const |
void tesseract::Dict::EndDangerousAmbigs | ( | ) |
Definition at line 358 of file stopper.cpp.
bool tesseract::Dict::fragment_state_okay | ( | UNICHAR_ID | curr_unichar_id, |
float | curr_rating, | ||
float | curr_certainty, | ||
const CHAR_FRAGMENT_INFO * | prev_char_frag_info, | ||
const char * | debug, | ||
int | word_ending, | ||
CHAR_FRAGMENT_INFO * | char_frag_info | ||
) |
Definition at line 320 of file permdawg.cpp.
|
inline |
|
inline |
|
inline |
|
inline |
|
inlinestatic |
Returns the appropriate next node given the EDGE_REF.
|
inline |
|
inline |
|
inline |
|
inline |
|
static |
Initialize Dict class - load dawgs from [lang].traineddata and user-specified wordlist and pattern list.
void tesseract::Dict::go_deeper_dawg_fxn | ( | const char * | debug, |
const BLOB_CHOICE_LIST_VECTOR & | char_choices, | ||
int | char_choice_index, | ||
const CHAR_FRAGMENT_INFO * | prev_char_frag_info, | ||
bool | word_ending, | ||
WERD_CHOICE * | word, | ||
float | certainties[], | ||
float * | limit, | ||
WERD_CHOICE * | best_choice, | ||
int * | attempts_left, | ||
void * | void_more_args | ||
) |
If the choice being composed so far could be a dictionary word and we have not reached the end of the word keep exploring the char_choices further.
Definition at line 50 of file permdawg.cpp.
int tesseract::Dict::good_choice | ( | const WERD_CHOICE & | choice | ) |
Returns true if a good answer is found for the unknown blob rating.
|
inline |
Check whether the word has a hyphen at the end.
Definition at line 144 of file dict.h.
|
inline |
Same as above, but check the unichar at the end of the word.
Definition at line 152 of file dict.h.
|
inline |
Size of the base word (the part on the line before) of a hyphenated word.
Definition at line 131 of file dict.h.
|
inline |
void tesseract::Dict::init_active_dawgs | ( | DawgPositionVector * | active_dawgs, |
bool | ambigs_mode | ||
) | const |
Fill the given active_dawgs vector with dawgs that could contain the beginning of the word. If hyphenated() returns true, copy the entries from hyphen_active_dawgs_ instead.
Definition at line 569 of file dict.cpp.
|
inline |
Definition at line 118 of file dict.h.
bool tesseract::Dict::IsSpaceDelimitedLang | ( | ) | const |
Returns true if the language is space-delimited (not CJ, or T).
Definition at line 857 of file dict.cpp.
int tesseract::Dict::LengthOfShortestAlphaRun | ( | const WERD_CHOICE & | WordChoice | ) | const |
Returns the length of the shortest alpha run in WordChoice.
Definition at line 442 of file stopper.cpp.
|
inline |
Calls letter_is_okay_ member function.
Definition at line 361 of file dict.h.
void tesseract::Dict::Load | ( | const STRING & | lang, |
TessdataManager * | data_file | ||
) |
Definition at line 219 of file dict.cpp.
void tesseract::Dict::LoadLSTM | ( | const STRING & | lang, |
TessdataManager * | data_file | ||
) |
Definition at line 302 of file dict.cpp.
double tesseract::Dict::ngram_probability_in_context | ( | const char * | lang, |
const char * | context, | ||
int | context_bytes, | ||
const char * | character, | ||
int | character_bytes | ||
) |
bool tesseract::Dict::NoDangerousAmbig | ( | WERD_CHOICE * | BestChoice, |
DANGERR * | fixpt, | ||
bool | fix_replaceable, | ||
MATRIX * | ratings | ||
) |
Definition at line 142 of file stopper.cpp.
|
inline |
float tesseract::Dict::ParamsModelClassify | ( | const char * | lang, |
void * | path | ||
) |
void tesseract::Dict::permute_choices | ( | const char * | debug, |
const BLOB_CHOICE_LIST_VECTOR & | char_choices, | ||
int | char_choice_index, | ||
const CHAR_FRAGMENT_INFO * | prev_char_frag_info, | ||
WERD_CHOICE * | word, | ||
float | certainties[], | ||
float * | limit, | ||
WERD_CHOICE * | best_choice, | ||
int * | attempts_left, | ||
void * | more_args | ||
) |
permute_choices
Call append_choices() for each BLOB_CHOICE in BLOB_CHOICE_LIST with the given char_choice_index in char_choices.
Definition at line 203 of file permdawg.cpp.
|
inline |
Calls probability_in_context_ member function.
Definition at line 375 of file dict.h.
void tesseract::Dict::ProcessPatternEdges | ( | const Dawg * | dawg, |
const DawgPosition & | info, | ||
UNICHAR_ID | unichar_id, | ||
bool | word_end, | ||
DawgArgs * | dawg_args, | ||
PermuterType * | current_permuter | ||
) | const |
For each of the character classes of the given unichar_id (and the unichar_id itself) finds the corresponding outgoing node or self-loop in the given dawg and (after checking that it is valid) records it in dawg_args->updated_dawgs. Updates current_permuter if any valid edges were found.
Definition at line 531 of file dict.cpp.
void tesseract::Dict::ReplaceAmbig | ( | int | wrong_ngram_begin_index, |
int | wrong_ngram_size, | ||
UNICHAR_ID | correct_ngram_id, | ||
WERD_CHOICE * | werd_choice, | ||
MATRIX * | ratings | ||
) |
Definition at line 368 of file stopper.cpp.
void tesseract::Dict::reset_hyphen_vars | ( | bool | last_word_on_line | ) |
Unless the previous word was the last one on the line, and the current one is not (thus it is the first one on the line), erase hyphen_word_, clear hyphen_active_dawgs_, update last_word_on_line_.
Definition at line 28 of file hyphen.cpp.
void tesseract::Dict::set_hyphen_word | ( | const WERD_CHOICE & | word, |
const DawgPositionVector & | active_dawgs | ||
) |
Update hyphen_word_, and copy the given DawgPositionVector into hyphen_active_dawgs_.
Definition at line 45 of file hyphen.cpp.
void tesseract::Dict::SettupStopperPass1 | ( | ) |
Sets up stopper variables in preparation for the first pass.
Definition at line 360 of file stopper.cpp.
void tesseract::Dict::SettupStopperPass2 | ( | ) |
Sets up stopper variables in preparation for the second pass.
Definition at line 364 of file stopper.cpp.
void tesseract::Dict::SetupForLoad | ( | DawgCache * | dawg_cache | ) |
Definition at line 201 of file dict.cpp.
|
inline |
|
inline |
int tesseract::Dict::UniformCertainties | ( | const WERD_CHOICE & | word | ) |
Returns true if the certainty of the BestChoice word is within a reasonable range of the average certainties for the best choices for each character in the segmentation. This test is used to catch words in which one character is much worse than the other characters in the word (i.e. false will be returned in that case). The algorithm computes the mean and std deviation of the certainties in the word with the worst certainty thrown out.
Definition at line 461 of file stopper.cpp.
|
inline |
bool tesseract::Dict::valid_bigram | ( | const WERD_CHOICE & | word1, |
const WERD_CHOICE & | word2 | ||
) | const |
Definition at line 787 of file dict.cpp.
bool tesseract::Dict::valid_punctuation | ( | const WERD_CHOICE & | word | ) |
Returns true if the word contains a valid punctuation pattern. Note: Since the domains of punctuation symbols and symbols used in numbers are not disjoint, a valid number might contain an invalid punctuation pattern (e.g. .99).
Definition at line 830 of file dict.cpp.
int tesseract::Dict::valid_word | ( | const WERD_CHOICE & | word, |
bool | numbers_ok | ||
) | const |
Definition at line 753 of file dict.cpp.
|
inline |
Definition at line 466 of file dict.h.
|
inline |
This function is used by api/tesseract_cube_combiner.cpp.
Definition at line 473 of file dict.h.
|
inline |
Definition at line 469 of file dict.h.
|
inlinestatic |
Check all the DAWGs to see if this word is in any of them.
Read/Write/Access special purpose dawgs which contain words only of a certain length (used for phrase search for non-space-delimited languages).
Definition at line 459 of file dict.h.
|
inline |
double tesseract::Dict::certainty_scale = 20.0 |
int tesseract::Dict::dawg_debug_level = 0 |
double tesseract::Dict::doc_dict_certainty_threshold = -2.25 |
double tesseract::Dict::doc_dict_pending_threshold = 0.0 |
int tesseract::Dict::fragments_debug = 0 |
void(Dict::* tesseract::Dict::go_deeper_fxn_) (const char *debug, const BLOB_CHOICE_LIST_VECTOR &char_choices, int char_choice_index, const CHAR_FRAGMENT_INFO *prev_char_frag_info, bool word_ending, WERD_CHOICE *word, float certainties[], float *limit, WERD_CHOICE *best_choice, int *attempts_left, void *void_more_args) |
int tesseract::Dict::hyphen_debug_level = 0 |
int(Dict::* tesseract::Dict::letter_is_okay_) (void *void_dawg_args, const UNICHARSET &unicharset, UNICHAR_ID unichar_id, bool word_end) const |
bool tesseract::Dict::load_bigram_dawg = true |
bool tesseract::Dict::load_freq_dawg = true |
bool tesseract::Dict::load_number_dawg = true |
bool tesseract::Dict::load_punc_dawg = true |
bool tesseract::Dict::load_system_dawg = true |
bool tesseract::Dict::load_unambig_dawg = true |
int tesseract::Dict::max_permuter_attempts = 10000 |
int tesseract::Dict::max_viterbi_list_size = 10 |
char* tesseract::Dict::output_ambig_words_file = "" |
float(Dict::* tesseract::Dict::params_model_classify_) (const char *lang, void *path) |
double(Dict::* tesseract::Dict::probability_in_context_) (const char *lang, const char *context, int context_bytes, const char *character, int character_bytes) |
bool tesseract::Dict::save_doc_words = 0 |
bool tesseract::Dict::segment_nonalphabetic_script = false |
double tesseract::Dict::segment_penalty_dict_case_bad = 1.3125 |
double tesseract::Dict::segment_penalty_dict_case_ok = 1.1 |
double tesseract::Dict::segment_penalty_dict_frequent_word = 1.0 |
double tesseract::Dict::segment_penalty_dict_nonword = 1.25 |
double tesseract::Dict::segment_penalty_garbage = 1.50 |
double tesseract::Dict::stopper_allowable_character_badness = 3.0 |
double tesseract::Dict::stopper_certainty_per_char = -0.50 |
int tesseract::Dict::stopper_debug_level = 0 |
bool tesseract::Dict::stopper_no_acceptable_choices = false |
double tesseract::Dict::stopper_nondict_certainty_base = -2.50 |
double tesseract::Dict::stopper_phase2_certainty_rejection_offset = 1.0 |
int tesseract::Dict::stopper_smallword_size = 2 |
int tesseract::Dict::tessedit_truncate_wordchoice_log = 10 |
bool tesseract::Dict::use_only_first_uft8_step = false |
char* tesseract::Dict::user_patterns_file = "" |
char* tesseract::Dict::user_patterns_suffix = "" |
char* tesseract::Dict::user_words_file = "" |
char* tesseract::Dict::user_words_suffix = "" |
char* tesseract::Dict::word_to_debug = "" |
char* tesseract::Dict::word_to_debug_lengths = "" |
double tesseract::Dict::xheight_penalty_inconsistent = 0.25 |
double tesseract::Dict::xheight_penalty_subscripts = 0.125 |