#include <tesseractclass.h>
Public Member Functions | ||||||||||
Tesseract () | ||||||||||
~Tesseract () | ||||||||||
void | Clear () | |||||||||
void | ResetAdaptiveClassifier () | |||||||||
void | ResetDocumentDictionary () | |||||||||
void | SetEquationDetect (EquationDetect *detector) | |||||||||
const FCOORD & | reskew () const | |||||||||
Pix ** | mutable_pix_binary () | |||||||||
Pix * | pix_binary () const | |||||||||
Pix * | pix_grey () const | |||||||||
void | set_pix_grey (Pix *grey_pix) | |||||||||
Pix * | BestPix () const | |||||||||
void | set_pix_thresholds (Pix *thresholds) | |||||||||
int | source_resolution () const | |||||||||
void | set_source_resolution (int ppi) | |||||||||
int | ImageWidth () const | |||||||||
int | ImageHeight () const | |||||||||
Pix * | scaled_color () const | |||||||||
int | scaled_factor () const | |||||||||
void | SetScaledColor (int factor, Pix *color) | |||||||||
const Textord & | textord () const | |||||||||
Textord * | mutable_textord () | |||||||||
bool | right_to_left () const | |||||||||
int | num_sub_langs () const | |||||||||
Tesseract * | get_sub_lang (int index) const | |||||||||
bool | AnyTessLang () const | |||||||||
void | SetBlackAndWhitelist () | |||||||||
void | PrepareForPageseg () | |||||||||
void | PrepareForTessOCR (BLOCK_LIST *block_list, Tesseract *osd_tess, OSResults *osr) | |||||||||
int | SegmentPage (const STRING *input_file, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr) | |||||||||
void | SetupWordScripts (BLOCK_LIST *blocks) | |||||||||
int | AutoPageSeg (PageSegMode pageseg_mode, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks, BLOBNBOX_LIST *diacritic_blobs, Tesseract *osd_tess, OSResults *osr) | |||||||||
ColumnFinder * | SetupPageSegAndDetectOrientation (PageSegMode pageseg_mode, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr, TO_BLOCK_LIST *to_blocks, Pix **photo_mask_pix, Pix **music_mask_pix) | |||||||||
void | PrerecAllWordsPar (const GenericVector< WordData > &words) | |||||||||
bool | ProcessTargetWord (const TBOX &word_box, const TBOX &target_word_box, const char *word_config, int pass) | |||||||||
void | SetupAllWordsPassN (int pass_n, const TBOX *target_word_box, const char *word_config, PAGE_RES *page_res, GenericVector< WordData > *words) | |||||||||
void | SetupWordPassN (int pass_n, WordData *word) | |||||||||
bool | RecogAllWordsPassN (int pass_n, ETEXT_DESC *monitor, PAGE_RES_IT *pr_it, GenericVector< WordData > *words) | |||||||||
bool | recog_all_words (PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, const char *word_config, int dopasses) | |||||||||
void | rejection_passes (PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, const char *word_config) | |||||||||
void | bigram_correction_pass (PAGE_RES *page_res) | |||||||||
void | blamer_pass (PAGE_RES *page_res) | |||||||||
void | script_pos_pass (PAGE_RES *page_res) | |||||||||
int | RetryWithLanguage (const WordData &word_data, WordRecognizer recognizer, WERD_RES **in_word, PointerVector< WERD_RES > *best_words) | |||||||||
bool | ReassignDiacritics (int pass, PAGE_RES_IT *pr_it, bool *make_next_word_fuzzy) | |||||||||
void | AssignDiacriticsToOverlappingBlobs (const GenericVector< C_OUTLINE * > &outlines, int pass, WERD *real_word, PAGE_RES_IT *pr_it, GenericVector< bool > *word_wanted, GenericVector< bool > *overlapped_any_blob, GenericVector< C_BLOB * > *target_blobs) | |||||||||
void | AssignDiacriticsToNewBlobs (const GenericVector< C_OUTLINE * > &outlines, int pass, WERD *real_word, PAGE_RES_IT *pr_it, GenericVector< bool > *word_wanted, GenericVector< C_BLOB * > *target_blobs) | |||||||||
bool | SelectGoodDiacriticOutlines (int pass, float certainty_threshold, PAGE_RES_IT *pr_it, C_BLOB *blob, const GenericVector< C_OUTLINE * > &outlines, int num_outlines, GenericVector< bool > *ok_outlines) | |||||||||
float | ClassifyBlobPlusOutlines (const GenericVector< bool > &ok_outlines, const GenericVector< C_OUTLINE * > &outlines, int pass_n, PAGE_RES_IT *pr_it, C_BLOB *blob, STRING *best_str) | |||||||||
float | ClassifyBlobAsWord (int pass_n, PAGE_RES_IT *pr_it, C_BLOB *blob, STRING *best_str, float *c2) | |||||||||
void | classify_word_and_language (int pass_n, PAGE_RES_IT *pr_it, WordData *word_data) | |||||||||
void | classify_word_pass1 (const WordData &word_data, WERD_RES **in_word, PointerVector< WERD_RES > *out_words) | |||||||||
void | recog_pseudo_word (PAGE_RES *page_res, TBOX &selection_box) | |||||||||
void | fix_rep_char (PAGE_RES_IT *page_res_it) | |||||||||
ACCEPTABLE_WERD_TYPE | acceptable_word_string (const UNICHARSET &char_set, const char *s, const char *lengths) | |||||||||
void | match_word_pass_n (int pass_n, WERD_RES *word, ROW *row, BLOCK *block) | |||||||||
void | classify_word_pass2 (const WordData &word_data, WERD_RES **in_word, PointerVector< WERD_RES > *out_words) | |||||||||
void | ReportXhtFixResult (bool accept_new_word, float new_x_ht, WERD_RES *word, WERD_RES *new_word) | |||||||||
bool | RunOldFixXht (WERD_RES *word, BLOCK *block, ROW *row) | |||||||||
bool | TrainedXheightFix (WERD_RES *word, BLOCK *block, ROW *row) | |||||||||
bool | TestNewNormalization (int original_misfits, float baseline_shift, float new_x_ht, WERD_RES *word, BLOCK *block, ROW *row) | |||||||||
BOOL8 | recog_interactive (PAGE_RES_IT *pr_it) | |||||||||
void | set_word_fonts (WERD_RES *word) | |||||||||
void | font_recognition_pass (PAGE_RES *page_res) | |||||||||
void | dictionary_correction_pass (PAGE_RES *page_res) | |||||||||
BOOL8 | check_debug_pt (WERD_RES *word, int location) | |||||||||
bool | SubAndSuperscriptFix (WERD_RES *word_res) | |||||||||
void | GetSubAndSuperscriptCandidates (const WERD_RES *word, int *num_rebuilt_leading, ScriptPos *leading_pos, float *leading_certainty, int *num_rebuilt_trailing, ScriptPos *trailing_pos, float *trailing_certainty, float *avg_certainty, float *unlikely_threshold) | |||||||||
WERD_RES * | TrySuperscriptSplits (int num_chopped_leading, float leading_certainty, ScriptPos leading_pos, int num_chopped_trailing, float trailing_certainty, ScriptPos trailing_pos, WERD_RES *word, bool *is_good, int *retry_leading, int *retry_trailing) | |||||||||
bool | BelievableSuperscript (bool debug, const WERD_RES &word, float certainty_threshold, int *left_ok, int *right_ok) const | |||||||||
void | output_pass (PAGE_RES_IT &page_res_it, const TBOX *target_word_box) | |||||||||
void | write_results (PAGE_RES_IT &page_res_it, char newline_type, BOOL8 force_eol) | |||||||||
void | set_unlv_suspects (WERD_RES *word) | |||||||||
UNICHAR_ID | get_rep_char (WERD_RES *word) | |||||||||
BOOL8 | acceptable_number_string (const char *s, const char *lengths) | |||||||||
inT16 | count_alphanums (const WERD_CHOICE &word) | |||||||||
inT16 | count_alphas (const WERD_CHOICE &word) | |||||||||
void | read_config_file (const char *filename, SetParamConstraint constraint) | |||||||||
int | init_tesseract (const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params) | |||||||||
int | init_tesseract (const char *datapath, const char *language, OcrEngineMode oem) | |||||||||
int | init_tesseract_internal (const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params) | |||||||||
void | SetupUniversalFontIds () | |||||||||
int | init_tesseract_lm (const char *arg0, const char *textbase, const char *language) | |||||||||
void | recognize_page (STRING &image_name) | |||||||||
void | end_tesseract () | |||||||||
bool | init_tesseract_lang_data (const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params) | |||||||||
void | ParseLanguageString (const char *lang_str, GenericVector< STRING > *to_load, GenericVector< STRING > *not_to_load) | |||||||||
SVMenuNode * | build_menu_new () | |||||||||
void | pgeditor_main (int width, int height, PAGE_RES *page_res) | |||||||||
void | process_image_event (const SVEvent &event) | |||||||||
BOOL8 | process_cmd_win_event (inT32 cmd_event, char *new_value) | |||||||||
void | debug_word (PAGE_RES *page_res, const TBOX &selection_box) | |||||||||
void | do_re_display (BOOL8(tesseract::Tesseract::*word_painter)(PAGE_RES_IT *pr_it)) | |||||||||
BOOL8 | word_display (PAGE_RES_IT *pr_it) | |||||||||
BOOL8 | word_bln_display (PAGE_RES_IT *pr_it) | |||||||||
BOOL8 | word_blank_and_set_display (PAGE_RES_IT *pr_its) | |||||||||
BOOL8 | word_set_display (PAGE_RES_IT *pr_it) | |||||||||
BOOL8 | word_dumper (PAGE_RES_IT *pr_it) | |||||||||
void | blob_feature_display (PAGE_RES *page_res, const TBOX &selection_box) | |||||||||
void | make_reject_map (WERD_RES *word, ROW *row, inT16 pass) | |||||||||
BOOL8 | one_ell_conflict (WERD_RES *word_res, BOOL8 update_map) | |||||||||
inT16 | first_alphanum_index (const char *word, const char *word_lengths) | |||||||||
inT16 | first_alphanum_offset (const char *word, const char *word_lengths) | |||||||||
inT16 | alpha_count (const char *word, const char *word_lengths) | |||||||||
BOOL8 | word_contains_non_1_digit (const char *word, const char *word_lengths) | |||||||||
void | dont_allow_1Il (WERD_RES *word) | |||||||||
inT16 | count_alphanums (WERD_RES *word) | |||||||||
void | flip_0O (WERD_RES *word) | |||||||||
BOOL8 | non_0_digit (const UNICHARSET &ch_set, UNICHAR_ID unichar_id) | |||||||||
BOOL8 | non_O_upper (const UNICHARSET &ch_set, UNICHAR_ID unichar_id) | |||||||||
BOOL8 | repeated_nonalphanum_wd (WERD_RES *word, ROW *row) | |||||||||
void | nn_match_word (WERD_RES *word, ROW *row) | |||||||||
void | nn_recover_rejects (WERD_RES *word, ROW *row) | |||||||||
void | set_done (WERD_RES *word, inT16 pass) | |||||||||
inT16 | safe_dict_word (const WERD_RES *werd_res) | |||||||||
void | flip_hyphens (WERD_RES *word) | |||||||||
void | reject_I_1_L (WERD_RES *word) | |||||||||
void | reject_edge_blobs (WERD_RES *word) | |||||||||
void | reject_mostly_rejects (WERD_RES *word) | |||||||||
BOOL8 | word_adaptable (WERD_RES *word, uinT16 mode) | |||||||||
void | recog_word_recursive (WERD_RES *word) | |||||||||
void | recog_word (WERD_RES *word) | |||||||||
void | split_and_recog_word (WERD_RES *word) | |||||||||
void | split_word (WERD_RES *word, int split_pt, WERD_RES **right_piece, BlamerBundle **orig_blamer_bundle) const | |||||||||
void | join_words (WERD_RES *word, WERD_RES *word2, BlamerBundle *orig_bb) const | |||||||||
void | match_current_words (WERD_RES_LIST &words, ROW *row, BLOCK *block) | |||||||||
inT16 | fp_eval_word_spacing (WERD_RES_LIST &word_res_list) | |||||||||
void | dump_words (WERD_RES_LIST &perm, inT16 score, inT16 mode, BOOL8 improved) | |||||||||
BOOL8 | fixspace_thinks_word_done (WERD_RES *word) | |||||||||
GARBAGE_LEVEL | garbage_word (WERD_RES *word, BOOL8 ok_dict_word) | |||||||||
BOOL8 | potential_word_crunch (WERD_RES *word, GARBAGE_LEVEL garbage_level, BOOL8 ok_dict_word) | |||||||||
void | tilde_crunch (PAGE_RES_IT &page_res_it) | |||||||||
void | unrej_good_quality_words (PAGE_RES_IT &page_res_it) | |||||||||
void | doc_and_block_rejection (PAGE_RES_IT &page_res_it, BOOL8 good_quality_doc) | |||||||||
void | quality_based_rejection (PAGE_RES_IT &page_res_it, BOOL8 good_quality_doc) | |||||||||
void | convert_bad_unlv_chs (WERD_RES *word_res) | |||||||||
void | tilde_delete (PAGE_RES_IT &page_res_it) | |||||||||
inT16 | word_blob_quality (WERD_RES *word, ROW *row) | |||||||||
void | word_char_quality (WERD_RES *word, ROW *row, inT16 *match_count, inT16 *accepted_match_count) | |||||||||
void | unrej_good_chs (WERD_RES *word, ROW *row) | |||||||||
inT16 | count_outline_errs (char c, inT16 outline_count) | |||||||||
inT16 | word_outline_errs (WERD_RES *word) | |||||||||
BOOL8 | terrible_word_crunch (WERD_RES *word, GARBAGE_LEVEL garbage_level) | |||||||||
CRUNCH_MODE | word_deletable (WERD_RES *word, inT16 &delete_mode) | |||||||||
inT16 | failure_count (WERD_RES *word) | |||||||||
BOOL8 | noise_outlines (TWERD *word) | |||||||||
void | tess_segment_pass_n (int pass_n, WERD_RES *word) | |||||||||
PAGE_RES * | ApplyBoxes (const STRING &fname, bool find_segmentation, BLOCK_LIST *block_list) | |||||||||
void | PreenXHeights (BLOCK_LIST *block_list) | |||||||||
PAGE_RES * | SetupApplyBoxes (const GenericVector< TBOX > &boxes, BLOCK_LIST *block_list) | |||||||||
void | MaximallyChopWord (const GenericVector< TBOX > &boxes, BLOCK *block, ROW *row, WERD_RES *word_res) | |||||||||
bool | ResegmentCharBox (PAGE_RES *page_res, const TBOX *prev_box, const TBOX &box, const TBOX &next_box, const char *correct_text) | |||||||||
bool | ResegmentWordBox (BLOCK_LIST *block_list, const TBOX &box, const TBOX &next_box, const char *correct_text) | |||||||||
void | ReSegmentByClassification (PAGE_RES *page_res) | |||||||||
bool | ConvertStringToUnichars (const char *utf8, GenericVector< UNICHAR_ID > *class_ids) | |||||||||
bool | FindSegmentation (const GenericVector< UNICHAR_ID > &target_text, WERD_RES *word_res) | |||||||||
void | SearchForText (const GenericVector< BLOB_CHOICE_LIST * > *choices, int choices_pos, int choices_length, const GenericVector< UNICHAR_ID > &target_text, int text_index, float rating, GenericVector< int > *segmentation, float *best_rating, GenericVector< int > *best_segmentation) | |||||||||
void | TidyUp (PAGE_RES *page_res) | |||||||||
void | ReportFailedBox (int boxfile_lineno, TBOX box, const char *box_ch, const char *err_msg) | |||||||||
void | CorrectClassifyWords (PAGE_RES *page_res) | |||||||||
void | ApplyBoxTraining (const STRING &fontname, PAGE_RES *page_res) | |||||||||
int | CountMisfitTops (WERD_RES *word_res) | |||||||||
float | ComputeCompatibleXheight (WERD_RES *word_res, float *baseline_shift) | |||||||||
FILE * | init_recog_training (const STRING &fname) | |||||||||
void | recog_training_segmented (const STRING &fname, PAGE_RES *page_res, volatile ETEXT_DESC *monitor, FILE *output_file) | |||||||||
void | ambigs_classify_and_output (const char *label, PAGE_RES_IT *pr_it, FILE *output_file) | |||||||||
CubeRecoContext * | GetCubeRecoContext () | |||||||||
init_cube_objects | ||||||||||
Instantiates Tesseract object's CubeRecoContext and TesseractCubeCombiner. Returns false if cube context could not be created or if load_combiner is true, but the combiner could not be loaded. | ||||||||||
bool | init_cube_objects (bool load_combiner, TessdataManager *tessdata_manager) | |||||||||
run_cube_combiner | ||||||||||
Iterates through tesseract's results and calls cube on each word, combining the results with the existing tesseract result. | ||||||||||
void | run_cube_combiner (PAGE_RES *page_res) | |||||||||
cube_word_pass1 | ||||||||||
Recognizes a single word using (only) cube. Compatible with Tesseract's classify_word_pass1/classify_word_pass2. | ||||||||||
void | cube_word_pass1 (BLOCK *block, ROW *row, WERD_RES *word) | |||||||||
cube_recognize_word | ||||||||||
Cube recognizer to recognize a single word as with classify_word_pass1 but also returns the cube object in case the combiner is needed. | ||||||||||
CubeObject * | cube_recognize_word (BLOCK *block, WERD_RES *word) | |||||||||
cube_combine_word | ||||||||||
Combines the cube and tesseract results for a single word, leaving the result in tess_word. | ||||||||||
void | cube_combine_word (CubeObject *cube_obj, WERD_RES *cube_word, WERD_RES *tess_word) | |||||||||
cube_recognize | ||||||||||
Call cube on the current word, and write the result to word. Sets up a fake result and returns false if something goes wrong. | ||||||||||
bool | cube_recognize (CubeObject *cube_obj, BLOCK *block, WERD_RES *word) | |||||||||
fill_werd_res | ||||||||||
Fill Tesseract's word result fields with cube's. | ||||||||||
void | fill_werd_res (const BoxWord &cube_box_word, const char *cube_best_str, WERD_RES *tess_werd_res) | |||||||||
extract_cube_state | ||||||||||
Extract CharSamp objects and character bounding boxes from the CubeObject's state. The caller should free both structures. | ||||||||||
bool | extract_cube_state (CubeObject *cube_obj, int *num_chars, Boxa **char_boxes, CharSamp ***char_samples) | |||||||||
create_cube_box_word | ||||||||||
bool | create_cube_box_word (Boxa *char_boxes, int num_chars, TBOX word_box, BoxWord *box_word) | |||||||||
eval_word_spacing() | ||||||||||
The basic measure is the number of characters in contextually confirmed words. (I.e. the word is done.) If all words are contextually confirmed the evaluation is deemed perfect. Some fiddles are done to handle "1"s as these are VERY frequent causes of fuzzy spaces. The problem with the basic measure is that "561 63" would score the same as "56163", though given our knowledge that the space is fuzzy, and that there is a "1" next to the fuzzy space, we need to ensure that "56163" is preferred. The solution is to NOT COUNT the score of any word which has a digit at one end and a "1Il" as the character the other side of the space. Conversely, any character next to a "1" within a word is counted as a positive score. Thus "561 63" would score 4 (3 chars in a numeric word plus 1 side of the "1" joined). "56163" would score 7 - all chars in a numeric word + 2 sides of a "1" joined. The joined 1 rule is applied to any word REGARDLESS of contextual confirmation. Thus "PS7a71 3/7a" scores 1 (neither word is contextually confirmed; the only score is from the joined 1). "PS7a713/7a" scores 2. | ||||||||||
BOOL8 | digit_or_numeric_punct (WERD_RES *word, int char_position) | |||||||||
inT16 | eval_word_spacing (WERD_RES_LIST &word_res_list) | |||||||||
fix_sp_fp_word() | ||||||||||
Test the current word to see if it can be split by deleting noise blobs. If so, do the business. Return with the iterator pointing to the same place if the word is unchanged, or the last of the replacement words. | ||||||||||
void | fix_noisy_space_list (WERD_RES_LIST &best_perm, ROW *row, BLOCK *block) | |||||||||
void | fix_sp_fp_word (WERD_RES_IT &word_res_it, ROW *row, BLOCK *block) | |||||||||
inT16 | worst_noise_blob (WERD_RES *word_res, float *worst_noise_score) | |||||||||
float | blob_noise_score (TBLOB *blob) | |||||||||
void | break_noisiest_blob_word (WERD_RES_LIST &words) | |||||||||
fix_fuzzy_spaces() | ||||||||||
Walk over the page finding sequences of words joined by fuzzy spaces. Extract them as a sublist, process the sublist to find the optimal arrangement of spaces then replace the sublist in the ROW_RES.
| ||||||||||
void | fix_fuzzy_space_list (WERD_RES_LIST &best_perm, ROW *row, BLOCK *block) | |||||||||
void | fix_fuzzy_spaces (ETEXT_DESC *monitor, inT32 word_count, PAGE_RES *page_res) | |||||||||
process_selected_words() | ||||||||||
Walk the current block list applying the specified word processor function to each word that overlaps the selection_box. | ||||||||||
void | process_selected_words (PAGE_RES *page_res, TBOX &selection_box, BOOL8(tesseract::Tesseract::*word_processor)(PAGE_RES_IT *pr_it)) | |||||||||
tess_add_doc_word | ||||||||||
Add the given word to the document dictionary | ||||||||||
void | tess_add_doc_word (WERD_CHOICE *word_choice) | |||||||||
tess_acceptable_word | ||||||||||
| ||||||||||
bool | tess_acceptable_word (WERD_RES *word) | |||||||||
Public Member Functions inherited from tesseract::Wordrec | ||||||||||
Wordrec () | ||||||||||
virtual | ~Wordrec () | |||||||||
void | SaveAltChoices (const LIST &best_choices, WERD_RES *word) | |||||||||
void | FillLattice (const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle) | |||||||||
void | CallFillLattice (const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle) | |||||||||
void | SegSearch (WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle) | |||||||||
void | WordSearch (WERD_RES *word_res) | |||||||||
void | InitialSegSearch (WERD_RES *word_res, LMPainPoints *pain_points, GenericVector< SegSearchPending > *pending, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle) | |||||||||
void | DoSegSearch (WERD_RES *word_res) | |||||||||
SEAM * | attempt_blob_chop (TWERD *word, TBLOB *blob, inT32 blob_number, bool italic_blob, const GenericVector< SEAM * > &seams) | |||||||||
SEAM * | chop_numbered_blob (TWERD *word, inT32 blob_number, bool italic_blob, const GenericVector< SEAM * > &seams) | |||||||||
SEAM * | chop_overlapping_blob (const GenericVector< TBOX > &boxes, bool italic_blob, WERD_RES *word_res, int *blob_number) | |||||||||
void | add_seam_to_queue (float new_priority, SEAM *new_seam, SeamQueue *seams) | |||||||||
void | choose_best_seam (SeamQueue *seam_queue, const SPLIT *split, PRIORITY priority, SEAM **seam_result, TBLOB *blob, SeamPile *seam_pile) | |||||||||
void | combine_seam (const SeamPile &seam_pile, const SEAM *seam, SeamQueue *seam_queue) | |||||||||
SEAM * | pick_good_seam (TBLOB *blob) | |||||||||
void | try_point_pairs (EDGEPT *points[MAX_NUM_POINTS], inT16 num_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob) | |||||||||
void | try_vertical_splits (EDGEPT *points[MAX_NUM_POINTS], inT16 num_points, EDGEPT_CLIST *new_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob) | |||||||||
PRIORITY | grade_split_length (register SPLIT *split) | |||||||||
PRIORITY | grade_sharpness (register SPLIT *split) | |||||||||
bool | near_point (EDGEPT *point, EDGEPT *line_pt_0, EDGEPT *line_pt_1, EDGEPT **near_pt) | |||||||||
virtual BLOB_CHOICE_LIST * | classify_piece (const GenericVector< SEAM * > &seams, inT16 start, inT16 end, const char *description, TWERD *word, BlamerBundle *blamer_bundle) | |||||||||
void | merge_fragments (MATRIX *ratings, inT16 num_blobs) | |||||||||
void | get_fragment_lists (inT16 current_frag, inT16 current_row, inT16 start, inT16 num_frag_parts, inT16 num_blobs, MATRIX *ratings, BLOB_CHOICE_LIST *choice_lists) | |||||||||
void | merge_and_put_fragment_lists (inT16 row, inT16 column, inT16 num_frag_parts, BLOB_CHOICE_LIST *choice_lists, MATRIX *ratings) | |||||||||
void | fill_filtered_fragment_list (BLOB_CHOICE_LIST *choices, int fragment_pos, int num_frag_parts, BLOB_CHOICE_LIST *filtered_choices) | |||||||||
void | program_editup (const char *textbase, bool init_classifier, bool init_permute) | |||||||||
void | cc_recog (WERD_RES *word) | |||||||||
void | program_editdown (inT32 elasped_time) | |||||||||
void | set_pass1 () | |||||||||
void | set_pass2 () | |||||||||
int | end_recog () | |||||||||
BLOB_CHOICE_LIST * | call_matcher (TBLOB *blob) | |||||||||
int | dict_word (const WERD_CHOICE &word) | |||||||||
BLOB_CHOICE_LIST * | classify_blob (TBLOB *blob, const char *string, C_COL color, BlamerBundle *blamer_bundle) | |||||||||
PRIORITY | point_priority (EDGEPT *point) | |||||||||
void | add_point_to_list (PointHeap *point_heap, EDGEPT *point) | |||||||||
bool | is_inside_angle (EDGEPT *pt) | |||||||||
int | angle_change (EDGEPT *point1, EDGEPT *point2, EDGEPT *point3) | |||||||||
EDGEPT * | pick_close_point (EDGEPT *critical_point, EDGEPT *vertical_point, int *best_dist) | |||||||||
void | prioritize_points (TESSLINE *outline, PointHeap *points) | |||||||||
void | new_min_point (EDGEPT *local_min, PointHeap *points) | |||||||||
void | new_max_point (EDGEPT *local_max, PointHeap *points) | |||||||||
void | vertical_projection_point (EDGEPT *split_point, EDGEPT *target_point, EDGEPT **best_point, EDGEPT_CLIST *new_points) | |||||||||
SEAM * | improve_one_blob (const GenericVector< BLOB_CHOICE * > &blob_choices, DANGERR *fixpt, bool split_next_to_fragment, bool italic_blob, WERD_RES *word, int *blob_number) | |||||||||
SEAM * | chop_one_blob (const GenericVector< TBOX > &boxes, const GenericVector< BLOB_CHOICE * > &blob_choices, WERD_RES *word_res, int *blob_number) | |||||||||
void | chop_word_main (WERD_RES *word) | |||||||||
void | improve_by_chopping (float rating_cert_scale, WERD_RES *word, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle, LMPainPoints *pain_points, GenericVector< SegSearchPending > *pending) | |||||||||
int | select_blob_to_split (const GenericVector< BLOB_CHOICE * > &blob_choices, float rating_ceiling, bool split_next_to_fragment) | |||||||||
int | select_blob_to_split_from_fixpt (DANGERR *fixpt) | |||||||||
Public Member Functions inherited from tesseract::Classify | ||||||||||
Classify () | ||||||||||
virtual | ~Classify () | |||||||||
Dict & | getDict () | |||||||||
const ShapeTable * | shape_table () const | |||||||||
void | SetStaticClassifier (ShapeClassifier *static_classifier) | |||||||||
void | AddLargeSpeckleTo (int blob_length, BLOB_CHOICE_LIST *choices) | |||||||||
bool | LargeSpeckle (const TBLOB &blob) | |||||||||
ADAPT_TEMPLATES | NewAdaptedTemplates (bool InitFromUnicharset) | |||||||||
int | GetFontinfoId (ADAPT_CLASS Class, uinT8 ConfigId) | |||||||||
int | PruneClasses (const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uinT8 *normalization_factors, const uinT16 *expected_num_features, GenericVector< CP_RESULT_STRUCT > *results) | |||||||||
void | ReadNewCutoffs (FILE *CutoffFile, bool swap, inT64 end_offset, CLASS_CUTOFF_ARRAY Cutoffs) | |||||||||
void | PrintAdaptedTemplates (FILE *File, ADAPT_TEMPLATES Templates) | |||||||||
void | WriteAdaptedTemplates (FILE *File, ADAPT_TEMPLATES Templates) | |||||||||
ADAPT_TEMPLATES | ReadAdaptedTemplates (FILE *File) | |||||||||
FLOAT32 | ComputeNormMatch (CLASS_ID ClassId, const FEATURE_STRUCT &feature, BOOL8 DebugMatch) | |||||||||
void | FreeNormProtos () | |||||||||
NORM_PROTOS * | ReadNormProtos (FILE *File, inT64 end_offset) | |||||||||
void | ConvertProto (PROTO Proto, int ProtoId, INT_CLASS Class) | |||||||||
INT_TEMPLATES | CreateIntTemplates (CLASSES FloatProtos, const UNICHARSET &target_unicharset) | |||||||||
void | LearnWord (const char *fontname, WERD_RES *word) | |||||||||
void | LearnPieces (const char *fontname, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word) | |||||||||
void | InitAdaptiveClassifier (bool load_pre_trained_templates) | |||||||||
void | InitAdaptedClass (TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates) | |||||||||
void | AmbigClassifier (const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, const TBLOB *blob, INT_TEMPLATES templates, ADAPT_CLASS *classes, UNICHAR_ID *ambiguities, ADAPT_RESULTS *results) | |||||||||
void | MasterMatcher (INT_TEMPLATES templates, inT16 num_features, const INT_FEATURE_STRUCT *features, const uinT8 *norm_factors, ADAPT_CLASS *classes, int debug, int matcher_multiplier, const TBOX &blob_box, const GenericVector< CP_RESULT_STRUCT > &results, ADAPT_RESULTS *final_results) | |||||||||
void | ExpandShapesAndApplyCorrections (ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, const uinT8 *cn_factors, UnicharRating *int_result, ADAPT_RESULTS *final_results) | |||||||||
double | ComputeCorrectedRating (bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, int matcher_multiplier, const uinT8 *cn_factors) | |||||||||
void | ConvertMatchesToChoices (const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices) | |||||||||
void | AddNewResult (const UnicharRating &new_result, ADAPT_RESULTS *results) | |||||||||
int | GetAdaptiveFeatures (TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures) | |||||||||
void | DebugAdaptiveClassifier (TBLOB *Blob, ADAPT_RESULTS *Results) | |||||||||
PROTO_ID | MakeNewTempProtos (FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS IClass, ADAPT_CLASS Class, BIT_VECTOR TempProtoMask) | |||||||||
int | MakeNewTemporaryConfig (ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures) | |||||||||
void | MakePermanent (ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob) | |||||||||
void | PrintAdaptiveMatchResults (const ADAPT_RESULTS &results) | |||||||||
void | RemoveExtraPuncs (ADAPT_RESULTS *Results) | |||||||||
void | RemoveBadMatches (ADAPT_RESULTS *Results) | |||||||||
void | SetAdaptiveThreshold (FLOAT32 Threshold) | |||||||||
void | ShowBestMatchFor (int shape_id, const INT_FEATURE_STRUCT *features, int num_features) | |||||||||
STRING | ClassIDToDebugStr (const INT_TEMPLATES_STRUCT *templates, int class_id, int config_id) const | |||||||||
int | ClassAndConfigIDToFontOrShapeID (int class_id, int int_result_config) const | |||||||||
int | ShapeIDToClassID (int shape_id) const | |||||||||
UNICHAR_ID * | BaselineClassifier (TBLOB *Blob, const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results) | |||||||||
int | CharNormClassifier (TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results) | |||||||||
int | CharNormTrainingSample (bool pruner_only, int keep_this, const TrainingSample &sample, GenericVector< UnicharRating > *results) | |||||||||
UNICHAR_ID * | GetAmbiguities (TBLOB *Blob, CLASS_ID CorrectClass) | |||||||||
void | DoAdaptiveMatch (TBLOB *Blob, ADAPT_RESULTS *Results) | |||||||||
void | AdaptToChar (TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, FLOAT32 Threshold, ADAPT_TEMPLATES adaptive_templates) | |||||||||
void | DisplayAdaptedChar (TBLOB *blob, INT_CLASS_STRUCT *int_class) | |||||||||
bool | AdaptableWord (WERD_RES *word) | |||||||||
void | EndAdaptiveClassifier () | |||||||||
void | SettupPass1 () | |||||||||
void | SettupPass2 () | |||||||||
void | AdaptiveClassifier (TBLOB *Blob, BLOB_CHOICE_LIST *Choices) | |||||||||
void | ClassifyAsNoise (ADAPT_RESULTS *Results) | |||||||||
void | ResetAdaptiveClassifierInternal () | |||||||||
void | SwitchAdaptiveClassifier () | |||||||||
void | StartBackupAdaptiveClassifier () | |||||||||
int | GetCharNormFeature (const INT_FX_RESULT_STRUCT &fx_info, INT_TEMPLATES templates, uinT8 *pruner_norm_array, uinT8 *char_norm_array) | |||||||||
void | ComputeCharNormArrays (FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uinT8 *char_norm_array, uinT8 *pruner_array) | |||||||||
bool | TempConfigReliable (CLASS_ID class_id, const TEMP_CONFIG &config) | |||||||||
void | UpdateAmbigsGroup (CLASS_ID class_id, TBLOB *Blob) | |||||||||
bool | AdaptiveClassifierIsFull () const | |||||||||
bool | AdaptiveClassifierIsEmpty () const | |||||||||
bool | LooksLikeGarbage (TBLOB *blob) | |||||||||
void | RefreshDebugWindow (ScrollView **win, const char *msg, int y_offset, const TBOX &wbox) | |||||||||
void | ClearCharNormArray (uinT8 *char_norm_array) | |||||||||
void | ComputeIntCharNormArray (const FEATURE_STRUCT &norm_feature, uinT8 *char_norm_array) | |||||||||
void | ComputeIntFeatures (FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures) | |||||||||
INT_TEMPLATES | ReadIntTemplates (FILE *File) | |||||||||
void | WriteIntTemplates (FILE *File, INT_TEMPLATES Templates, const UNICHARSET &target_unicharset) | |||||||||
CLASS_ID | GetClassToDebug (const char *Prompt, bool *adaptive_on, bool *pretrained_on, int *shape_id) | |||||||||
void | ShowMatchDisplay () | |||||||||
UnicityTable< FontInfo > & | get_fontinfo_table () | |||||||||
const UnicityTable< FontInfo > & | get_fontinfo_table () const | |||||||||
UnicityTable< FontSet > & | get_fontset_table () | |||||||||
void | NormalizeOutlines (LIST Outlines, FLOAT32 *XScale, FLOAT32 *YScale) | |||||||||
FEATURE_SET | ExtractOutlineFeatures (TBLOB *Blob) | |||||||||
FEATURE_SET | ExtractPicoFeatures (TBLOB *Blob) | |||||||||
FEATURE_SET | ExtractIntCNFeatures (const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info) | |||||||||
FEATURE_SET | ExtractIntGeoFeatures (const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info) | |||||||||
void | LearnBlob (const STRING &fontname, TBLOB *Blob, const DENORM &cn_denorm, const INT_FX_RESULT_STRUCT &fx_info, const char *blob_text) | |||||||||
bool | WriteTRFile (const STRING &filename) | |||||||||
Public Member Functions inherited from tesseract::CCStruct | ||||||||||
CCStruct () | ||||||||||
~CCStruct () | ||||||||||
Public Member Functions inherited from tesseract::CUtil | ||||||||||
CUtil () | ||||||||||
~CUtil () | ||||||||||
void | read_variables (const char *filename, bool global_only) | |||||||||
Public Member Functions inherited from tesseract::CCUtil | ||||||||||
CCUtil () | ||||||||||
virtual | ~CCUtil () | |||||||||
void | main_setup (const char *argv0, const char *basename) | |||||||||
CCUtil::main_setup - set location of tessdata and name of image. More... | ||||||||||
ParamsVectors * | params () | |||||||||
Additional Inherited Members | |
Static Public Member Functions inherited from tesseract::Classify | |
static void | SetupBLCNDenorms (const TBLOB &blob, bool nonlinear_norm, DENORM *bl_denorm, DENORM *cn_denorm, INT_FX_RESULT_STRUCT *fx_info) |
static void | ExtractFeatures (const TBLOB &blob, bool nonlinear_norm, GenericVector< INT_FEATURE_STRUCT > *bl_features, GenericVector< INT_FEATURE_STRUCT > *cn_features, INT_FX_RESULT_STRUCT *results, GenericVector< int > *outline_cn_counts) |
Static Public Attributes inherited from tesseract::CCStruct | |
static const double | kDescenderFraction = 0.25 |
static const double | kXHeightFraction = 0.5 |
static const double | kAscenderFraction = 0.25 |
static const double | kXHeightCapRatio |
Protected Member Functions inherited from tesseract::Wordrec | |
bool | SegSearchDone (int num_futile_classifications) |
void | UpdateSegSearchNodes (float rating_cert_scale, int starting_col, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle) |
void | ProcessSegSearchPainPoint (float pain_point_priority, const MATRIX_COORD &pain_point, const char *pain_point_type, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle) |
void | ResetNGramSearch (WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, GenericVector< SegSearchPending > *pending) |
void | InitBlamerForSegSearch (WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle, STRING *blamer_debug) |
Protected Attributes inherited from tesseract::Classify | |
IntegerMatcher | im_ |
FEATURE_DEFS_STRUCT | feature_defs_ |
ShapeTable * | shape_table_ |
Definition at line 170 of file tesseractclass.h.
tesseract::Tesseract::Tesseract | ( | ) |
Definition at line 57 of file tesseractclass.cpp.
tesseract::Tesseract::~Tesseract | ( | ) |
Definition at line 623 of file tesseractclass.cpp.
BOOL8 tesseract::Tesseract::acceptable_number_string | ( | const char * | s, |
const char * | lengths | ||
) |
Definition at line 421 of file output.cpp.
ACCEPTABLE_WERD_TYPE tesseract::Tesseract::acceptable_word_string | ( | const UNICHARSET & | char_set, |
const char * | s, | ||
const char * | lengths | ||
) |
Definition at line 1663 of file control.cpp.
inT16 tesseract::Tesseract::alpha_count | ( | const char * | word, |
const char * | word_lengths | ||
) |
Definition at line 495 of file reject.cpp.
void tesseract::Tesseract::ambigs_classify_and_output | ( | const char * | label, |
PAGE_RES_IT * | pr_it, | ||
FILE * | output_file | ||
) |
Definition at line 203 of file recogtraining.cpp.
|
inline |
Definition at line 258 of file tesseractclass.h.
PAGE_RES * tesseract::Tesseract::ApplyBoxes | ( | const STRING & | fname, |
bool | find_segmentation, | ||
BLOCK_LIST * | block_list | ||
) |
Definition at line 117 of file applybox.cpp.
Calls LearnWord to extract features for labelled blobs within each word. Features are stored in an internal buffer.
Definition at line 796 of file applybox.cpp.
void tesseract::Tesseract::AssignDiacriticsToNewBlobs | ( | const GenericVector< C_OUTLINE * > & | outlines, |
int | pass, | ||
WERD * | real_word, | ||
PAGE_RES_IT * | pr_it, | ||
GenericVector< bool > * | word_wanted, | ||
GenericVector< C_BLOB * > * | target_blobs | ||
) |
Definition at line 1029 of file control.cpp.
void tesseract::Tesseract::AssignDiacriticsToOverlappingBlobs | ( | const GenericVector< C_OUTLINE * > & | outlines, |
int | pass, | ||
WERD * | real_word, | ||
PAGE_RES_IT * | pr_it, | ||
GenericVector< bool > * | word_wanted, | ||
GenericVector< bool > * | overlapped_any_blob, | ||
GenericVector< C_BLOB * > * | target_blobs | ||
) |
Definition at line 976 of file control.cpp.
int tesseract::Tesseract::AutoPageSeg | ( | PageSegMode | pageseg_mode, |
BLOCK_LIST * | blocks, | ||
TO_BLOCK_LIST * | to_blocks, | ||
BLOBNBOX_LIST * | diacritic_blobs, | ||
Tesseract * | osd_tess, | ||
OSResults * | osr | ||
) |
Auto page segmentation. Divide the page image into blocks of uniform text linespacing and images.
Resolution (in ppi) is derived from the input image.
The output goes in the blocks list with corresponding TO_BLOCKs in the to_blocks list.
If !PSM_COL_FIND_ENABLED(pageseg_mode), then no attempt is made to divide the image into columns, but multiple blocks are still made if the text is of non-uniform linespacing.
If diacritic_blobs is non-null, then diacritics/noise blobs that would confuse layout analysis by causing textline overlap are placed there, with the expectation that they will be reassigned to words later and noise/diacriticness determined via classification.
If osd (orientation and script detection) is true then that is performed as well. If only_osd is true, then only orientation and script detection is performed. If osd is desired (osd or only_osd), then osd_tess must be another Tesseract that was initialized especially for osd, and the results will be output into osr (orientation and script result).
Definition at line 232 of file pagesegmain.cpp.
bool tesseract::Tesseract::BelievableSuperscript | ( | bool | debug, |
const WERD_RES & | word, | ||
float | certainty_threshold, | ||
int * | left_ok, | ||
int * | right_ok | ||
) | const |
Return whether this is believable superscript or subscript text.
We insist that:
[in] | debug | If true, spew debug output |
[in] | word | The word whose best_choice we're evaluating |
[in] | certainty_threshold | If any of the characters have less certainty than this, reject. |
[out] | left_ok | How many left-side characters were ok? |
[out] | right_ok | How many right-side characters were ok? |
Definition at line 520 of file superscript.cpp.
|
inline |
Definition at line 212 of file tesseractclass.h.
void tesseract::Tesseract::bigram_correction_pass | ( | PAGE_RES * | page_res | ) |
Definition at line 442 of file control.cpp.
void tesseract::Tesseract::blamer_pass | ( | PAGE_RES * | page_res | ) |
Definition at line 686 of file control.cpp.
Definition at line 960 of file pgedit.cpp.
float tesseract::Tesseract::blob_noise_score | ( | TBLOB * | blob | ) |
Definition at line 761 of file fixspace.cpp.
void tesseract::Tesseract::break_noisiest_blob_word | ( | WERD_RES_LIST & | words | ) |
break_noisiest_blob_word() Find the word with the blob which looks like the worst noise. Break the word into two, deleting the noise blob.
Definition at line 616 of file fixspace.cpp.
SVMenuNode * tesseract::Tesseract::build_menu_new | ( | ) |
Definition at line 257 of file pgedit.cpp.
Definition at line 1767 of file control.cpp.
void tesseract::Tesseract::classify_word_and_language | ( | int | pass_n, |
PAGE_RES_IT * | pr_it, | ||
WordData * | word_data | ||
) |
Definition at line 1268 of file control.cpp.
void tesseract::Tesseract::classify_word_pass1 | ( | const WordData & | word_data, |
WERD_RES ** | in_word, | ||
PointerVector< WERD_RES > * | out_words | ||
) |
classify_word_pass1
Baseline normalize the word and pass it to Tess.
Definition at line 1344 of file control.cpp.
void tesseract::Tesseract::classify_word_pass2 | ( | const WordData & | word_data, |
WERD_RES ** | in_word, | ||
PointerVector< WERD_RES > * | out_words | ||
) |
classify_word_pass2
Control what to do with the word in pass 2
Definition at line 1488 of file control.cpp.
float tesseract::Tesseract::ClassifyBlobAsWord | ( | int | pass_n, |
PAGE_RES_IT * | pr_it, | ||
C_BLOB * | blob, | ||
STRING * | best_str, | ||
float * | c2 | ||
) |
Definition at line 1232 of file control.cpp.
float tesseract::Tesseract::ClassifyBlobPlusOutlines | ( | const GenericVector< bool > & | ok_outlines, |
const GenericVector< C_OUTLINE * > & | outlines, | ||
int | pass_n, | ||
PAGE_RES_IT * | pr_it, | ||
C_BLOB * | blob, | ||
STRING * | best_str | ||
) |
Definition at line 1190 of file control.cpp.
void tesseract::Tesseract::Clear | ( | ) |
Definition at line 640 of file tesseractclass.cpp.
float tesseract::Tesseract::ComputeCompatibleXheight | ( | WERD_RES * | word_res, |
float * | baseline_shift | ||
) |
Definition at line 101 of file fixxht.cpp.
void tesseract::Tesseract::convert_bad_unlv_chs | ( | WERD_RES * | word_res | ) |
Definition at line 663 of file docqual.cpp.
bool tesseract::Tesseract::ConvertStringToUnichars | ( | const char * | utf8, |
GenericVector< UNICHAR_ID > * | class_ids | ||
) |
Converts the space-delimited string of utf8 text to a vector of UNICHAR_ID.
Definition at line 535 of file applybox.cpp.
void tesseract::Tesseract::CorrectClassifyWords | ( | PAGE_RES * | page_res | ) |
Creates a fake best_choice entry in each WERD_RES with the correct text.
Definition at line 772 of file applybox.cpp.
inT16 tesseract::Tesseract::count_alphanums | ( | const WERD_CHOICE & | word | ) |
Definition at line 410 of file output.cpp.
Definition at line 558 of file reject.cpp.
inT16 tesseract::Tesseract::count_alphas | ( | const WERD_CHOICE & | word | ) |
Definition at line 400 of file output.cpp.
Definition at line 128 of file docqual.cpp.
int tesseract::Tesseract::CountMisfitTops | ( | WERD_RES * | word_res | ) |
Definition at line 69 of file fixxht.cpp.
bool tesseract::Tesseract::create_cube_box_word | ( | Boxa * | char_boxes, |
int | num_chars, | ||
TBOX | word_box, | ||
BoxWord * | box_word | ||
) |
Definition at line 116 of file cube_control.cpp.
void tesseract::Tesseract::cube_combine_word | ( | CubeObject * | cube_obj, |
WERD_RES * | cube_word, | ||
WERD_RES * | tess_word | ||
) |
Definition at line 283 of file cube_control.cpp.
bool tesseract::Tesseract::cube_recognize | ( | CubeObject * | cube_obj, |
BLOCK * | block, | ||
WERD_RES * | word | ||
) |
Definition at line 326 of file cube_control.cpp.
CubeObject * tesseract::Tesseract::cube_recognize_word | ( | BLOCK * | block, |
WERD_RES * | word | ||
) |
Definition at line 246 of file cube_control.cpp.
Definition at line 235 of file cube_control.cpp.
debug_word
Process the whole image, but load word_config_ for the selected word(s).
Definition at line 641 of file pgedit.cpp.
void tesseract::Tesseract::dictionary_correction_pass | ( | PAGE_RES * | page_res | ) |
Definition at line 2015 of file control.cpp.
Definition at line 344 of file fixspace.cpp.
void tesseract::Tesseract::do_re_display | ( | BOOL8(tesseract::Tesseract::*)(PAGE_RES_IT *pr_it) | word_painter | ) |
Redisplay page
Definition at line 308 of file pgedit.cpp.
void tesseract::Tesseract::doc_and_block_rejection | ( | PAGE_RES_IT & | page_res_it, |
BOOL8 | good_quality_doc | ||
) |
Definition at line 235 of file docqual.cpp.
void tesseract::Tesseract::dont_allow_1Il | ( | WERD_RES * | word | ) |
Definition at line 526 of file reject.cpp.
void tesseract::Tesseract::dump_words | ( | WERD_RES_LIST & | perm, |
inT16 | score, | ||
inT16 | mode, | ||
BOOL8 | improved | ||
) |
Definition at line 450 of file fixspace.cpp.
void tesseract::Tesseract::end_tesseract | ( | ) |
Definition at line 471 of file tessedit.cpp.
inT16 tesseract::Tesseract::eval_word_spacing | ( | WERD_RES_LIST & | word_res_list | ) |
Definition at line 240 of file fixspace.cpp.
bool tesseract::Tesseract::extract_cube_state | ( | CubeObject * | cube_obj, |
int * | num_chars, | ||
Boxa ** | char_boxes, | ||
CharSamp *** | char_samples | ||
) |
Definition at line 65 of file cube_control.cpp.
Definition at line 969 of file docqual.cpp.
void tesseract::Tesseract::fill_werd_res | ( | const BoxWord & | cube_box_word, |
const char * | cube_best_str, | ||
WERD_RES * | tess_werd_res | ||
) |
Definition at line 413 of file cube_control.cpp.
bool tesseract::Tesseract::FindSegmentation | ( | const GenericVector< UNICHAR_ID > & | target_text, |
WERD_RES * | word_res | ||
) |
Resegments the word to achieve the target_text from the classifier. Returns false if the re-segmentation fails. Uses brute-force combination of up to kMaxGroupSize adjacent blobs, and applies a full search on the classifier results to find the best classified segmentation. As a compromise to obtain better recall, 1-1 ambiguity substitutions ARE used.
Definition at line 559 of file applybox.cpp.
inT16 tesseract::Tesseract::first_alphanum_index | ( | const char * | word, |
const char * | word_lengths | ||
) |
Definition at line 469 of file reject.cpp.
inT16 tesseract::Tesseract::first_alphanum_offset | ( | const char * | word, |
const char * | word_lengths | ||
) |
Definition at line 482 of file reject.cpp.
void tesseract::Tesseract::fix_fuzzy_space_list | ( | WERD_RES_LIST & | best_perm, |
ROW * | row, | ||
BLOCK * | block | ||
) |
Definition at line 145 of file fixspace.cpp.
void tesseract::Tesseract::fix_fuzzy_spaces | ( | ETEXT_DESC * | monitor, |
inT32 | word_count, | ||
PAGE_RES * | page_res | ||
) |
Definition at line 48 of file fixspace.cpp.
void tesseract::Tesseract::fix_noisy_space_list | ( | WERD_RES_LIST & | best_perm, |
ROW * | row, | ||
BLOCK * | block | ||
) |
Definition at line 570 of file fixspace.cpp.
void tesseract::Tesseract::fix_rep_char | ( | PAGE_RES_IT * | page_res_it | ) |
fix_rep_char() The word is a repeated char. (Leader.) Find the repeated character. Create the appropriate single-word or multi-word sequence according to the size of spaces in between blobs, and correct the classifications where some of the characters disagree with the majority.
Definition at line 1624 of file control.cpp.
Definition at line 536 of file fixspace.cpp.
Definition at line 504 of file fixspace.cpp.
void tesseract::Tesseract::flip_0O | ( | WERD_RES * | word | ) |
Definition at line 673 of file reject.cpp.
void tesseract::Tesseract::flip_hyphens | ( | WERD_RES * | word | ) |
Definition at line 616 of file reject.cpp.
void tesseract::Tesseract::font_recognition_pass | ( | PAGE_RES * | page_res | ) |
font_recognition_pass
Smooth the fonts for the document.
Definition at line 1958 of file control.cpp.
inT16 tesseract::Tesseract::fp_eval_word_spacing | ( | WERD_RES_LIST & | word_res_list | ) |
Definition at line 831 of file fixspace.cpp.
GARBAGE_LEVEL tesseract::Tesseract::garbage_word | ( | WERD_RES * | word, |
BOOL8 | ok_dict_word | ||
) |
Definition at line 683 of file docqual.cpp.
UNICHAR_ID tesseract::Tesseract::get_rep_char | ( | WERD_RES * | word | ) |
Definition at line 285 of file output.cpp.
|
inline |
Definition at line 254 of file tesseractclass.h.
|
inline |
Definition at line 1160 of file tesseractclass.h.
void tesseract::Tesseract::GetSubAndSuperscriptCandidates | ( | const WERD_RES * | word, |
int * | num_rebuilt_leading, | ||
ScriptPos * | leading_pos, | ||
float * | leading_certainty, | ||
int * | num_rebuilt_trailing, | ||
ScriptPos * | trailing_pos, | ||
float * | trailing_certainty, | ||
float * | avg_certainty, | ||
float * | unlikely_threshold | ||
) |
Determine how many characters (rebuilt blobs) on each end of a given word might plausibly be superscripts so SubAndSuperscriptFix can try to re-recognize them. Even if we find no whole blobs at either end, we will set *unlikely_threshold to a certainty that might be used to select "bad enough" outlier characters. If *unlikely_threshold is set to 0, though, there's really no hope.
[in] | word | The word to examine. |
[out] | num_rebuilt_leading | the number of rebuilt blobs at the start of the word which are all up or down and seem badly classified. |
[out] | leading_pos | "super" or "sub" (for debugging) |
[out] | leading_certainty | the worst certainty in the leading blobs. |
[out] | num_rebuilt_trailing | the number of rebuilt blobs at the end of the word which are all up or down and seem badly classified. |
[out] | trailing_pos | "super" or "sub" (for debugging) |
[out] | trailing_certainty | the worst certainty in the trailing blobs. |
[out] | avg_certainty | the average certainty of "normal" blobs in the word. |
[out] | unlikely_threshold | the threshold (on certainty) we used to select "bad enough" outlier characters. |
Definition at line 253 of file superscript.cpp.
|
inline |
Definition at line 228 of file tesseractclass.h.
|
inline |
Definition at line 225 of file tesseractclass.h.
bool tesseract::Tesseract::init_cube_objects | ( | bool | load_combiner, |
TessdataManager * | tessdata_manager | ||
) |
Definition at line 154 of file cube_control.cpp.
FILE * tesseract::Tesseract::init_recog_training | ( | const STRING & | fname | ) |
Definition at line 36 of file recogtraining.cpp.
int tesseract::Tesseract::init_tesseract | ( | const char * | arg0, |
const char * | textbase, | ||
const char * | language, | ||
OcrEngineMode | oem, | ||
char ** | configs, | ||
int | configs_size, | ||
const GenericVector< STRING > * | vars_vec, | ||
const GenericVector< STRING > * | vars_values, | ||
bool | set_only_init_params | ||
) |
Definition at line 285 of file tessedit.cpp.
|
inline |
Definition at line 487 of file tesseractclass.h.
int tesseract::Tesseract::init_tesseract_internal | ( | const char * | arg0, |
const char * | textbase, | ||
const char * | language, | ||
OcrEngineMode | oem, | ||
char ** | configs, | ||
int | configs_size, | ||
const GenericVector< STRING > * | vars_vec, | ||
const GenericVector< STRING > * | vars_values, | ||
bool | set_only_init_params | ||
) |
Definition at line 389 of file tessedit.cpp.
bool tesseract::Tesseract::init_tesseract_lang_data | ( | const char * | arg0, |
const char * | textbase, | ||
const char * | language, | ||
OcrEngineMode | oem, | ||
char ** | configs, | ||
int | configs_size, | ||
const GenericVector< STRING > * | vars_vec, | ||
const GenericVector< STRING > * | vars_values, | ||
bool | set_only_init_params | ||
) |
Definition at line 83 of file tessedit.cpp.
int tesseract::Tesseract::init_tesseract_lm | ( | const char * | arg0, |
const char * | textbase, | ||
const char * | language | ||
) |
Definition at line 460 of file tessedit.cpp.
void tesseract::Tesseract::join_words | ( | WERD_RES * | word, |
WERD_RES * | word2, | ||
BlamerBundle * | orig_bb | ||
) | const |
Definition at line 240 of file tfacepp.cpp.
Definition at line 196 of file fixspace.cpp.
void tesseract::Tesseract::match_word_pass_n | ( | int | pass_n, |
WERD_RES * | word, | ||
ROW * | row, | ||
BLOCK * | block | ||
) |
match_word_pass_n
Baseline normalize the word and pass it to Tess.
Definition at line 1549 of file control.cpp.
void tesseract::Tesseract::MaximallyChopWord | ( | const GenericVector< TBOX > & | boxes, |
BLOCK * | block, | ||
ROW * | row, | ||
WERD_RES * | word_res | ||
) |
Tests the chopper by exhaustively running chop_one_blob. The word_res will contain filled chopped_word, seam_array, denorm, box_word and best_state for the maximally chopped word.
Definition at line 253 of file applybox.cpp.
|
inline |
Definition at line 191 of file tesseractclass.h.
|
inline |
Definition at line 244 of file tesseractclass.h.
Definition at line 981 of file docqual.cpp.
BOOL8 tesseract::Tesseract::non_0_digit | ( | const UNICHARSET & | ch_set, |
UNICHAR_ID | unichar_id | ||
) |
Definition at line 789 of file reject.cpp.
BOOL8 tesseract::Tesseract::non_O_upper | ( | const UNICHARSET & | ch_set, |
UNICHAR_ID | unichar_id | ||
) |
Definition at line 785 of file reject.cpp.
|
inline |
Definition at line 251 of file tesseractclass.h.
Definition at line 292 of file reject.cpp.
void tesseract::Tesseract::output_pass | ( | PAGE_RES_IT & | page_res_it, |
const TBOX * | target_word_box | ||
) |
Definition at line 68 of file output.cpp.
void tesseract::Tesseract::ParseLanguageString | ( | const char * | lang_str, |
GenericVector< STRING > * | to_load, | ||
GenericVector< STRING > * | not_to_load | ||
) |
Definition at line 249 of file tessedit.cpp.
void tesseract::Tesseract::pgeditor_main | ( | int | width, |
int | height, | ||
PAGE_RES * | page_res | ||
) |
Top level editor operation: Set up a new window and a corresponding event handler
Definition at line 337 of file pgedit.cpp.
|
inline |
Definition at line 195 of file tesseractclass.h.
|
inline |
Definition at line 198 of file tesseractclass.h.
BOOL8 tesseract::Tesseract::potential_word_crunch | ( | WERD_RES * | word, |
GARBAGE_LEVEL | garbage_level, | ||
BOOL8 | ok_dict_word | ||
) |
Definition at line 545 of file docqual.cpp.
void tesseract::Tesseract::PreenXHeights | ( | BLOCK_LIST * | block_list | ) |
Any row xheight that is significantly different from the median is set to the median.
Definition at line 193 of file applybox.cpp.
void tesseract::Tesseract::PrepareForPageseg | ( | ) |
Definition at line 690 of file tesseractclass.cpp.
void tesseract::Tesseract::PrepareForTessOCR | ( | BLOCK_LIST * | block_list, |
Tesseract * | osd_tess, | ||
OSResults * | osr | ||
) |
Definition at line 726 of file tesseractclass.cpp.
void tesseract::Tesseract::PrerecAllWordsPar | ( | const GenericVector< WordData > & | words | ) |
Definition at line 36 of file par_control.cpp.
Definition at line 397 of file pgedit.cpp.
void tesseract::Tesseract::process_image_event | ( | const SVEvent & | event | ) |
User has done something in the image window - mouse down or up. Work out what it is and do something with it. If DOWN - just remember where it was. If UP - for each word in the selected area do the operation defined by the current mode.
Definition at line 565 of file pgedit.cpp.
void tesseract::Tesseract::process_selected_words | ( | PAGE_RES * | page_res, |
TBOX & | selection_box, | ||
BOOL8(tesseract::Tesseract::*)(PAGE_RES_IT *pr_it) | word_processor | ||
) |
Definition at line 30 of file pagewalk.cpp.
bool tesseract::Tesseract::ProcessTargetWord | ( | const TBOX & | word_box, |
const TBOX & | target_word_box, | ||
const char * | word_config, | ||
int | pass | ||
) |
Definition at line 118 of file control.cpp.
void tesseract::Tesseract::quality_based_rejection | ( | PAGE_RES_IT & | page_res_it, |
BOOL8 | good_quality_doc | ||
) |
Definition at line 140 of file docqual.cpp.
void tesseract::Tesseract::read_config_file | ( | const char * | filename, |
SetParamConstraint | constraint | ||
) |
Definition at line 52 of file tessedit.cpp.
bool tesseract::Tesseract::ReassignDiacritics | ( | int | pass, |
PAGE_RES_IT * | pr_it, | ||
bool * | make_next_word_fuzzy | ||
) |
Definition at line 910 of file control.cpp.
bool tesseract::Tesseract::recog_all_words | ( | PAGE_RES * | page_res, |
ETEXT_DESC * | monitor, | ||
const TBOX * | target_word_box, | ||
const char * | word_config, | ||
int | dopasses | ||
) |
Walk the page_res, recognizing all the words. If monitor is not null, it is used as a progress monitor/timeout/cancel. If dopasses is 0, all recognition passes are run, 1 just pass 1, 2 passes 2 and higher. If target_word_box is not null, special things are done to words that overlap the target_word_box: if word_config is not null, the word config file is read for just the target word(s), otherwise, on pass 2 and beyond ONLY the target words are processed (Jetsoft modification.) Returns false if we cancelled prematurely.
page_res | page structure |
monitor | progress monitor |
word_config | word_config file |
target_word_box | specifies just to extract a rectangle |
dopasses | 0 - all, 1 just pass 1, 2 passes 2 and higher |
Definition at line 287 of file control.cpp.
BOOL8 tesseract::Tesseract::recog_interactive | ( | PAGE_RES_IT * | pr_it | ) |
Recognize a single word in interactive mode.
pr_it | the page results iterator |
Definition at line 84 of file control.cpp.
Definition at line 68 of file control.cpp.
void tesseract::Tesseract::recog_training_segmented | ( | const STRING & | fname, |
PAGE_RES * | page_res, | ||
volatile ETEXT_DESC * | monitor, | ||
FILE * | output_file | ||
) |
Definition at line 79 of file recogtraining.cpp.
void tesseract::Tesseract::recog_word | ( | WERD_RES * | word | ) |
Definition at line 46 of file tfacepp.cpp.
void tesseract::Tesseract::recog_word_recursive | ( | WERD_RES * | word | ) |
Definition at line 110 of file tfacepp.cpp.
bool tesseract::Tesseract::RecogAllWordsPassN | ( | int | pass_n, |
ETEXT_DESC * | monitor, | ||
PAGE_RES_IT * | pr_it, | ||
GenericVector< WordData > * | words | ||
) |
Definition at line 207 of file control.cpp.
void tesseract::Tesseract::recognize_page | ( | STRING & | image_name | ) |
void tesseract::Tesseract::reject_edge_blobs | ( | WERD_RES * | word | ) |
Definition at line 263 of file reject.cpp.
void tesseract::Tesseract::reject_I_1_L | ( | WERD_RES * | word | ) |
Definition at line 191 of file reject.cpp.
void tesseract::Tesseract::reject_mostly_rejects | ( | WERD_RES * | word | ) |
Definition at line 573 of file reject.cpp.
void tesseract::Tesseract::rejection_passes | ( | PAGE_RES * | page_res, |
ETEXT_DESC * | monitor, | ||
const TBOX * | target_word_box, | ||
const char * | word_config | ||
) |
Definition at line 590 of file control.cpp.
Definition at line 582 of file reject.cpp.
void tesseract::Tesseract::ReportFailedBox | ( | int | boxfile_lineno, |
TBOX | box, | ||
const char * | box_ch, | ||
const char * | err_msg | ||
) |
void tesseract::Tesseract::ReportXhtFixResult | ( | bool | accept_new_word, |
float | new_x_ht, | ||
WERD_RES * | word, | ||
WERD_RES * | new_word | ||
) |
Definition at line 1381 of file control.cpp.
void tesseract::Tesseract::ReSegmentByClassification | ( | PAGE_RES * | page_res | ) |
Resegments the words by running the classifier in an attempt to find the correct segmentation that produces the required string.
Definition at line 509 of file applybox.cpp.
bool tesseract::Tesseract::ResegmentCharBox | ( | PAGE_RES * | page_res, |
const TBOX * | prev_box, | ||
const TBOX & | box, | ||
const TBOX & | next_box, | ||
const char * | correct_text | ||
) |
Gather consecutive blobs that match the given box into the best_state and corresponding correct_text.
Fights over which box owns which blobs are settled by pre-chopping and applying the blobs to box or next_box with the least non-overlap.
This means that occasionally, blobs may be incorrectly segmented if the chopper fails to find a suitable chop point.
Definition at line 340 of file applybox.cpp.
bool tesseract::Tesseract::ResegmentWordBox | ( | BLOCK_LIST * | block_list, |
const TBOX & | box, | ||
const TBOX & | next_box, | ||
const char * | correct_text | ||
) |
Consume all source blobs that strongly overlap the given box, putting them into a new word, with the correct_text label. Fights over which box owns which blobs are settled by applying the blobs to box or next_box with the least non-overlap.
Definition at line 438 of file applybox.cpp.
void tesseract::Tesseract::ResetAdaptiveClassifier | ( | ) |
Definition at line 660 of file tesseractclass.cpp.
void tesseract::Tesseract::ResetDocumentDictionary | ( | ) |
Definition at line 668 of file tesseractclass.cpp.
|
inline |
Definition at line 187 of file tesseractclass.h.
int tesseract::Tesseract::RetryWithLanguage | ( | const WordData & | word_data, |
WordRecognizer | recognizer, | ||
WERD_RES ** | in_word, | ||
PointerVector< WERD_RES > * | best_words | ||
) |
Definition at line 869 of file control.cpp.
|
inline |
Definition at line 248 of file tesseractclass.h.
void tesseract::Tesseract::run_cube_combiner | ( | PAGE_RES * | page_res | ) |
Definition at line 193 of file cube_control.cpp.
Definition at line 607 of file reject.cpp.
|
inline |
Definition at line 231 of file tesseractclass.h.
|
inline |
Definition at line 234 of file tesseractclass.h.
void tesseract::Tesseract::script_pos_pass | ( | PAGE_RES * | page_res | ) |
Definition at line 710 of file control.cpp.
void tesseract::Tesseract::SearchForText | ( | const GenericVector< BLOB_CHOICE_LIST * > * | choices, |
int | choices_pos, | ||
int | choices_length, | ||
const GenericVector< UNICHAR_ID > & | target_text, | ||
int | text_index, | ||
float | rating, | ||
GenericVector< int > * | segmentation, | ||
float * | best_rating, | ||
GenericVector< int > * | best_segmentation | ||
) |
Recursive helper to find a match to the target_text (from text_index position) in the choices (from choices_pos position).
choices | is an array of GenericVectors, of length choices_length, with each element representing a starting position in the word, and the GenericVector holding classification results for a sequence of consecutive blobs, with index 0 being a single blob, index 1 being 2 blobs etc. |
choices_pos | |
choices_length | |
target_text | |
text_index | |
rating | |
segmentation | |
best_rating | |
best_segmentation |
Definition at line 629 of file applybox.cpp.
int tesseract::Tesseract::SegmentPage | ( | const STRING * | input_file, |
BLOCK_LIST * | blocks, | ||
Tesseract * | osd_tess, | ||
OSResults * | osr | ||
) |
Segment the page according to the current value of tessedit_pageseg_mode. pix_binary_ is used as the source image and should not be NULL. On return the blocks list owns all the constructed page layout.
Definition at line 109 of file pagesegmain.cpp.
bool tesseract::Tesseract::SelectGoodDiacriticOutlines | ( | int | pass, |
float | certainty_threshold, | ||
PAGE_RES_IT * | pr_it, | ||
C_BLOB * | blob, | ||
const GenericVector< C_OUTLINE * > & | outlines, | ||
int | num_outlines, | ||
GenericVector< bool > * | ok_outlines | ||
) |
Definition at line 1105 of file control.cpp.
|
inline |
Definition at line 201 of file tesseractclass.h.
|
inline |
Definition at line 215 of file tesseractclass.h.
|
inline |
Definition at line 222 of file tesseractclass.h.
void tesseract::Tesseract::set_unlv_suspects | ( | WERD_RES * | word | ) |
Definition at line 307 of file output.cpp.
void tesseract::Tesseract::set_word_fonts | ( | WERD_RES * | word | ) |
set_word_fonts
Get the fonts for the word.
Definition at line 1880 of file control.cpp.
void tesseract::Tesseract::SetBlackAndWhitelist | ( | ) |
Definition at line 675 of file tesseractclass.cpp.
void tesseract::Tesseract::SetEquationDetect | ( | EquationDetect * | detector | ) |
Definition at line 654 of file tesseractclass.cpp.
|
inline |
Definition at line 237 of file tesseractclass.h.
void tesseract::Tesseract::SetupAllWordsPassN | ( | int | pass_n, |
const TBOX * | target_word_box, | ||
const char * | word_config, | ||
PAGE_RES * | page_res, | ||
GenericVector< WordData > * | words | ||
) |
If tesseract is to be run, sets the words up ready for it.
Definition at line 148 of file control.cpp.
PAGE_RES * tesseract::Tesseract::SetupApplyBoxes | ( | const GenericVector< TBOX > & | boxes, |
BLOCK_LIST * | block_list | ||
) |
Builds a PAGE_RES from the block_list in the way required for ApplyBoxes: All fuzzy spaces are removed, and all the words are maximally chopped.
Definition at line 217 of file applybox.cpp.
ColumnFinder * tesseract::Tesseract::SetupPageSegAndDetectOrientation | ( | PageSegMode | pageseg_mode, |
BLOCK_LIST * | blocks, | ||
Tesseract * | osd_tess, | ||
OSResults * | osr, | ||
TO_BLOCK_LIST * | to_blocks, | ||
Pix ** | photo_mask_pix, | ||
Pix ** | music_mask_pix | ||
) |
Sets up auto page segmentation, determines the orientation, and corrects it. Somewhat arbitrary chunk of functionality, factored out of AutoPageSeg to facilitate testing. photo_mask_pix is a pointer to a NULL pointer that will be filled on return with the leptonica photo mask, which must be pixDestroyed by the caller. to_blocks is an empty list that will be filled with (usually a single) block that is used during layout analysis. This ugly API is required because of the possibility of a unlv zone file. TODO(rays) clean this up. See AutoPageSeg for other arguments. The returned ColumnFinder must be deleted after use.
Definition at line 309 of file pagesegmain.cpp.
void tesseract::Tesseract::SetupUniversalFontIds | ( | ) |
Definition at line 439 of file tessedit.cpp.
void tesseract::Tesseract::SetupWordPassN | ( | int | pass_n, |
WordData * | word | ||
) |
Definition at line 171 of file control.cpp.
void tesseract::Tesseract::SetupWordScripts | ( | BLOCK_LIST * | blocks | ) |
|
inline |
Definition at line 219 of file tesseractclass.h.
void tesseract::Tesseract::split_and_recog_word | ( | WERD_RES * | word | ) |
Definition at line 144 of file tfacepp.cpp.
void tesseract::Tesseract::split_word | ( | WERD_RES * | word, |
int | split_pt, | ||
WERD_RES ** | right_piece, | ||
BlamerBundle ** | orig_blamer_bundle | ||
) | const |
Definition at line 182 of file tfacepp.cpp.
bool tesseract::Tesseract::SubAndSuperscriptFix | ( | WERD_RES * | word | ) |
Attempt to split off any high (or low) bits at the ends of the word with poor certainty and recognize them separately. If the certainty gets much better and other sanity checks pass, accept.
This superscript fix is meant to be called in the second pass of recognition when we have tried once and already have a preliminary answer for word.
Definition at line 101 of file superscript.cpp.
BOOL8 tesseract::Tesseract::terrible_word_crunch | ( | WERD_RES * | word, |
GARBAGE_LEVEL | garbage_level | ||
) |
Definition at line 507 of file docqual.cpp.
bool tesseract::Tesseract::tess_acceptable_word | ( | WERD_RES * | word | ) |
Definition at line 69 of file tessbox.cpp.
void tesseract::Tesseract::tess_add_doc_word | ( | WERD_CHOICE * | word_choice | ) |
Definition at line 79 of file tessbox.cpp.
void tesseract::Tesseract::tess_segment_pass_n | ( | int | pass_n, |
WERD_RES * | word | ||
) |
Definition at line 39 of file tessbox.cpp.
bool tesseract::Tesseract::TestNewNormalization | ( | int | original_misfits, |
float | baseline_shift, | ||
float | new_x_ht, | ||
WERD_RES * | word, | ||
BLOCK * | block, | ||
ROW * | row | ||
) |
Definition at line 1437 of file control.cpp.
|
inline |
Definition at line 241 of file tesseractclass.h.
void tesseract::Tesseract::TidyUp | ( | PAGE_RES * | page_res | ) |
Definition at line 706 of file applybox.cpp.
void tesseract::Tesseract::tilde_crunch | ( | PAGE_RES_IT & | page_res_it | ) |
Definition at line 421 of file docqual.cpp.
void tesseract::Tesseract::tilde_delete | ( | PAGE_RES_IT & | page_res_it | ) |
Definition at line 593 of file docqual.cpp.
Definition at line 1402 of file control.cpp.
WERD_RES * tesseract::Tesseract::TrySuperscriptSplits | ( | int | num_chopped_leading, |
float | leading_certainty, | ||
ScriptPos | leading_pos, | ||
int | num_chopped_trailing, | ||
float | trailing_certainty, | ||
ScriptPos | trailing_pos, | ||
WERD_RES * | word, | ||
bool * | is_good, | ||
int * | retry_rebuild_leading, | ||
int * | retry_rebuild_trailing | ||
) |
Try splitting off the given number of (chopped) blobs from the front and back of the given word and recognizing the pieces.
[in] | num_chopped_leading | how many chopped blobs from the left end of the word to chop off and try recognizing as a superscript (or subscript) |
[in] | leading_certainty | the (minimum) certainty had by the characters in the original leading section. |
[in] | leading_pos | "super" or "sub" (for debugging) |
[in] | num_chopped_trailing | how many chopped blobs from the right end of the word to chop off and try recognizing as a superscript (or subscript) |
[in] | trailing_certainty | the (minimum) certainty had by the characters in the original trailing section. |
[in] | trailing_pos | "super" or "sub" (for debugging) |
[in] | word | the word to try to chop up. |
[out] | is_good | do we believe our result? |
[out] | retry_rebuild_leading,retry_rebuild_trailing | If non-zero, and !is_good, then the caller may have luck trying to split the returned word with this number of (rebuilt) leading and trailing blobs / unichars. |
Definition at line 382 of file superscript.cpp.
Definition at line 117 of file docqual.cpp.
void tesseract::Tesseract::unrej_good_quality_words | ( | PAGE_RES_IT & | page_res_it | ) |
Definition at line 163 of file docqual.cpp.
Definition at line 45 of file adaptions.cpp.
BOOL8 tesseract::Tesseract::word_blank_and_set_display | ( | PAGE_RES_IT * | pr_its | ) |
Definition at line 717 of file pgedit.cpp.
BOOL8 tesseract::Tesseract::word_bln_display | ( | PAGE_RES_IT * | pr_it | ) |
Normalize word and display in word window
Definition at line 729 of file pgedit.cpp.
Definition at line 65 of file docqual.cpp.
void tesseract::Tesseract::word_char_quality | ( | WERD_RES * | word, |
ROW * | row, | ||
inT16 * | match_count, | ||
inT16 * | accepted_match_count | ||
) |
Definition at line 97 of file docqual.cpp.
BOOL8 tesseract::Tesseract::word_contains_non_1_digit | ( | const char * | word, |
const char * | word_lengths | ||
) |
Definition at line 509 of file reject.cpp.
CRUNCH_MODE tesseract::Tesseract::word_deletable | ( | WERD_RES * | word, |
inT16 & | delete_mode | ||
) |
Definition at line 898 of file docqual.cpp.
BOOL8 tesseract::Tesseract::word_display | ( | PAGE_RES_IT * | pr_it | ) |
word_display() Word Processor
Display a word according to its display modes
Definition at line 761 of file pgedit.cpp.
BOOL8 tesseract::Tesseract::word_dumper | ( | PAGE_RES_IT * | pr_it | ) |
Dump members to the debug window
Definition at line 922 of file pgedit.cpp.
Definition at line 77 of file docqual.cpp.
BOOL8 tesseract::Tesseract::word_set_display | ( | PAGE_RES_IT * | pr_it | ) |
word_set_display() Word processor
Display word according to current display mode settings
Definition at line 946 of file pgedit.cpp.
Definition at line 681 of file fixspace.cpp.
void tesseract::Tesseract::write_results | ( | PAGE_RES_IT & | page_res_it, |
char | newline_type, | ||
BOOL8 | force_eol | ||
) |
Definition at line 132 of file output.cpp.
int tesseract::Tesseract::applybox_debug = 1 |
"Debug level"
Definition at line 817 of file tesseractclass.h.
char* tesseract::Tesseract::applybox_exposure_pattern = ".exp" |
"Exposure value follows this pattern in the image" " filename. The name of the image files are expected" " to be in the form [lang].[fontname].exp[num].tif"
Definition at line 822 of file tesseractclass.h.
bool tesseract::Tesseract::applybox_learn_chars_and_char_frags_mode = false |
"Learn both character fragments (as is done in the" " special low exposure mode) as well as unfragmented" " characters."
Definition at line 826 of file tesseractclass.h.
bool tesseract::Tesseract::applybox_learn_ngrams_mode = false |
"Each bounding box is assumed to contain ngrams. Only" " learn the ngrams whose outlines overlap horizontally."
Definition at line 829 of file tesseractclass.h.
int tesseract::Tesseract::applybox_page = 0 |
"Page number to apply boxes from"
Definition at line 818 of file tesseractclass.h.
double tesseract::Tesseract::bestrate_pruning_factor = 2.0 |
"Multiplying factor of" " current best rate to prune other hypotheses"
Definition at line 1103 of file tesseractclass.h.
int tesseract::Tesseract::bidi_debug = 0 |
"Debug level for BiDi"
Definition at line 816 of file tesseractclass.h.
bool tesseract::Tesseract::bland_unrej = false |
"unrej potential with no checks"
Definition at line 929 of file tesseractclass.h.
char* tesseract::Tesseract::chs_leading_punct = "('`\"" |
"Leading punctuation"
Definition at line 869 of file tesseractclass.h.
char* tesseract::Tesseract::chs_trailing_punct1 = ").,;:?!" |
"1st Trailing punctuation"
Definition at line 870 of file tesseractclass.h.
char* tesseract::Tesseract::chs_trailing_punct2 = ")'`\"" |
"2nd Trailing punctuation"
Definition at line 871 of file tesseractclass.h.
char* tesseract::Tesseract::conflict_set_I_l_1 = "Il1[]" |
"Il1 conflict set"
Definition at line 1043 of file tesseractclass.h.
bool tesseract::Tesseract::crunch_accept_ok = true |
"Use acceptability in okstring"
Definition at line 958 of file tesseractclass.h.
int tesseract::Tesseract::crunch_debug = 0 |
"As it says"
Definition at line 967 of file tesseractclass.h.
double tesseract::Tesseract::crunch_del_cert = -10.0 |
"POTENTIAL crunch cert lt this"
Definition at line 947 of file tesseractclass.h.
double tesseract::Tesseract::crunch_del_high_word = 1.5 |
"Del if word gt xht x this above bl"
Definition at line 952 of file tesseractclass.h.
double tesseract::Tesseract::crunch_del_low_word = 0.5 |
"Del if word gt xht x this below bl"
Definition at line 953 of file tesseractclass.h.
double tesseract::Tesseract::crunch_del_max_ht = 3.0 |
"Del if word ht gt xht x this"
Definition at line 949 of file tesseractclass.h.
double tesseract::Tesseract::crunch_del_min_ht = 0.7 |
"Del if word ht lt xht x this"
Definition at line 948 of file tesseractclass.h.
double tesseract::Tesseract::crunch_del_min_width = 3.0 |
"Del if word width lt xht x this"
Definition at line 950 of file tesseractclass.h.
double tesseract::Tesseract::crunch_del_rating = 60 |
"POTENTIAL crunch rating lt this"
Definition at line 946 of file tesseractclass.h.
bool tesseract::Tesseract::crunch_early_convert_bad_unlv_chs = false |
"Take out ~^ early?"
Definition at line 937 of file tesseractclass.h.
bool tesseract::Tesseract::crunch_early_merge_tess_fails = true |
"Before word crunch?"
Definition at line 936 of file tesseractclass.h.
bool tesseract::Tesseract::crunch_include_numerals = false |
"Fiddle alpha figures"
Definition at line 961 of file tesseractclass.h.
bool tesseract::Tesseract::crunch_leave_accept_strings = false |
"Dont pot crunch sensible strings"
Definition at line 960 of file tesseractclass.h.
int tesseract::Tesseract::crunch_leave_lc_strings = 4 |
"Dont crunch words with long lower case strings"
Definition at line 963 of file tesseractclass.h.
bool tesseract::Tesseract::crunch_leave_ok_strings = true |
"Dont touch sensible strings"
Definition at line 957 of file tesseractclass.h.
int tesseract::Tesseract::crunch_leave_uc_strings = 4 |
"Dont crunch words with long upper case strings"
Definition at line 965 of file tesseractclass.h.
int tesseract::Tesseract::crunch_long_repetitions = 3 |
"Crunch words with long repetitions"
Definition at line 966 of file tesseractclass.h.
double tesseract::Tesseract::crunch_poor_garbage_cert = -9.0 |
"crunch garbage cert lt this"
Definition at line 941 of file tesseractclass.h.
double tesseract::Tesseract::crunch_poor_garbage_rate = 60 |
"crunch garbage rating lt this"
Definition at line 942 of file tesseractclass.h.
bool tesseract::Tesseract::crunch_pot_garbage = true |
"POTENTIAL crunch garbage"
Definition at line 945 of file tesseractclass.h.
int tesseract::Tesseract::crunch_pot_indicators = 1 |
"How many potential indicators needed"
Definition at line 956 of file tesseractclass.h.
double tesseract::Tesseract::crunch_pot_poor_cert = -8.0 |
"POTENTIAL crunch cert lt this"
Definition at line 944 of file tesseractclass.h.
double tesseract::Tesseract::crunch_pot_poor_rate = 40 |
"POTENTIAL crunch rating lt this"
Definition at line 943 of file tesseractclass.h.
int tesseract::Tesseract::crunch_rating_max = 10 |
"For adj length in rating per ch"
Definition at line 955 of file tesseractclass.h.
double tesseract::Tesseract::crunch_small_outlines_size = 0.6 |
"Small if lt xht x this"
Definition at line 954 of file tesseractclass.h.
bool tesseract::Tesseract::crunch_terrible_garbage = true |
"As it says"
Definition at line 939 of file tesseractclass.h.
double tesseract::Tesseract::crunch_terrible_rating = 80.0 |
"crunch rating lt this"
Definition at line 938 of file tesseractclass.h.
int tesseract::Tesseract::cube_debug_level = 1 |
"Print cube debug info."
Definition at line 893 of file tesseractclass.h.
bool tesseract::Tesseract::debug_acceptable_wds = false |
"Dump word pass/fail chk"
Definition at line 868 of file tesseractclass.h.
int tesseract::Tesseract::debug_fix_space_level = 0 |
"Contextual fixspace debug"
Definition at line 973 of file tesseractclass.h.
int tesseract::Tesseract::debug_noise_removal = 0 |
"Debug reassignment of small outlines"
Definition at line 852 of file tesseractclass.h.
int tesseract::Tesseract::debug_x_ht_level = 0 |
"Reestimate debug"
Definition at line 867 of file tesseractclass.h.
bool tesseract::Tesseract::docqual_excuse_outline_errs = false |
"Allow outline errs in unrejection?"
Definition at line 897 of file tesseractclass.h.
bool tesseract::Tesseract::enable_new_segsearch = false |
"Enable new segmentation search path."
Definition at line 1143 of file tesseractclass.h.
bool tesseract::Tesseract::enable_noise_removal = true |
"Remove and conditionally reassign small outlines when they" " confuse layout analysis, determining diacritics vs noise"
Definition at line 851 of file tesseractclass.h.
char* tesseract::Tesseract::file_type = ".tif" |
"Filename extension"
Definition at line 1050 of file tesseractclass.h.
int tesseract::Tesseract::fixsp_done_mode = 1 |
"What constitutes done for spacing"
Definition at line 972 of file tesseractclass.h.
int tesseract::Tesseract::fixsp_non_noise_limit = 1 |
"How many non-noise blbs either side?"
Definition at line 969 of file tesseractclass.h.
double tesseract::Tesseract::fixsp_small_outlines_size = 0.28 |
"Small if lt xht x this"
Definition at line 970 of file tesseractclass.h.
double tesseract::Tesseract::heuristic_max_char_wh_ratio = 2.0 |
"max char width-to-height ratio allowed in segmentation"
Definition at line 1141 of file tesseractclass.h.
double tesseract::Tesseract::heuristic_segcost_rating_base = 1.25 |
"base factor for adding segmentation cost into word rating. " "It's a multiplying factor, the larger the value above 1, " "the bigger the effect of segmentation cost."
Definition at line 1132 of file tesseractclass.h.
double tesseract::Tesseract::heuristic_weight_rating = 1 |
"weight associated with char rating in combined cost of state"
Definition at line 1134 of file tesseractclass.h.
double tesseract::Tesseract::heuristic_weight_seamcut = 0 |
"weight associated with seam cut in combined cost of state"
Definition at line 1139 of file tesseractclass.h.
double tesseract::Tesseract::heuristic_weight_width = 1000.0 |
"weight associated with width evidence in combined cost of" " state"
Definition at line 1137 of file tesseractclass.h.
bool tesseract::Tesseract::hocr_font_info = false |
"Add font info to hocr output"
Definition at line 935 of file tesseractclass.h.
bool tesseract::Tesseract::include_page_breaks = false |
"Include page separator string in output text after each " "image/page."
Definition at line 1083 of file tesseractclass.h.
bool tesseract::Tesseract::interactive_display_mode = false |
"Run interactively?"
Definition at line 1049 of file tesseractclass.h.
int tesseract::Tesseract::language_model_fixed_length_choices_depth = 3 |
"Depth of blob choice lists to explore" " when fixed length dawgs are on"
Definition at line 1126 of file tesseractclass.h.
bool tesseract::Tesseract::load_fixed_length_dawgs = true |
"Load fixed length" " dawgs (e.g. for non-space delimited languages)"
Definition at line 1099 of file tesseractclass.h.
double tesseract::Tesseract::min_orientation_margin = 7.0 |
"Min acceptable orientation margin"
Definition at line 1061 of file tesseractclass.h.
int tesseract::Tesseract::min_sane_x_ht_pixels = 8 |
"Reject any x-ht lt or eq than this"
Definition at line 1044 of file tesseractclass.h.
bool tesseract::Tesseract::ngram_permuter_activated = false |
"Activate character-level n-gram-based permuter"
Definition at line 1122 of file tesseractclass.h.
double tesseract::Tesseract::noise_cert_basechar = -8.0 |
"Hingepoint for base char certainty"
Definition at line 855 of file tesseractclass.h.
double tesseract::Tesseract::noise_cert_disjoint = -2.5 |
"Hingepoint for disjoint certainty"
Definition at line 858 of file tesseractclass.h.
double tesseract::Tesseract::noise_cert_factor = 0.375 |
"Scaling on certainty diff from Hingepoint"
Definition at line 864 of file tesseractclass.h.
double tesseract::Tesseract::noise_cert_punc = -2.5 |
"Threshold for new punc char certainty"
Definition at line 861 of file tesseractclass.h.
int tesseract::Tesseract::noise_maxperblob = 8 |
"Max diacritics to apply to a blob"
Definition at line 865 of file tesseractclass.h.
int tesseract::Tesseract::noise_maxperword = 16 |
"Max diacritics to apply to a word"
Definition at line 866 of file tesseractclass.h.
char* tesseract::Tesseract::numeric_punctuation = ".," |
"Punct. chs expected WITHIN numbers"
Definition at line 975 of file tesseractclass.h.
int tesseract::Tesseract::ocr_devanagari_split_strategy = tesseract::ShiroRekhaSplitter::NO_SPLIT |
"Whether to use the top-line splitting process for Devanagari " "documents while performing ocr."
Definition at line 811 of file tesseractclass.h.
char* tesseract::Tesseract::ok_repeated_ch_non_alphanum_wds = "-?*\075" |
"Allow NN to unrej"
Definition at line 1042 of file tesseractclass.h.
char* tesseract::Tesseract::outlines_2 = "ij!?%\":;" |
"Non standard number of outlines"
Definition at line 895 of file tesseractclass.h.
char* tesseract::Tesseract::outlines_odd = "%| " |
"Non standard number of outlines"
Definition at line 894 of file tesseractclass.h.
char* tesseract::Tesseract::page_separator = "\f" |
"Page separator (default is form feed control character)"
Definition at line 1085 of file tesseractclass.h.
int tesseract::Tesseract::pageseg_devanagari_split_strategy = tesseract::ShiroRekhaSplitter::NO_SPLIT |
"Whether to use the top-line splitting process for Devanagari " "documents while performing page-segmentation."
Definition at line 807 of file tesseractclass.h.
int tesseract::Tesseract::paragraph_debug_level = 0 |
"Print paragraph debug info."
Definition at line 889 of file tesseractclass.h.
bool tesseract::Tesseract::paragraph_text_based = true |
"Run paragraph detection on the post-text-recognition " "(more accurate)"
Definition at line 892 of file tesseractclass.h.
bool tesseract::Tesseract::permute_chartype_word = 0 |
"Turn on character type (property) consistency permuter"
Definition at line 1115 of file tesseractclass.h.
bool tesseract::Tesseract::permute_debug = 0 |
"char permutation debug"
Definition at line 1101 of file tesseractclass.h.
bool tesseract::Tesseract::permute_fixed_length_dawg = 0 |
"Turn on fixed-length phrasebook search permuter"
Definition at line 1113 of file tesseractclass.h.
bool tesseract::Tesseract::permute_only_top = false |
"Run only the top choice permuter"
Definition at line 1123 of file tesseractclass.h.
bool tesseract::Tesseract::permute_script_word = 0 |
"Turn on word script consistency permuter"
Definition at line 1105 of file tesseractclass.h.
bool tesseract::Tesseract::poly_allow_detailed_fx = false |
"Allow feature extractors to see the original outline"
Definition at line 1065 of file tesseractclass.h.
bool tesseract::Tesseract::preserve_interword_spaces = false |
"Preserve multiple interword spaces"
Definition at line 1080 of file tesseractclass.h.
double tesseract::Tesseract::quality_blob_pc = 0.0 |
"good_quality_doc gte good blobs limit"
Definition at line 873 of file tesseractclass.h.
double tesseract::Tesseract::quality_char_pc = 0.95 |
"good_quality_doc gte good char limit"
Definition at line 876 of file tesseractclass.h.
int tesseract::Tesseract::quality_min_initial_alphas_reqd = 2 |
"alphas in a good word"
Definition at line 877 of file tesseractclass.h.
double tesseract::Tesseract::quality_outline_pc = 1.0 |
"good_quality_doc lte outline error limit"
Definition at line 875 of file tesseractclass.h.
double tesseract::Tesseract::quality_rej_pc = 0.08 |
"good_quality_doc lte rejection limit"
Definition at line 872 of file tesseractclass.h.
double tesseract::Tesseract::quality_rowrej_pc = 1.1 |
"good_quality_doc gte good char limit"
Definition at line 931 of file tesseractclass.h.
bool tesseract::Tesseract::rej_1Il_trust_permuter_type = true |
"Dont double check"
Definition at line 1033 of file tesseractclass.h.
bool tesseract::Tesseract::rej_1Il_use_dict_word = false |
"Use dictword test"
Definition at line 1032 of file tesseractclass.h.
bool tesseract::Tesseract::rej_alphas_in_number_perm = false |
"Extend permuter check"
Definition at line 1038 of file tesseractclass.h.
bool tesseract::Tesseract::rej_trust_doc_dawg = false |
"Use DOC dawg in 1Il conf. detector"
Definition at line 1031 of file tesseractclass.h.
bool tesseract::Tesseract::rej_use_good_perm = true |
"Individual rejection control"
Definition at line 1036 of file tesseractclass.h.
bool tesseract::Tesseract::rej_use_sensible_wd = false |
"Extend permuter check"
Definition at line 1037 of file tesseractclass.h.
bool tesseract::Tesseract::rej_use_tess_accepted = true |
"Individual rejection control"
Definition at line 1034 of file tesseractclass.h.
bool tesseract::Tesseract::rej_use_tess_blanks = true |
"Individual rejection control"
Definition at line 1035 of file tesseractclass.h.
double tesseract::Tesseract::rej_whole_of_mostly_reject_word_fract = 0.85 |
"if >this fract"
Definition at line 1039 of file tesseractclass.h.
int tesseract::Tesseract::segment_debug = 0 |
"Debug the whole segmentation process"
Definition at line 1100 of file tesseractclass.h.
double tesseract::Tesseract::segment_reward_chartype = 0.97 |
"Score multiplier for char type consistency within a word. "
Definition at line 1117 of file tesseractclass.h.
double tesseract::Tesseract::segment_reward_ngram_best_choice = 0.99 |
"Score multiplier for ngram permuter's best choice" " (only used in the Han script path)."
Definition at line 1120 of file tesseractclass.h.
double tesseract::Tesseract::segment_reward_script = 0.95 |
"Score multiplier for script consistency within a word. " "Being a 'reward' factor, it should be <= 1. " "Smaller value implies bigger reward."
Definition at line 1111 of file tesseractclass.h.
bool tesseract::Tesseract::segment_segcost_rating = 0 |
"incorporate segmentation cost in word rating?"
Definition at line 1107 of file tesseractclass.h.
double tesseract::Tesseract::segsearch_max_fixed_pitch_char_wh_ratio = 2.0 |
"Maximum character width-to-height ratio for" " fixed pitch fonts"
Definition at line 1146 of file tesseractclass.h.
double tesseract::Tesseract::subscript_max_y_top = 0.5 |
"Maximum top of a character measured as a multiple of x-height " "above the baseline for us to reconsider whether it's a " "subscript."
Definition at line 994 of file tesseractclass.h.
double tesseract::Tesseract::superscript_bettered_certainty = 0.97 |
"What reduction in " "badness do we think sufficient to choose a superscript over " "what we'd thought. For example, a value of 0.6 means we want " "to reduce badness of certainty by 40%"
Definition at line 986 of file tesseractclass.h.
int tesseract::Tesseract::superscript_debug = 0 |
"Debug level for sub & superscript fixer"
Definition at line 979 of file tesseractclass.h.
double tesseract::Tesseract::superscript_min_y_bottom = 0.3 |
"Minimum bottom of a character measured as a multiple of " "x-height above the baseline for us to reconsider whether it's " "a superscript."
Definition at line 998 of file tesseractclass.h.
double tesseract::Tesseract::superscript_scaledown_ratio = 0.4 |
"A superscript scaled down more than this is unbelievably " "small. For example, 0.3 means we expect the font size to " "be no smaller than 30% of the text line font size."
Definition at line 990 of file tesseractclass.h.
double tesseract::Tesseract::superscript_worse_certainty = 2.0 |
"How many times worse " "certainty does a superscript position glyph need to be for us " "to try classifying it as a char with a different baseline?"
Definition at line 982 of file tesseractclass.h.
double tesseract::Tesseract::suspect_accept_rating = -999.9 |
"Accept good rating limit"
Definition at line 1016 of file tesseractclass.h.
bool tesseract::Tesseract::suspect_constrain_1Il = false |
"UNLV keep 1Il chars rejected"
Definition at line 1014 of file tesseractclass.h.
int tesseract::Tesseract::suspect_level = 99 |
"Suspect marker level"
Definition at line 1009 of file tesseractclass.h.
double tesseract::Tesseract::suspect_rating_per_ch = 999.9 |
"Dont touch bad rating limit"
Definition at line 1015 of file tesseractclass.h.
int tesseract::Tesseract::suspect_short_words = 2 |
"Dont Suspect dict wds longer than this"
Definition at line 1013 of file tesseractclass.h.
int tesseract::Tesseract::suspect_space_level = 100 |
"Min suspect level for rejecting spaces"
Definition at line 1011 of file tesseractclass.h.
int tesseract::Tesseract::tessdata_manager_debug_level = 0 |
"Debug level for TessdataManager functions."
Definition at line 1053 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_adaption_debug = false |
"Generate and print debug information for adaption"
Definition at line 815 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_ambigs_training = false |
"Perform training for ambiguities"
Definition at line 803 of file tesseractclass.h.
int tesseract::Tesseract::tessedit_bigram_debug = 0 |
"Amount of debug output for bigram " "correction."
Definition at line 848 of file tesseractclass.h.
char* tesseract::Tesseract::tessedit_char_blacklist = "" |
"Blacklist of chars not to recognize"
Definition at line 797 of file tesseractclass.h.
char* tesseract::Tesseract::tessedit_char_unblacklist = "" |
"List of chars to override tessedit_char_blacklist"
Definition at line 801 of file tesseractclass.h.
char* tesseract::Tesseract::tessedit_char_whitelist = "" |
"Whitelist of chars to recognize"
Definition at line 799 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_consistent_reps = true |
"Force all rep chars the same"
Definition at line 1023 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_create_boxfile = false |
"Output text with boxes"
Definition at line 1045 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_create_hocr = false |
"Write .html hOCR output file"
Definition at line 1005 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_create_pdf = false |
"Write .pdf output file"
Definition at line 1006 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_create_txt = true |
"Write .txt output file"
Definition at line 1004 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_debug_block_rejection = false |
"Block and Row stats"
Definition at line 842 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_debug_doc_rejection = false |
"Page stats"
Definition at line 926 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_debug_fonts = false |
"Output font info per char"
Definition at line 841 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_debug_quality_metrics = false |
"Output data to debug file"
Definition at line 928 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_display_outwords = false |
"Draw output words"
Definition at line 830 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_dont_blkrej_good_wds = false |
"Use word segmentation quality metric"
Definition at line 915 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_dont_rowrej_good_wds = false |
"Use word segmentation quality metric"
Definition at line 917 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_dump_choices = false |
"Dump char choices"
Definition at line 831 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_dump_pageseg_images = false |
"Dump intermediate images made during page segmentation"
Definition at line 787 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_enable_bigram_correction = true |
"Enable correction based on the word bigram dictionary."
Definition at line 844 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_enable_dict_correction = false |
"Enable single word correction based on the dictionary."
Definition at line 846 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_enable_doc_dict = true |
"Add words to the document dictionary"
Definition at line 840 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_fix_fuzzy_spaces = true |
"Try to improve fuzzy spaces"
Definition at line 834 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_fix_hyphens = true |
"Crunch double hyphens?"
Definition at line 837 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_flip_0O = true |
"Contextual 0O O0 flips"
Definition at line 1026 of file tesseractclass.h.
double tesseract::Tesseract::tessedit_good_doc_still_rowrej_wd = 1.1 |
"rej good doc wd if more than this fraction rejected"
Definition at line 923 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_good_quality_unrej = true |
"Reduce rejection on good docs"
Definition at line 899 of file tesseractclass.h.
int tesseract::Tesseract::tessedit_image_border = 2 |
"Rej blbs near image edge limit"
Definition at line 1040 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_init_config_only = false |
"Only initialize with the config file. Useful if the instance is " "not going to be used for OCR but say only for layout analysis."
Definition at line 1068 of file tesseractclass.h.
char* tesseract::Tesseract::tessedit_load_sublangs = "" |
"List of languages to load with this one"
Definition at line 1055 of file tesseractclass.h.
double tesseract::Tesseract::tessedit_lower_flip_hyphen = 1.5 |
"Aspect ratio dot/hyphen test"
Definition at line 1028 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_make_boxes_from_boxes = false |
"Generate more boxes from boxed chars"
Definition at line 785 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_matcher_log = false |
"Log matcher activity"
Definition at line 883 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_minimal_rej_pass1 = false |
"Do minimal rejection on pass 1 output"
Definition at line 881 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_minimal_rejection = false |
"Only reject tess failures"
Definition at line 1017 of file tesseractclass.h.
int tesseract::Tesseract::tessedit_ocr_engine_mode = tesseract::OEM_TESSERACT_ONLY |
"Which OCR engine(s) to run (Tesseract, Cube, both). Defaults" " to loading and running only Tesseract (no Cube, no combiner)." " (Values from OcrEngineMode enum in tesseractclass.h)"
Definition at line 795 of file tesseractclass.h.
int tesseract::Tesseract::tessedit_ok_mode = 5 |
"Acceptance decision algorithm"
Definition at line 1097 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_override_permuter = true |
"According to dict_word"
Definition at line 1051 of file tesseractclass.h.
int tesseract::Tesseract::tessedit_page_number = -1 |
"-1 -> All pages, else specific page to process"
Definition at line 1047 of file tesseractclass.h.
int tesseract::Tesseract::tessedit_pageseg_mode = PSM_SINGLE_BLOCK |
"Page seg mode: 0=osd only, 1=auto+osd, 2=auto, 3=col, 4=block," " 5=line, 6=word, 7=char" " (Values from PageSegMode enum in publictypes.h)"
Definition at line 791 of file tesseractclass.h.
int tesseract::Tesseract::tessedit_parallelize = 0 |
"Run in parallel where possible"
Definition at line 1078 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_prefer_joined_punct = false |
"Reward punctuation joins"
Definition at line 971 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_preserve_blk_rej_perfect_wds = true |
"Only rej partially rejected words in block rejection"
Definition at line 911 of file tesseractclass.h.
int tesseract::Tesseract::tessedit_preserve_min_wd_len = 2 |
"Only preserve wds longer than this"
Definition at line 919 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_preserve_row_rej_perfect_wds = true |
"Only rej partially rejected words in row rejection"
Definition at line 913 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_redo_xheight = true |
"Check/Correct x-height"
Definition at line 838 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_reject_bad_qual_wds = true |
"Reject all bad quality wds"
Definition at line 925 of file tesseractclass.h.
double tesseract::Tesseract::tessedit_reject_block_percent = 45.00 |
"%rej allowed before rej whole block"
Definition at line 904 of file tesseractclass.h.
double tesseract::Tesseract::tessedit_reject_doc_percent = 65.00 |
"%rej allowed before rej whole doc"
Definition at line 902 of file tesseractclass.h.
int tesseract::Tesseract::tessedit_reject_mode = 0 |
"Rejection algorithm"
Definition at line 1024 of file tesseractclass.h.
double tesseract::Tesseract::tessedit_reject_row_percent = 40.00 |
"%rej allowed before rej whole row"
Definition at line 906 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_rejection_debug = false |
"Adaption debug"
Definition at line 1025 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_resegment_from_boxes = false |
"Take segmentation and labeling from box file"
Definition at line 779 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_resegment_from_line_boxes = false |
"Conversion of word/line box file to char box file"
Definition at line 781 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_row_rej_good_docs = true |
"Apply row rejection to good docs"
Definition at line 921 of file tesseractclass.h.
int tesseract::Tesseract::tessedit_tess_adaption_mode = 0x27 |
"Adaptation decision algorithm for tess"
Definition at line 879 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_test_adaption = false |
"Test adaption criteria"
Definition at line 882 of file tesseractclass.h.
int tesseract::Tesseract::tessedit_test_adaption_mode = 3 |
"Adaptation decision algorithm for tess"
Definition at line 885 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_timing_debug = false |
"Print timing stats"
Definition at line 832 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_train_from_boxes = false |
"Generate training data from boxed chars"
Definition at line 783 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_unrej_any_wd = false |
"Don't bother with word plausibility"
Definition at line 836 of file tesseractclass.h.
double tesseract::Tesseract::tessedit_upper_flip_hyphen = 1.8 |
"Aspect ratio dot/hyphen test"
Definition at line 1030 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_use_primary_params_model = false |
"In multilingual mode use params model of the primary language"
Definition at line 1057 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_use_reject_spaces = true |
"Reject spaces?"
Definition at line 900 of file tesseractclass.h.
double tesseract::Tesseract::tessedit_whole_wd_rej_row_percent = 70.00 |
"Number of row rejects in whole word rejects which prevents whole row rejection"
Definition at line 909 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_word_for_word = false |
"Make output have exactly one word per WERD"
Definition at line 1020 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_write_block_separators = false |
"Write block separators in output"
Definition at line 1000 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_write_images = false |
"Capture the image from the IPE"
Definition at line 1048 of file tesseractclass.h.
char* tesseract::Tesseract::tessedit_write_params_to_file = "" |
"Write all parameters to the given file."
Definition at line 813 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_write_rep_codes = false |
"Write repetition char code"
Definition at line 1002 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_write_unlv = false |
"Write .unlv output file"
Definition at line 1003 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_zero_kelvin_rejection = false |
"Don't reject ANYTHING AT ALL"
Definition at line 1022 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_zero_rejection = false |
"Don't reject ANYTHING"
Definition at line 1018 of file tesseractclass.h.
bool tesseract::Tesseract::test_pt = false |
"Test for point"
Definition at line 886 of file tesseractclass.h.
double tesseract::Tesseract::test_pt_x = 99999.99 |
"xcoord"
Definition at line 887 of file tesseractclass.h.
double tesseract::Tesseract::test_pt_y = 99999.99 |
"ycoord"
Definition at line 888 of file tesseractclass.h.
bool tesseract::Tesseract::textord_equation_detect = false |
"Turn on equation detector"
Definition at line 1069 of file tesseractclass.h.
double tesseract::Tesseract::textord_tabfind_aligned_gap_fraction = 0.75 |
"Fraction of height used as a minimum gap for aligned blobs."
Definition at line 1077 of file tesseractclass.h.
bool tesseract::Tesseract::textord_tabfind_force_vertical_text = false |
"Force using vertical text page mode"
Definition at line 1072 of file tesseractclass.h.
bool tesseract::Tesseract::textord_tabfind_show_vlines = false |
"Debug line finding"
Definition at line 1062 of file tesseractclass.h.
bool tesseract::Tesseract::textord_tabfind_vertical_horizontal_mix = true |
"find horizontal lines such as headers in vertical page mode"
Definition at line 1096 of file tesseractclass.h.
bool tesseract::Tesseract::textord_tabfind_vertical_text = true |
"Enable vertical detection"
Definition at line 1070 of file tesseractclass.h.
double tesseract::Tesseract::textord_tabfind_vertical_text_ratio = 0.5 |
"Fraction of textlines deemed vertical to use vertical page mode"
Definition at line 1075 of file tesseractclass.h.
bool tesseract::Tesseract::textord_use_cjk_fp_model = FALSE |
"Use CJK fixed pitch model"
Definition at line 1063 of file tesseractclass.h.
bool tesseract::Tesseract::unlv_tilde_crunching = true |
"Mark v.bad words for tilde crunch"
Definition at line 933 of file tesseractclass.h.
char* tesseract::Tesseract::unrecognised_char = "|" |
"Output char for unidentified blobs"
Definition at line 1008 of file tesseractclass.h.
bool tesseract::Tesseract::use_new_state_cost = FALSE |
"use new state cost heuristics for segmentation state evaluation"
Definition at line 1128 of file tesseractclass.h.
int tesseract::Tesseract::x_ht_acceptance_tolerance = 8 |
"Max allowed deviation of blob top outside of font data"
Definition at line 977 of file tesseractclass.h.
int tesseract::Tesseract::x_ht_min_change = 8 |
"Min change in xht before actually trying it"
Definition at line 978 of file tesseractclass.h.