tesseract
4.0.0-1-g2a2b
|
#include <tesseractclass.h>
Public Member Functions | ||||||||||
Tesseract () | ||||||||||
~Tesseract () | ||||||||||
Dict & | getDict () override | |||||||||
void | Clear () | |||||||||
void | ResetAdaptiveClassifier () | |||||||||
void | ResetDocumentDictionary () | |||||||||
void | SetEquationDetect (EquationDetect *detector) | |||||||||
const FCOORD & | reskew () const | |||||||||
Pix ** | mutable_pix_binary () | |||||||||
Pix * | pix_binary () const | |||||||||
Pix * | pix_grey () const | |||||||||
void | set_pix_grey (Pix *grey_pix) | |||||||||
Pix * | pix_original () const | |||||||||
void | set_pix_original (Pix *original_pix) | |||||||||
Pix * | BestPix () const | |||||||||
void | set_pix_thresholds (Pix *thresholds) | |||||||||
int | source_resolution () const | |||||||||
void | set_source_resolution (int ppi) | |||||||||
int | ImageWidth () const | |||||||||
int | ImageHeight () const | |||||||||
Pix * | scaled_color () const | |||||||||
int | scaled_factor () const | |||||||||
void | SetScaledColor (int factor, Pix *color) | |||||||||
const Textord & | textord () const | |||||||||
Textord * | mutable_textord () | |||||||||
bool | right_to_left () const | |||||||||
int | num_sub_langs () const | |||||||||
Tesseract * | get_sub_lang (int index) const | |||||||||
bool | AnyTessLang () const | |||||||||
bool | AnyLSTMLang () const | |||||||||
void | SetBlackAndWhitelist () | |||||||||
void | PrepareForPageseg () | |||||||||
void | PrepareForTessOCR (BLOCK_LIST *block_list, Tesseract *osd_tess, OSResults *osr) | |||||||||
int | SegmentPage (const STRING *input_file, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr) | |||||||||
void | SetupWordScripts (BLOCK_LIST *blocks) | |||||||||
int | AutoPageSeg (PageSegMode pageseg_mode, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks, BLOBNBOX_LIST *diacritic_blobs, Tesseract *osd_tess, OSResults *osr) | |||||||||
ColumnFinder * | SetupPageSegAndDetectOrientation (PageSegMode pageseg_mode, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr, TO_BLOCK_LIST *to_blocks, Pix **photo_mask_pix, Pix **music_mask_pix) | |||||||||
void | PrerecAllWordsPar (const GenericVector< WordData > &words) | |||||||||
void | TrainLineRecognizer (const STRING &input_imagename, const STRING &output_basename, BLOCK_LIST *block_list) | |||||||||
void | TrainFromBoxes (const GenericVector< TBOX > &boxes, const GenericVector< STRING > &texts, BLOCK_LIST *block_list, DocumentData *training_data) | |||||||||
ImageData * | GetLineData (const TBOX &line_box, const GenericVector< TBOX > &boxes, const GenericVector< STRING > &texts, int start_box, int end_box, const BLOCK &block) | |||||||||
ImageData * | GetRectImage (const TBOX &box, const BLOCK &block, int padding, TBOX *revised_box) const | |||||||||
void | LSTMRecognizeWord (const BLOCK &block, ROW *row, WERD_RES *word, PointerVector< WERD_RES > *words) | |||||||||
void | SearchWords (PointerVector< WERD_RES > *words) | |||||||||
bool | ProcessTargetWord (const TBOX &word_box, const TBOX &target_word_box, const char *word_config, int pass) | |||||||||
void | SetupAllWordsPassN (int pass_n, const TBOX *target_word_box, const char *word_config, PAGE_RES *page_res, GenericVector< WordData > *words) | |||||||||
void | SetupWordPassN (int pass_n, WordData *word) | |||||||||
bool | RecogAllWordsPassN (int pass_n, ETEXT_DESC *monitor, PAGE_RES_IT *pr_it, GenericVector< WordData > *words) | |||||||||
bool | recog_all_words (PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, const char *word_config, int dopasses) | |||||||||
void | rejection_passes (PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, const char *word_config) | |||||||||
void | bigram_correction_pass (PAGE_RES *page_res) | |||||||||
void | blamer_pass (PAGE_RES *page_res) | |||||||||
void | script_pos_pass (PAGE_RES *page_res) | |||||||||
int | RetryWithLanguage (const WordData &word_data, WordRecognizer recognizer, bool debug, WERD_RES **in_word, PointerVector< WERD_RES > *best_words) | |||||||||
bool | ReassignDiacritics (int pass, PAGE_RES_IT *pr_it, bool *make_next_word_fuzzy) | |||||||||
void | AssignDiacriticsToOverlappingBlobs (const GenericVector< C_OUTLINE *> &outlines, int pass, WERD *real_word, PAGE_RES_IT *pr_it, GenericVector< bool > *word_wanted, GenericVector< bool > *overlapped_any_blob, GenericVector< C_BLOB *> *target_blobs) | |||||||||
void | AssignDiacriticsToNewBlobs (const GenericVector< C_OUTLINE *> &outlines, int pass, WERD *real_word, PAGE_RES_IT *pr_it, GenericVector< bool > *word_wanted, GenericVector< C_BLOB *> *target_blobs) | |||||||||
bool | SelectGoodDiacriticOutlines (int pass, float certainty_threshold, PAGE_RES_IT *pr_it, C_BLOB *blob, const GenericVector< C_OUTLINE *> &outlines, int num_outlines, GenericVector< bool > *ok_outlines) | |||||||||
float | ClassifyBlobPlusOutlines (const GenericVector< bool > &ok_outlines, const GenericVector< C_OUTLINE *> &outlines, int pass_n, PAGE_RES_IT *pr_it, C_BLOB *blob, STRING *best_str) | |||||||||
float | ClassifyBlobAsWord (int pass_n, PAGE_RES_IT *pr_it, C_BLOB *blob, STRING *best_str, float *c2) | |||||||||
void | classify_word_and_language (int pass_n, PAGE_RES_IT *pr_it, WordData *word_data) | |||||||||
void | classify_word_pass1 (const WordData &word_data, WERD_RES **in_word, PointerVector< WERD_RES > *out_words) | |||||||||
void | recog_pseudo_word (PAGE_RES *page_res, TBOX &selection_box) | |||||||||
void | fix_rep_char (PAGE_RES_IT *page_res_it) | |||||||||
ACCEPTABLE_WERD_TYPE | acceptable_word_string (const UNICHARSET &char_set, const char *s, const char *lengths) | |||||||||
void | match_word_pass_n (int pass_n, WERD_RES *word, ROW *row, BLOCK *block) | |||||||||
void | classify_word_pass2 (const WordData &word_data, WERD_RES **in_word, PointerVector< WERD_RES > *out_words) | |||||||||
void | ReportXhtFixResult (bool accept_new_word, float new_x_ht, WERD_RES *word, WERD_RES *new_word) | |||||||||
bool | RunOldFixXht (WERD_RES *word, BLOCK *block, ROW *row) | |||||||||
bool | TrainedXheightFix (WERD_RES *word, BLOCK *block, ROW *row) | |||||||||
bool | TestNewNormalization (int original_misfits, float baseline_shift, float new_x_ht, WERD_RES *word, BLOCK *block, ROW *row) | |||||||||
bool | recog_interactive (PAGE_RES_IT *pr_it) | |||||||||
void | set_word_fonts (WERD_RES *word) | |||||||||
void | font_recognition_pass (PAGE_RES *page_res) | |||||||||
void | dictionary_correction_pass (PAGE_RES *page_res) | |||||||||
bool | check_debug_pt (WERD_RES *word, int location) | |||||||||
bool | SubAndSuperscriptFix (WERD_RES *word_res) | |||||||||
void | GetSubAndSuperscriptCandidates (const WERD_RES *word, int *num_rebuilt_leading, ScriptPos *leading_pos, float *leading_certainty, int *num_rebuilt_trailing, ScriptPos *trailing_pos, float *trailing_certainty, float *avg_certainty, float *unlikely_threshold) | |||||||||
WERD_RES * | TrySuperscriptSplits (int num_chopped_leading, float leading_certainty, ScriptPos leading_pos, int num_chopped_trailing, float trailing_certainty, ScriptPos trailing_pos, WERD_RES *word, bool *is_good, int *retry_leading, int *retry_trailing) | |||||||||
bool | BelievableSuperscript (bool debug, const WERD_RES &word, float certainty_threshold, int *left_ok, int *right_ok) const | |||||||||
void | output_pass (PAGE_RES_IT &page_res_it, const TBOX *target_word_box) | |||||||||
void | write_results (PAGE_RES_IT &page_res_it, char newline_type, bool force_eol) | |||||||||
void | set_unlv_suspects (WERD_RES *word) | |||||||||
UNICHAR_ID | get_rep_char (WERD_RES *word) | |||||||||
bool | acceptable_number_string (const char *s, const char *lengths) | |||||||||
int16_t | count_alphanums (const WERD_CHOICE &word) | |||||||||
int16_t | count_alphas (const WERD_CHOICE &word) | |||||||||
void | read_config_file (const char *filename, SetParamConstraint constraint) | |||||||||
int | init_tesseract (const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params, TessdataManager *mgr) | |||||||||
int | init_tesseract (const char *datapath, const char *language, OcrEngineMode oem) | |||||||||
int | init_tesseract_internal (const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params, TessdataManager *mgr) | |||||||||
void | SetupUniversalFontIds () | |||||||||
int | init_tesseract_lm (const char *arg0, const char *textbase, const char *language, TessdataManager *mgr) | |||||||||
void | recognize_page (STRING &image_name) | |||||||||
void | end_tesseract () | |||||||||
bool | init_tesseract_lang_data (const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params, TessdataManager *mgr) | |||||||||
void | ParseLanguageString (const char *lang_str, GenericVector< STRING > *to_load, GenericVector< STRING > *not_to_load) | |||||||||
SVMenuNode * | build_menu_new () | |||||||||
void | pgeditor_main (int width, int height, PAGE_RES *page_res) | |||||||||
void | process_image_event (const SVEvent &event) | |||||||||
bool | process_cmd_win_event (int32_t cmd_event, char *new_value) | |||||||||
void | debug_word (PAGE_RES *page_res, const TBOX &selection_box) | |||||||||
void | do_re_display (bool(tesseract::Tesseract::*word_painter)(PAGE_RES_IT *pr_it)) | |||||||||
bool | word_display (PAGE_RES_IT *pr_it) | |||||||||
bool | word_bln_display (PAGE_RES_IT *pr_it) | |||||||||
bool | word_blank_and_set_display (PAGE_RES_IT *pr_its) | |||||||||
bool | word_set_display (PAGE_RES_IT *pr_it) | |||||||||
bool | word_dumper (PAGE_RES_IT *pr_it) | |||||||||
void | blob_feature_display (PAGE_RES *page_res, const TBOX &selection_box) | |||||||||
void | make_reject_map (WERD_RES *word, ROW *row, int16_t pass) | |||||||||
bool | one_ell_conflict (WERD_RES *word_res, bool update_map) | |||||||||
int16_t | first_alphanum_index (const char *word, const char *word_lengths) | |||||||||
int16_t | first_alphanum_offset (const char *word, const char *word_lengths) | |||||||||
int16_t | alpha_count (const char *word, const char *word_lengths) | |||||||||
bool | word_contains_non_1_digit (const char *word, const char *word_lengths) | |||||||||
void | dont_allow_1Il (WERD_RES *word) | |||||||||
int16_t | count_alphanums (WERD_RES *word) | |||||||||
void | flip_0O (WERD_RES *word) | |||||||||
bool | non_0_digit (const UNICHARSET &ch_set, UNICHAR_ID unichar_id) | |||||||||
bool | non_O_upper (const UNICHARSET &ch_set, UNICHAR_ID unichar_id) | |||||||||
bool | repeated_nonalphanum_wd (WERD_RES *word, ROW *row) | |||||||||
void | nn_match_word (WERD_RES *word, ROW *row) | |||||||||
void | nn_recover_rejects (WERD_RES *word, ROW *row) | |||||||||
void | set_done (WERD_RES *word, int16_t pass) | |||||||||
int16_t | safe_dict_word (const WERD_RES *werd_res) | |||||||||
void | flip_hyphens (WERD_RES *word) | |||||||||
void | reject_I_1_L (WERD_RES *word) | |||||||||
void | reject_edge_blobs (WERD_RES *word) | |||||||||
void | reject_mostly_rejects (WERD_RES *word) | |||||||||
bool | word_adaptable (WERD_RES *word, uint16_t mode) | |||||||||
void | recog_word_recursive (WERD_RES *word) | |||||||||
void | recog_word (WERD_RES *word) | |||||||||
void | split_and_recog_word (WERD_RES *word) | |||||||||
void | split_word (WERD_RES *word, int split_pt, WERD_RES **right_piece, BlamerBundle **orig_blamer_bundle) const | |||||||||
void | join_words (WERD_RES *word, WERD_RES *word2, BlamerBundle *orig_bb) const | |||||||||
void | match_current_words (WERD_RES_LIST &words, ROW *row, BLOCK *block) | |||||||||
int16_t | fp_eval_word_spacing (WERD_RES_LIST &word_res_list) | |||||||||
void | dump_words (WERD_RES_LIST &perm, int16_t score, int16_t mode, bool improved) | |||||||||
bool | fixspace_thinks_word_done (WERD_RES *word) | |||||||||
GARBAGE_LEVEL | garbage_word (WERD_RES *word, BOOL8 ok_dict_word) | |||||||||
bool | potential_word_crunch (WERD_RES *word, GARBAGE_LEVEL garbage_level, bool ok_dict_word) | |||||||||
void | tilde_crunch (PAGE_RES_IT &page_res_it) | |||||||||
void | unrej_good_quality_words (PAGE_RES_IT &page_res_it) | |||||||||
void | doc_and_block_rejection (PAGE_RES_IT &page_res_it, bool good_quality_doc) | |||||||||
void | quality_based_rejection (PAGE_RES_IT &page_res_it, bool good_quality_doc) | |||||||||
void | convert_bad_unlv_chs (WERD_RES *word_res) | |||||||||
void | tilde_delete (PAGE_RES_IT &page_res_it) | |||||||||
int16_t | word_blob_quality (WERD_RES *word, ROW *row) | |||||||||
void | word_char_quality (WERD_RES *word, ROW *row, int16_t *match_count, int16_t *accepted_match_count) | |||||||||
void | unrej_good_chs (WERD_RES *word, ROW *row) | |||||||||
int16_t | count_outline_errs (char c, int16_t outline_count) | |||||||||
int16_t | word_outline_errs (WERD_RES *word) | |||||||||
bool | terrible_word_crunch (WERD_RES *word, GARBAGE_LEVEL garbage_level) | |||||||||
CRUNCH_MODE | word_deletable (WERD_RES *word, int16_t &delete_mode) | |||||||||
int16_t | failure_count (WERD_RES *word) | |||||||||
bool | noise_outlines (TWERD *word) | |||||||||
void | tess_segment_pass_n (int pass_n, WERD_RES *word) | |||||||||
PAGE_RES * | ApplyBoxes (const STRING &fname, bool find_segmentation, BLOCK_LIST *block_list) | |||||||||
void | PreenXHeights (BLOCK_LIST *block_list) | |||||||||
PAGE_RES * | SetupApplyBoxes (const GenericVector< TBOX > &boxes, BLOCK_LIST *block_list) | |||||||||
void | MaximallyChopWord (const GenericVector< TBOX > &boxes, BLOCK *block, ROW *row, WERD_RES *word_res) | |||||||||
bool | ResegmentCharBox (PAGE_RES *page_res, const TBOX *prev_box, const TBOX &box, const TBOX *next_box, const char *correct_text) | |||||||||
bool | ResegmentWordBox (BLOCK_LIST *block_list, const TBOX &box, const TBOX *next_box, const char *correct_text) | |||||||||
void | ReSegmentByClassification (PAGE_RES *page_res) | |||||||||
bool | ConvertStringToUnichars (const char *utf8, GenericVector< UNICHAR_ID > *class_ids) | |||||||||
bool | FindSegmentation (const GenericVector< UNICHAR_ID > &target_text, WERD_RES *word_res) | |||||||||
void | SearchForText (const GenericVector< BLOB_CHOICE_LIST *> *choices, int choices_pos, int choices_length, const GenericVector< UNICHAR_ID > &target_text, int text_index, float rating, GenericVector< int > *segmentation, float *best_rating, GenericVector< int > *best_segmentation) | |||||||||
void | TidyUp (PAGE_RES *page_res) | |||||||||
void | ReportFailedBox (int boxfile_lineno, TBOX box, const char *box_ch, const char *err_msg) | |||||||||
void | CorrectClassifyWords (PAGE_RES *page_res) | |||||||||
void | ApplyBoxTraining (const STRING &fontname, PAGE_RES *page_res) | |||||||||
int | CountMisfitTops (WERD_RES *word_res) | |||||||||
float | ComputeCompatibleXheight (WERD_RES *word_res, float *baseline_shift) | |||||||||
FILE * | init_recog_training (const STRING &fname) | |||||||||
void | recog_training_segmented (const STRING &fname, PAGE_RES *page_res, volatile ETEXT_DESC *monitor, FILE *output_file) | |||||||||
void | ambigs_classify_and_output (const char *label, PAGE_RES_IT *pr_it, FILE *output_file) | |||||||||
eval_word_spacing() | ||||||||||
The basic measure is the number of characters in contextually confirmed words (i.e. the word is done). If all words are contextually confirmed, the evaluation is deemed perfect. Some fiddles are done to handle "1"s as these are VERY frequent causes of fuzzy spaces. The problem with the basic measure is that "561 63" would score the same as "56163", though given our knowledge that the space is fuzzy, and that there is a "1" next to the fuzzy space, we need to ensure that "56163" is preferred. The solution is to NOT COUNT the score of any word which has a digit at one end and a "1Il" as the character on the other side of the space. Conversely, any character next to a "1" within a word is counted as a positive score. Thus "561 63" would score 4 (3 chars in a numeric word plus 1 side of the "1" joined). "56163" would score 7 - all chars in a numeric word + 2 sides of a "1" joined. The joined 1 rule is applied to any word REGARDLESS of contextual confirmation. Thus "PS7a71 3/7a" scores 1 (neither word is contextually confirmed; the only score is from the joined 1). "PS7a713/7a" scores 2. | ||||||||||
bool | digit_or_numeric_punct (WERD_RES *word, int char_position) | |||||||||
int16_t | eval_word_spacing (WERD_RES_LIST &word_res_list) | |||||||||
fix_sp_fp_word() | ||||||||||
Test the current word to see if it can be split by deleting noise blobs. If so, perform the split. Return with the iterator pointing to the same place if the word is unchanged, or to the last of the replacement words. | ||||||||||
void | fix_noisy_space_list (WERD_RES_LIST &best_perm, ROW *row, BLOCK *block) | |||||||||
void | fix_sp_fp_word (WERD_RES_IT &word_res_it, ROW *row, BLOCK *block) | |||||||||
int16_t | worst_noise_blob (WERD_RES *word_res, float *worst_noise_score) | |||||||||
float | blob_noise_score (TBLOB *blob) | |||||||||
void | break_noisiest_blob_word (WERD_RES_LIST &words) | |||||||||
fix_fuzzy_spaces() | ||||||||||
Walk over the page finding sequences of words joined by fuzzy spaces. Extract them as a sublist, process the sublist to find the optimal arrangement of spaces then replace the sublist in the ROW_RES.
| ||||||||||
void | fix_fuzzy_space_list (WERD_RES_LIST &best_perm, ROW *row, BLOCK *block) | |||||||||
void | fix_fuzzy_spaces (ETEXT_DESC *monitor, int32_t word_count, PAGE_RES *page_res) | |||||||||
process_selected_words() | ||||||||||
Walk the current block list applying the specified word processor function to each word that overlaps the selection_box. | ||||||||||
void | process_selected_words (PAGE_RES *page_res, TBOX &selection_box, bool(tesseract::Tesseract::*word_processor)(PAGE_RES_IT *pr_it)) | |||||||||
tess_add_doc_word | ||||||||||
Add the given word to the document dictionary | ||||||||||
void | tess_add_doc_word (WERD_CHOICE *word_choice) | |||||||||
tess_acceptable_word | ||||||||||
| ||||||||||
bool | tess_acceptable_word (WERD_RES *word) | |||||||||
Public Member Functions inherited from tesseract::Wordrec | ||||||||||
Wordrec () | ||||||||||
virtual | ~Wordrec ()=default | |||||||||
void | SaveAltChoices (const LIST &best_choices, WERD_RES *word) | |||||||||
void | FillLattice (const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle) | |||||||||
void | CallFillLattice (const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle) | |||||||||
void | SegSearch (WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle) | |||||||||
void | InitialSegSearch (WERD_RES *word_res, LMPainPoints *pain_points, GenericVector< SegSearchPending > *pending, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle) | |||||||||
void | DoSegSearch (WERD_RES *word_res) | |||||||||
SEAM * | attempt_blob_chop (TWERD *word, TBLOB *blob, int32_t blob_number, bool italic_blob, const GenericVector< SEAM *> &seams) | |||||||||
SEAM * | chop_numbered_blob (TWERD *word, int32_t blob_number, bool italic_blob, const GenericVector< SEAM *> &seams) | |||||||||
SEAM * | chop_overlapping_blob (const GenericVector< TBOX > &boxes, bool italic_blob, WERD_RES *word_res, int *blob_number) | |||||||||
void | add_seam_to_queue (float new_priority, SEAM *new_seam, SeamQueue *seams) | |||||||||
void | choose_best_seam (SeamQueue *seam_queue, const SPLIT *split, PRIORITY priority, SEAM **seam_result, TBLOB *blob, SeamPile *seam_pile) | |||||||||
void | combine_seam (const SeamPile &seam_pile, const SEAM *seam, SeamQueue *seam_queue) | |||||||||
SEAM * | pick_good_seam (TBLOB *blob) | |||||||||
void | try_point_pairs (EDGEPT *points[MAX_NUM_POINTS], int16_t num_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob) | |||||||||
void | try_vertical_splits (EDGEPT *points[MAX_NUM_POINTS], int16_t num_points, EDGEPT_CLIST *new_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob) | |||||||||
PRIORITY | grade_split_length (SPLIT *split) | |||||||||
PRIORITY | grade_sharpness (SPLIT *split) | |||||||||
bool | near_point (EDGEPT *point, EDGEPT *line_pt_0, EDGEPT *line_pt_1, EDGEPT **near_pt) | |||||||||
virtual BLOB_CHOICE_LIST * | classify_piece (const GenericVector< SEAM *> &seams, int16_t start, int16_t end, const char *description, TWERD *word, BlamerBundle *blamer_bundle) | |||||||||
void | merge_fragments (MATRIX *ratings, int16_t num_blobs) | |||||||||
void | get_fragment_lists (int16_t current_frag, int16_t current_row, int16_t start, int16_t num_frag_parts, int16_t num_blobs, MATRIX *ratings, BLOB_CHOICE_LIST *choice_lists) | |||||||||
void | merge_and_put_fragment_lists (int16_t row, int16_t column, int16_t num_frag_parts, BLOB_CHOICE_LIST *choice_lists, MATRIX *ratings) | |||||||||
void | fill_filtered_fragment_list (BLOB_CHOICE_LIST *choices, int fragment_pos, int num_frag_parts, BLOB_CHOICE_LIST *filtered_choices) | |||||||||
void | program_editup (const char *textbase, TessdataManager *init_classifier, TessdataManager *init_dict) | |||||||||
void | cc_recog (WERD_RES *word) | |||||||||
void | program_editdown (int32_t elasped_time) | |||||||||
void | set_pass1 () | |||||||||
void | set_pass2 () | |||||||||
int | end_recog () | |||||||||
BLOB_CHOICE_LIST * | call_matcher (TBLOB *blob) | |||||||||
int | dict_word (const WERD_CHOICE &word) | |||||||||
BLOB_CHOICE_LIST * | classify_blob (TBLOB *blob, const char *string, C_COL color, BlamerBundle *blamer_bundle) | |||||||||
PRIORITY | point_priority (EDGEPT *point) | |||||||||
void | add_point_to_list (PointHeap *point_heap, EDGEPT *point) | |||||||||
bool | is_inside_angle (EDGEPT *pt) | |||||||||
int | angle_change (EDGEPT *point1, EDGEPT *point2, EDGEPT *point3) | |||||||||
EDGEPT * | pick_close_point (EDGEPT *critical_point, EDGEPT *vertical_point, int *best_dist) | |||||||||
void | prioritize_points (TESSLINE *outline, PointHeap *points) | |||||||||
void | new_min_point (EDGEPT *local_min, PointHeap *points) | |||||||||
void | new_max_point (EDGEPT *local_max, PointHeap *points) | |||||||||
void | vertical_projection_point (EDGEPT *split_point, EDGEPT *target_point, EDGEPT **best_point, EDGEPT_CLIST *new_points) | |||||||||
SEAM * | improve_one_blob (const GenericVector< BLOB_CHOICE *> &blob_choices, DANGERR *fixpt, bool split_next_to_fragment, bool italic_blob, WERD_RES *word, int *blob_number) | |||||||||
SEAM * | chop_one_blob (const GenericVector< TBOX > &boxes, const GenericVector< BLOB_CHOICE *> &blob_choices, WERD_RES *word_res, int *blob_number) | |||||||||
void | chop_word_main (WERD_RES *word) | |||||||||
void | improve_by_chopping (float rating_cert_scale, WERD_RES *word, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle, LMPainPoints *pain_points, GenericVector< SegSearchPending > *pending) | |||||||||
int | select_blob_to_split (const GenericVector< BLOB_CHOICE *> &blob_choices, float rating_ceiling, bool split_next_to_fragment) | |||||||||
int | select_blob_to_split_from_fixpt (DANGERR *fixpt) | |||||||||
Public Member Functions inherited from tesseract::Classify | ||||||||||
Classify () | ||||||||||
virtual | ~Classify () | |||||||||
const ShapeTable * | shape_table () const | |||||||||
void | SetStaticClassifier (ShapeClassifier *static_classifier) | |||||||||
void | AddLargeSpeckleTo (int blob_length, BLOB_CHOICE_LIST *choices) | |||||||||
bool | LargeSpeckle (const TBLOB &blob) | |||||||||
ADAPT_TEMPLATES | NewAdaptedTemplates (bool InitFromUnicharset) | |||||||||
int | GetFontinfoId (ADAPT_CLASS Class, uint8_t ConfigId) | |||||||||
int | PruneClasses (const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uint8_t *normalization_factors, const uint16_t *expected_num_features, GenericVector< CP_RESULT_STRUCT > *results) | |||||||||
void | ReadNewCutoffs (TFile *fp, CLASS_CUTOFF_ARRAY Cutoffs) | |||||||||
void | PrintAdaptedTemplates (FILE *File, ADAPT_TEMPLATES Templates) | |||||||||
void | WriteAdaptedTemplates (FILE *File, ADAPT_TEMPLATES Templates) | |||||||||
ADAPT_TEMPLATES | ReadAdaptedTemplates (TFile *File) | |||||||||
float | ComputeNormMatch (CLASS_ID ClassId, const FEATURE_STRUCT &feature, bool DebugMatch) | |||||||||
void | FreeNormProtos () | |||||||||
NORM_PROTOS * | ReadNormProtos (TFile *fp) | |||||||||
void | ConvertProto (PROTO Proto, int ProtoId, INT_CLASS Class) | |||||||||
INT_TEMPLATES | CreateIntTemplates (CLASSES FloatProtos, const UNICHARSET &target_unicharset) | |||||||||
void | LearnWord (const char *fontname, WERD_RES *word) | |||||||||
void | LearnPieces (const char *fontname, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word) | |||||||||
void | InitAdaptiveClassifier (TessdataManager *mgr) | |||||||||
void | InitAdaptedClass (TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates) | |||||||||
void | AmbigClassifier (const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, const TBLOB *blob, INT_TEMPLATES templates, ADAPT_CLASS *classes, UNICHAR_ID *ambiguities, ADAPT_RESULTS *results) | |||||||||
void | MasterMatcher (INT_TEMPLATES templates, int16_t num_features, const INT_FEATURE_STRUCT *features, const uint8_t *norm_factors, ADAPT_CLASS *classes, int debug, int matcher_multiplier, const TBOX &blob_box, const GenericVector< CP_RESULT_STRUCT > &results, ADAPT_RESULTS *final_results) | |||||||||
void | ExpandShapesAndApplyCorrections (ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, const uint8_t *cn_factors, UnicharRating *int_result, ADAPT_RESULTS *final_results) | |||||||||
double | ComputeCorrectedRating (bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, int matcher_multiplier, const uint8_t *cn_factors) | |||||||||
void | ConvertMatchesToChoices (const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices) | |||||||||
void | AddNewResult (const UnicharRating &new_result, ADAPT_RESULTS *results) | |||||||||
int | GetAdaptiveFeatures (TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures) | |||||||||
void | DebugAdaptiveClassifier (TBLOB *Blob, ADAPT_RESULTS *Results) | |||||||||
PROTO_ID | MakeNewTempProtos (FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS IClass, ADAPT_CLASS Class, BIT_VECTOR TempProtoMask) | |||||||||
int | MakeNewTemporaryConfig (ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures) | |||||||||
void | MakePermanent (ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob) | |||||||||
void | PrintAdaptiveMatchResults (const ADAPT_RESULTS &results) | |||||||||
void | RemoveExtraPuncs (ADAPT_RESULTS *Results) | |||||||||
void | RemoveBadMatches (ADAPT_RESULTS *Results) | |||||||||
void | SetAdaptiveThreshold (float Threshold) | |||||||||
void | ShowBestMatchFor (int shape_id, const INT_FEATURE_STRUCT *features, int num_features) | |||||||||
STRING | ClassIDToDebugStr (const INT_TEMPLATES_STRUCT *templates, int class_id, int config_id) const | |||||||||
int | ClassAndConfigIDToFontOrShapeID (int class_id, int int_result_config) const | |||||||||
int | ShapeIDToClassID (int shape_id) const | |||||||||
UNICHAR_ID * | BaselineClassifier (TBLOB *Blob, const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results) | |||||||||
int | CharNormClassifier (TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results) | |||||||||
int | CharNormTrainingSample (bool pruner_only, int keep_this, const TrainingSample &sample, GenericVector< UnicharRating > *results) | |||||||||
UNICHAR_ID * | GetAmbiguities (TBLOB *Blob, CLASS_ID CorrectClass) | |||||||||
void | DoAdaptiveMatch (TBLOB *Blob, ADAPT_RESULTS *Results) | |||||||||
void | AdaptToChar (TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, float Threshold, ADAPT_TEMPLATES adaptive_templates) | |||||||||
void | DisplayAdaptedChar (TBLOB *blob, INT_CLASS_STRUCT *int_class) | |||||||||
bool | AdaptableWord (WERD_RES *word) | |||||||||
void | EndAdaptiveClassifier () | |||||||||
void | SettupPass1 () | |||||||||
void | SettupPass2 () | |||||||||
void | AdaptiveClassifier (TBLOB *Blob, BLOB_CHOICE_LIST *Choices) | |||||||||
void | ClassifyAsNoise (ADAPT_RESULTS *Results) | |||||||||
void | ResetAdaptiveClassifierInternal () | |||||||||
void | SwitchAdaptiveClassifier () | |||||||||
void | StartBackupAdaptiveClassifier () | |||||||||
int | GetCharNormFeature (const INT_FX_RESULT_STRUCT &fx_info, INT_TEMPLATES templates, uint8_t *pruner_norm_array, uint8_t *char_norm_array) | |||||||||
void | ComputeCharNormArrays (FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uint8_t *char_norm_array, uint8_t *pruner_array) | |||||||||
bool | TempConfigReliable (CLASS_ID class_id, const TEMP_CONFIG &config) | |||||||||
void | UpdateAmbigsGroup (CLASS_ID class_id, TBLOB *Blob) | |||||||||
bool | AdaptiveClassifierIsFull () const | |||||||||
bool | AdaptiveClassifierIsEmpty () const | |||||||||
bool | LooksLikeGarbage (TBLOB *blob) | |||||||||
void | RefreshDebugWindow (ScrollView **win, const char *msg, int y_offset, const TBOX &wbox) | |||||||||
void | ClearCharNormArray (uint8_t *char_norm_array) | |||||||||
void | ComputeIntCharNormArray (const FEATURE_STRUCT &norm_feature, uint8_t *char_norm_array) | |||||||||
void | ComputeIntFeatures (FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures) | |||||||||
INT_TEMPLATES | ReadIntTemplates (TFile *fp) | |||||||||
void | WriteIntTemplates (FILE *File, INT_TEMPLATES Templates, const UNICHARSET &target_unicharset) | |||||||||
CLASS_ID | GetClassToDebug (const char *Prompt, bool *adaptive_on, bool *pretrained_on, int *shape_id) | |||||||||
void | ShowMatchDisplay () | |||||||||
UnicityTable< FontInfo > & | get_fontinfo_table () | |||||||||
const UnicityTable< FontInfo > & | get_fontinfo_table () const | |||||||||
UnicityTable< FontSet > & | get_fontset_table () | |||||||||
void | NormalizeOutlines (LIST Outlines, float *XScale, float *YScale) | |||||||||
FEATURE_SET | ExtractOutlineFeatures (TBLOB *Blob) | |||||||||
FEATURE_SET | ExtractPicoFeatures (TBLOB *Blob) | |||||||||
FEATURE_SET | ExtractIntCNFeatures (const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info) | |||||||||
FEATURE_SET | ExtractIntGeoFeatures (const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info) | |||||||||
void | LearnBlob (const STRING &fontname, TBLOB *Blob, const DENORM &cn_denorm, const INT_FX_RESULT_STRUCT &fx_info, const char *blob_text) | |||||||||
bool | WriteTRFile (const STRING &filename) | |||||||||
Public Member Functions inherited from tesseract::CCStruct | ||||||||||
CCStruct ()=default | ||||||||||
virtual | ~CCStruct () | |||||||||
Public Member Functions inherited from tesseract::CUtil | ||||||||||
CUtil ()=default | ||||||||||
virtual | ~CUtil () | |||||||||
void | read_variables (const char *filename, bool global_only) | |||||||||
Public Member Functions inherited from tesseract::CCUtil | ||||||||||
CCUtil () | ||||||||||
virtual | ~CCUtil () | |||||||||
void | main_setup (const char *argv0, const char *basename) | |||||||||
CCUtil::main_setup - set location of tessdata and name of image. More... | ||||||||||
ParamsVectors * | params () | |||||||||
Additional Inherited Members | |
Static Public Member Functions inherited from tesseract::Classify | |
static void | SetupBLCNDenorms (const TBLOB &blob, bool nonlinear_norm, DENORM *bl_denorm, DENORM *cn_denorm, INT_FX_RESULT_STRUCT *fx_info) |
static void | ExtractFeatures (const TBLOB &blob, bool nonlinear_norm, GenericVector< INT_FEATURE_STRUCT > *bl_features, GenericVector< INT_FEATURE_STRUCT > *cn_features, INT_FX_RESULT_STRUCT *results, GenericVector< int > *outline_cn_counts) |
Static Public Attributes inherited from tesseract::CCStruct | |
static const double | kDescenderFraction = 0.25 |
static const double | kXHeightFraction = 0.5 |
static const double | kAscenderFraction = 0.25 |
static const double | kXHeightCapRatio |
Protected Member Functions inherited from tesseract::Wordrec | |
bool | SegSearchDone (int num_futile_classifications) |
void | UpdateSegSearchNodes (float rating_cert_scale, int starting_col, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle) |
void | ProcessSegSearchPainPoint (float pain_point_priority, const MATRIX_COORD &pain_point, const char *pain_point_type, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle) |
void | ResetNGramSearch (WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, GenericVector< SegSearchPending > *pending) |
void | InitBlamerForSegSearch (WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle, STRING *blamer_debug) |
Protected Attributes inherited from tesseract::Classify | |
IntegerMatcher | im_ |
FEATURE_DEFS_STRUCT | feature_defs_ |
ShapeTable * | shape_table_ |
Definition at line 173 of file tesseractclass.h.
tesseract::Tesseract::Tesseract | ( | ) |
Definition at line 54 of file tesseractclass.cpp.
tesseract::Tesseract::~Tesseract | ( | ) |
Definition at line 545 of file tesseractclass.cpp.
bool tesseract::Tesseract::acceptable_number_string | ( | const char * | s, |
const char * | lengths | ||
) |
Definition at line 394 of file output.cpp.
ACCEPTABLE_WERD_TYPE tesseract::Tesseract::acceptable_word_string | ( | const UNICHARSET & | char_set, |
const char * | s, | ||
const char * | lengths | ||
) |
Definition at line 1764 of file control.cpp.
int16_t tesseract::Tesseract::alpha_count | ( | const char * | word, |
const char * | word_lengths | ||
) |
Definition at line 500 of file reject.cpp.
void tesseract::Tesseract::ambigs_classify_and_output | ( | const char * | label, |
PAGE_RES_IT * | pr_it, | ||
FILE * | output_file | ||
) |
Definition at line 209 of file recogtraining.cpp.
|
inline |
Definition at line 288 of file tesseractclass.h.
|
inline |
Definition at line 280 of file tesseractclass.h.
PAGE_RES* tesseract::Tesseract::ApplyBoxes | ( | const STRING & | fname, |
bool | find_segmentation, | ||
BLOCK_LIST * | block_list | ||
) |
void tesseract::Tesseract::AssignDiacriticsToNewBlobs | ( | const GenericVector< C_OUTLINE *> & | outlines, |
int | pass, | ||
WERD * | real_word, | ||
PAGE_RES_IT * | pr_it, | ||
GenericVector< bool > * | word_wanted, | ||
GenericVector< C_BLOB *> * | target_blobs | ||
) |
Definition at line 1074 of file control.cpp.
void tesseract::Tesseract::AssignDiacriticsToOverlappingBlobs | ( | const GenericVector< C_OUTLINE *> & | outlines, |
int | pass, | ||
WERD * | real_word, | ||
PAGE_RES_IT * | pr_it, | ||
GenericVector< bool > * | word_wanted, | ||
GenericVector< bool > * | overlapped_any_blob, | ||
GenericVector< C_BLOB *> * | target_blobs | ||
) |
Definition at line 1019 of file control.cpp.
int tesseract::Tesseract::AutoPageSeg | ( | PageSegMode | pageseg_mode, |
BLOCK_LIST * | blocks, | ||
TO_BLOCK_LIST * | to_blocks, | ||
BLOBNBOX_LIST * | diacritic_blobs, | ||
Tesseract * | osd_tess, | ||
OSResults * | osr | ||
) |
Auto page segmentation. Divide the page image into blocks of uniform text linespacing and images.
Resolution (in ppi) is derived from the input image.
The output goes in the blocks list with corresponding TO_BLOCKs in the to_blocks list.
If !PSM_COL_FIND_ENABLED(pageseg_mode), then no attempt is made to divide the image into columns, but multiple blocks are still made if the text is of non-uniform linespacing.
If diacritic_blobs is non-null, then diacritics/noise blobs, that would confuse layout analysis by causing textline overlap, are placed there, with the expectation that they will be reassigned to words later and noise/diacriticness determined via classification.
If osd (orientation and script detection) is true then that is performed as well. If only_osd is true, then only orientation and script detection is performed. If osd is desired, (osd or only_osd) then osd_tess must be another Tesseract that was initialized especially for osd, and the results will be output into osr (orientation and script result).
Definition at line 201 of file pagesegmain.cpp.
bool tesseract::Tesseract::BelievableSuperscript | ( | bool | debug, |
const WERD_RES & | word, | ||
float | certainty_threshold, | ||
int * | left_ok, | ||
int * | right_ok | ||
) | const |
Return whether this is believable superscript or subscript text.
We insist that:
[in] | debug | If true, spew debug output |
[in] | word | The word whose best_choice we're evaluating |
[in] | certainty_threshold | If any of the characters have less certainty than this, reject. |
[out] | left_ok | How many left-side characters were ok? |
[out] | right_ok | How many right-side characters were ok? |
Definition at line 522 of file superscript.cpp.
|
inline |
Definition at line 229 of file tesseractclass.h.
void tesseract::Tesseract::bigram_correction_pass | ( | PAGE_RES * | page_res | ) |
Definition at line 473 of file control.cpp.
void tesseract::Tesseract::blamer_pass | ( | PAGE_RES * | page_res | ) |
Definition at line 716 of file control.cpp.
Definition at line 959 of file pgedit.cpp.
float tesseract::Tesseract::blob_noise_score | ( | TBLOB * | blob | ) |
Definition at line 790 of file fixspace.cpp.
void tesseract::Tesseract::break_noisiest_blob_word | ( | WERD_RES_LIST & | words | ) |
break_noisiest_blob_word() Find the word with the blob which looks like the worst noise. Break the word into two, deleting the noise blob.
Definition at line 645 of file fixspace.cpp.
SVMenuNode * tesseract::Tesseract::build_menu_new | ( | ) |
Definition at line 247 of file pgedit.cpp.
bool tesseract::Tesseract::check_debug_pt | ( | WERD_RES * | word, |
int | location | ||
) |
Definition at line 1868 of file control.cpp.
void tesseract::Tesseract::classify_word_and_language | ( | int | pass_n, |
PAGE_RES_IT * | pr_it, | ||
WordData * | word_data | ||
) |
Definition at line 1338 of file control.cpp.
void tesseract::Tesseract::classify_word_pass1 | ( | const WordData & | word_data, |
WERD_RES ** | in_word, | ||
PointerVector< WERD_RES > * | out_words | ||
) |
classify_word_pass1
Baseline normalize the word and pass it to Tess.
Definition at line 1420 of file control.cpp.
void tesseract::Tesseract::classify_word_pass2 | ( | const WordData & | word_data, |
WERD_RES ** | in_word, | ||
PointerVector< WERD_RES > * | out_words | ||
) |
classify_word_pass2
Control what to do with the word in pass 2
Definition at line 1591 of file control.cpp.
float tesseract::Tesseract::ClassifyBlobAsWord | ( | int | pass_n, |
PAGE_RES_IT * | pr_it, | ||
C_BLOB * | blob, | ||
STRING * | best_str, | ||
float * | c2 | ||
) |
Definition at line 1287 of file control.cpp.
float tesseract::Tesseract::ClassifyBlobPlusOutlines | ( | const GenericVector< bool > & | ok_outlines, |
const GenericVector< C_OUTLINE *> & | outlines, | ||
int | pass_n, | ||
PAGE_RES_IT * | pr_it, | ||
C_BLOB * | blob, | ||
STRING * | best_str | ||
) |
Definition at line 1239 of file control.cpp.
void tesseract::Tesseract::Clear | ( | ) |
Definition at line 569 of file tesseractclass.cpp.
float tesseract::Tesseract::ComputeCompatibleXheight | ( | WERD_RES * | word_res, |
float * | baseline_shift | ||
) |
Definition at line 102 of file fixxht.cpp.
void tesseract::Tesseract::convert_bad_unlv_chs | ( | WERD_RES * | word_res | ) |
Definition at line 660 of file docqual.cpp.
bool tesseract::Tesseract::ConvertStringToUnichars | ( | const char * | utf8, |
GenericVector< UNICHAR_ID > * | class_ids | ||
) |
void tesseract::Tesseract::CorrectClassifyWords | ( | PAGE_RES * | page_res | ) |
int16_t tesseract::Tesseract::count_alphanums | ( | const WERD_CHOICE & | word | ) |
Definition at line 383 of file output.cpp.
int16_t tesseract::Tesseract::count_alphanums | ( | WERD_RES * | word | ) |
Definition at line 563 of file reject.cpp.
int16_t tesseract::Tesseract::count_alphas | ( | const WERD_CHOICE & | word | ) |
Definition at line 373 of file output.cpp.
int16_t tesseract::Tesseract::count_outline_errs | ( | char | c, |
int16_t | outline_count | ||
) |
Definition at line 127 of file docqual.cpp.
int tesseract::Tesseract::CountMisfitTops | ( | WERD_RES * | word_res | ) |
Definition at line 70 of file fixxht.cpp.
debug_word
Process the whole image, but load word_config_ for the selected word(s).
Definition at line 637 of file pgedit.cpp.
void tesseract::Tesseract::dictionary_correction_pass | ( | PAGE_RES * | page_res | ) |
Definition at line 2117 of file control.cpp.
bool tesseract::Tesseract::digit_or_numeric_punct | ( | WERD_RES * | word, |
int | char_position | ||
) |
Definition at line 373 of file fixspace.cpp.
void tesseract::Tesseract::do_re_display | ( | bool(tesseract::Tesseract::*)(PAGE_RES_IT *pr_it) | word_painter | ) |
Redisplay page
Definition at line 298 of file pgedit.cpp.
void tesseract::Tesseract::doc_and_block_rejection | ( | PAGE_RES_IT & | page_res_it, |
bool | good_quality_doc | ||
) |
Definition at line 233 of file docqual.cpp.
void tesseract::Tesseract::dont_allow_1Il | ( | WERD_RES * | word | ) |
Definition at line 531 of file reject.cpp.
void tesseract::Tesseract::dump_words | ( | WERD_RES_LIST & | perm, |
int16_t | score, | ||
int16_t | mode, | ||
bool | improved | ||
) |
Definition at line 479 of file fixspace.cpp.
void tesseract::Tesseract::end_tesseract | ( | ) |
Definition at line 475 of file tessedit.cpp.
int16_t tesseract::Tesseract::eval_word_spacing | ( | WERD_RES_LIST & | word_res_list | ) |
Definition at line 269 of file fixspace.cpp.
int16_t tesseract::Tesseract::failure_count | ( | WERD_RES * | word | ) |
Definition at line 966 of file docqual.cpp.
bool tesseract::Tesseract::FindSegmentation | ( | const GenericVector< UNICHAR_ID > & | target_text, |
WERD_RES * | word_res | ||
) |
int16_t tesseract::Tesseract::first_alphanum_index | ( | const char * | word, |
const char * | word_lengths | ||
) |
Definition at line 474 of file reject.cpp.
int16_t tesseract::Tesseract::first_alphanum_offset | ( | const char * | word, |
const char * | word_lengths | ||
) |
Definition at line 487 of file reject.cpp.
void tesseract::Tesseract::fix_fuzzy_space_list | ( | WERD_RES_LIST & | best_perm, |
ROW * | row, | ||
BLOCK * | block | ||
) |
Definition at line 175 of file fixspace.cpp.
void tesseract::Tesseract::fix_fuzzy_spaces | ( | ETEXT_DESC * | monitor, |
int32_t | word_count, | ||
PAGE_RES * | page_res | ||
) |
Definition at line 78 of file fixspace.cpp.
void tesseract::Tesseract::fix_noisy_space_list | ( | WERD_RES_LIST & | best_perm, |
ROW * | row, | ||
BLOCK * | block | ||
) |
Definition at line 599 of file fixspace.cpp.
void tesseract::Tesseract::fix_rep_char | ( | PAGE_RES_IT * | page_res_it | ) |
fix_rep_char() The word is a repeated char. (Leader.) Find the repeated character. Create the appropriate single-word or multi-word sequence according to the size of spaces in between blobs, and correct the classifications where some of the characters disagree with the majority.
Definition at line 1725 of file control.cpp.
Definition at line 565 of file fixspace.cpp.
bool tesseract::Tesseract::fixspace_thinks_word_done | ( | WERD_RES * | word | ) |
Definition at line 533 of file fixspace.cpp.
void tesseract::Tesseract::flip_0O | ( | WERD_RES * | word | ) |
Definition at line 678 of file reject.cpp.
void tesseract::Tesseract::flip_hyphens | ( | WERD_RES * | word | ) |
Definition at line 621 of file reject.cpp.
void tesseract::Tesseract::font_recognition_pass | ( | PAGE_RES * | page_res | ) |
font_recognition_pass
Smooth the fonts for the document.
Definition at line 2060 of file control.cpp.
int16_t tesseract::Tesseract::fp_eval_word_spacing | ( | WERD_RES_LIST & | word_res_list | ) |
Definition at line 860 of file fixspace.cpp.
GARBAGE_LEVEL tesseract::Tesseract::garbage_word | ( | WERD_RES * | word, |
BOOL8 | ok_dict_word | ||
) |
Definition at line 680 of file docqual.cpp.
UNICHAR_ID tesseract::Tesseract::get_rep_char | ( | WERD_RES * | word | ) |
Definition at line 258 of file output.cpp.
|
inline |
Definition at line 276 of file tesseractclass.h.
|
overridevirtual |
ImageData * tesseract::Tesseract::GetLineData | ( | const TBOX & | line_box, |
const GenericVector< TBOX > & | boxes, | ||
const GenericVector< STRING > & | texts, | ||
int | start_box, | ||
int | end_box, | ||
const BLOCK & | block | ||
) |
Definition at line 129 of file linerec.cpp.
ImageData * tesseract::Tesseract::GetRectImage | ( | const TBOX & | box, |
const BLOCK & | block, | ||
int | padding, | ||
TBOX * | revised_box | ||
) | const |
Definition at line 163 of file linerec.cpp.
void tesseract::Tesseract::GetSubAndSuperscriptCandidates | ( | const WERD_RES * | word, |
int * | num_rebuilt_leading, | ||
ScriptPos * | leading_pos, | ||
float * | leading_certainty, | ||
int * | num_rebuilt_trailing, | ||
ScriptPos * | trailing_pos, | ||
float * | trailing_certainty, | ||
float * | avg_certainty, | ||
float * | unlikely_threshold | ||
) |
Determine how many characters (rebuilt blobs) on each end of a given word might plausibly be superscripts so SubAndSuperscriptFix can try to re-recognize them. Even if we find no whole blobs at either end, we will set *unlikely_threshold to a certainty that might be used to select "bad enough" outlier characters. If *unlikely_threshold is set to 0, though, there's really no hope.
[in] | word | The word to examine. |
[out] | num_rebuilt_leading | the number of rebuilt blobs at the start of the word which are all up or down and seem badly classified. |
[out] | leading_pos | "super" or "sub" (for debugging) |
[out] | leading_certainty | the worst certainty in the leading blobs. |
[out] | num_rebuilt_trailing | the number of rebuilt blobs at the end of the word which are all up or down and seem badly classified. |
[out] | trailing_pos | "super" or "sub" (for debugging) |
[out] | trailing_certainty | the worst certainty in the trailing blobs. |
[out] | avg_certainty | the average certainty of "normal" blobs in the word. |
[out] | unlikely_threshold | the threshold (on certainty) we used to select "bad enough" outlier characters. |
Definition at line 254 of file superscript.cpp.
|
inline |
Definition at line 250 of file tesseractclass.h.
|
inline |
Definition at line 247 of file tesseractclass.h.
FILE * tesseract::Tesseract::init_recog_training | ( | const STRING & | fname | ) |
Definition at line 35 of file recogtraining.cpp.
int tesseract::Tesseract::init_tesseract | ( | const char * | arg0, |
const char * | textbase, | ||
const char * | language, | ||
OcrEngineMode | oem, | ||
char ** | configs, | ||
int | configs_size, | ||
const GenericVector< STRING > * | vars_vec, | ||
const GenericVector< STRING > * | vars_values, | ||
bool | set_only_init_params, | ||
TessdataManager * | mgr | ||
) |
Definition at line 296 of file tessedit.cpp.
|
inline |
Definition at line 524 of file tesseractclass.h.
int tesseract::Tesseract::init_tesseract_internal | ( | const char * | arg0, |
const char * | textbase, | ||
const char * | language, | ||
OcrEngineMode | oem, | ||
char ** | configs, | ||
int | configs_size, | ||
const GenericVector< STRING > * | vars_vec, | ||
const GenericVector< STRING > * | vars_values, | ||
bool | set_only_init_params, | ||
TessdataManager * | mgr | ||
) |
Definition at line 396 of file tessedit.cpp.
bool tesseract::Tesseract::init_tesseract_lang_data | ( | const char * | arg0, |
const char * | textbase, | ||
const char * | language, | ||
OcrEngineMode | oem, | ||
char ** | configs, | ||
int | configs_size, | ||
const GenericVector< STRING > * | vars_vec, | ||
const GenericVector< STRING > * | vars_values, | ||
bool | set_only_init_params, | ||
TessdataManager * | mgr | ||
) |
Definition at line 91 of file tessedit.cpp.
int tesseract::Tesseract::init_tesseract_lm | ( | const char * | arg0, |
const char * | textbase, | ||
const char * | language, | ||
TessdataManager * | mgr | ||
) |
Definition at line 462 of file tessedit.cpp.
void tesseract::Tesseract::join_words | ( | WERD_RES * | word, |
WERD_RES * | word2, | ||
BlamerBundle * | orig_bb | ||
) | const |
Definition at line 234 of file tfacepp.cpp.
void tesseract::Tesseract::LSTMRecognizeWord | ( | const BLOCK & | block, |
ROW * | row, | ||
WERD_RES * | word, | ||
PointerVector< WERD_RES > * | words | ||
) |
Definition at line 222 of file linerec.cpp.
Definition at line 226 of file fixspace.cpp.
void tesseract::Tesseract::match_word_pass_n | ( | int | pass_n, |
WERD_RES * | word, | ||
ROW * | row, | ||
BLOCK * | block | ||
) |
match_word_pass_n
Baseline normalize the word and pass it to Tess.
Definition at line 1649 of file control.cpp.
void tesseract::Tesseract::MaximallyChopWord | ( | const GenericVector< TBOX > & | boxes, |
BLOCK * | block, | ||
ROW * | row, | ||
WERD_RES * | word_res | ||
) |
|
inline |
Definition at line 197 of file tesseractclass.h.
|
inline |
Definition at line 266 of file tesseractclass.h.
bool tesseract::Tesseract::noise_outlines | ( | TWERD * | word | ) |
Definition at line 978 of file docqual.cpp.
bool tesseract::Tesseract::non_0_digit | ( | const UNICHARSET & | ch_set, |
UNICHAR_ID | unichar_id | ||
) |
Definition at line 794 of file reject.cpp.
bool tesseract::Tesseract::non_O_upper | ( | const UNICHARSET & | ch_set, |
UNICHAR_ID | unichar_id | ||
) |
Definition at line 790 of file reject.cpp.
|
inline |
Definition at line 273 of file tesseractclass.h.
bool tesseract::Tesseract::one_ell_conflict | ( | WERD_RES * | word_res, |
bool | update_map | ||
) |
Definition at line 297 of file reject.cpp.
void tesseract::Tesseract::output_pass | ( | PAGE_RES_IT & | page_res_it, |
const TBOX * | target_word_box | ||
) |
Definition at line 43 of file output.cpp.
void tesseract::Tesseract::ParseLanguageString | ( | const char * | lang_str, |
GenericVector< STRING > * | to_load, | ||
GenericVector< STRING > * | not_to_load | ||
) |
Definition at line 262 of file tessedit.cpp.
void tesseract::Tesseract::pgeditor_main | ( | int | width, |
int | height, | ||
PAGE_RES * | page_res | ||
) |
Top level editor operation: Set up a new window and a corresponding event handler.
Definition at line 327 of file pgedit.cpp.
|
inline |
Definition at line 201 of file tesseractclass.h.
|
inline |
Definition at line 204 of file tesseractclass.h.
|
inline |
Definition at line 211 of file tesseractclass.h.
bool tesseract::Tesseract::potential_word_crunch | ( | WERD_RES * | word, |
GARBAGE_LEVEL | garbage_level, | ||
bool | ok_dict_word | ||
) |
Definition at line 542 of file docqual.cpp.
void tesseract::Tesseract::PreenXHeights | ( | BLOCK_LIST * | block_list | ) |
void tesseract::Tesseract::PrepareForPageseg | ( | ) |
Definition at line 624 of file tesseractclass.cpp.
void tesseract::Tesseract::PrepareForTessOCR | ( | BLOCK_LIST * | block_list, |
Tesseract * | osd_tess, | ||
OSResults * | osr | ||
) |
Definition at line 655 of file tesseractclass.cpp.
void tesseract::Tesseract::PrerecAllWordsPar | ( | const GenericVector< WordData > & | words | ) |
Definition at line 39 of file par_control.cpp.
bool tesseract::Tesseract::process_cmd_win_event | ( | int32_t | cmd_event, |
char * | new_value | ||
) |
Definition at line 387 of file pgedit.cpp.
void tesseract::Tesseract::process_image_event | ( | const SVEvent & | event | ) |
User has done something in the image window - mouse down or up. Work out what it is and do something with it. If DOWN - just remember where it was. If UP - for each word in the selected area do the operation defined by the current mode.
Definition at line 559 of file pgedit.cpp.
void tesseract::Tesseract::process_selected_words | ( | PAGE_RES * | page_res, |
TBOX & | selection_box, | ||
bool(tesseract::Tesseract::*)(PAGE_RES_IT *pr_it) | word_processor | ||
) |
Definition at line 30 of file pagewalk.cpp.
bool tesseract::Tesseract::ProcessTargetWord | ( | const TBOX & | word_box, |
const TBOX & | target_word_box, | ||
const char * | word_config, | ||
int | pass | ||
) |
Definition at line 125 of file control.cpp.
void tesseract::Tesseract::quality_based_rejection | ( | PAGE_RES_IT & | page_res_it, |
bool | good_quality_doc | ||
) |
Definition at line 139 of file docqual.cpp.
void tesseract::Tesseract::read_config_file | ( | const char * | filename, |
SetParamConstraint | constraint | ||
) |
Definition at line 60 of file tessedit.cpp.
bool tesseract::Tesseract::ReassignDiacritics | ( | int | pass, |
PAGE_RES_IT * | pr_it, | ||
bool * | make_next_word_fuzzy | ||
) |
Definition at line 949 of file control.cpp.
bool tesseract::Tesseract::recog_all_words | ( | PAGE_RES * | page_res, |
ETEXT_DESC * | monitor, | ||
const TBOX * | target_word_box, | ||
const char * | word_config, | ||
int | dopasses | ||
) |
Walk the page_res, recognizing all the words. If monitor is not null, it is used as a progress monitor/timeout/cancel. If dopasses is 0, all recognition passes are run; if 1, just pass 1; if 2, passes 2 and higher. If target_word_box is not null, special things are done to words that overlap the target_word_box: if word_config is not null, the word config file is read for just the target word(s), otherwise, on pass 2 and beyond ONLY the target words are processed (Jetsoft modification.) Returns false if we cancelled prematurely.
page_res | page structure |
monitor | progress monitor |
word_config | word_config file |
target_word_box | specifies just to extract a rectangle |
dopasses | 0 - all, 1 just pass 1, 2 passes 2 and higher |
Definition at line 308 of file control.cpp.
bool tesseract::Tesseract::recog_interactive | ( | PAGE_RES_IT * | pr_it | ) |
Recognize a single word in interactive mode.
pr_it | the page results iterator |
Definition at line 82 of file control.cpp.
Definition at line 67 of file control.cpp.
void tesseract::Tesseract::recog_training_segmented | ( | const STRING & | fname, |
PAGE_RES * | page_res, | ||
volatile ETEXT_DESC * | monitor, | ||
FILE * | output_file | ||
) |
Definition at line 82 of file recogtraining.cpp.
void tesseract::Tesseract::recog_word | ( | WERD_RES * | word | ) |
Definition at line 40 of file tfacepp.cpp.
void tesseract::Tesseract::recog_word_recursive | ( | WERD_RES * | word | ) |
Definition at line 104 of file tfacepp.cpp.
bool tesseract::Tesseract::RecogAllWordsPassN | ( | int | pass_n, |
ETEXT_DESC * | monitor, | ||
PAGE_RES_IT * | pr_it, | ||
GenericVector< WordData > * | words | ||
) |
Definition at line 218 of file control.cpp.
void tesseract::Tesseract::recognize_page | ( | STRING & | image_name | ) |
void tesseract::Tesseract::reject_edge_blobs | ( | WERD_RES * | word | ) |
Definition at line 268 of file reject.cpp.
void tesseract::Tesseract::reject_I_1_L | ( | WERD_RES * | word | ) |
Definition at line 198 of file reject.cpp.
void tesseract::Tesseract::reject_mostly_rejects | ( | WERD_RES * | word | ) |
Definition at line 578 of file reject.cpp.
void tesseract::Tesseract::rejection_passes | ( | PAGE_RES * | page_res, |
ETEXT_DESC * | monitor, | ||
const TBOX * | target_word_box, | ||
const char * | word_config | ||
) |
Definition at line 618 of file control.cpp.
Definition at line 587 of file reject.cpp.
void tesseract::Tesseract::ReportFailedBox | ( | int | boxfile_lineno, |
TBOX | box, | ||
const char * | box_ch, | ||
const char * | err_msg | ||
) |
void tesseract::Tesseract::ReportXhtFixResult | ( | bool | accept_new_word, |
float | new_x_ht, | ||
WERD_RES * | word, | ||
WERD_RES * | new_word | ||
) |
Definition at line 1481 of file control.cpp.
void tesseract::Tesseract::ReSegmentByClassification | ( | PAGE_RES * | page_res | ) |
bool tesseract::Tesseract::ResegmentCharBox | ( | PAGE_RES * | page_res, |
const TBOX * | prev_box, | ||
const TBOX & | box, | ||
const TBOX * | next_box, | ||
const char * | correct_text | ||
) |
bool tesseract::Tesseract::ResegmentWordBox | ( | BLOCK_LIST * | block_list, |
const TBOX & | box, | ||
const TBOX * | next_box, | ||
const char * | correct_text | ||
) |
void tesseract::Tesseract::ResetAdaptiveClassifier | ( | ) |
Definition at line 592 of file tesseractclass.cpp.
void tesseract::Tesseract::ResetDocumentDictionary | ( | ) |
Definition at line 602 of file tesseractclass.cpp.
|
inline |
Definition at line 193 of file tesseractclass.h.
int tesseract::Tesseract::RetryWithLanguage | ( | const WordData & | word_data, |
WordRecognizer | recognizer, | ||
bool | debug, | ||
WERD_RES ** | in_word, | ||
PointerVector< WERD_RES > * | best_words | ||
) |
Definition at line 910 of file control.cpp.
|
inline |
Definition at line 270 of file tesseractclass.h.
int16_t tesseract::Tesseract::safe_dict_word | ( | const WERD_RES * | werd_res | ) |
Definition at line 612 of file reject.cpp.
|
inline |
Definition at line 253 of file tesseractclass.h.
|
inline |
Definition at line 256 of file tesseractclass.h.
void tesseract::Tesseract::script_pos_pass | ( | PAGE_RES * | page_res | ) |
Definition at line 740 of file control.cpp.
void tesseract::Tesseract::SearchForText | ( | const GenericVector< BLOB_CHOICE_LIST *> * | choices, |
int | choices_pos, | ||
int | choices_length, | ||
const GenericVector< UNICHAR_ID > & | target_text, | ||
int | text_index, | ||
float | rating, | ||
GenericVector< int > * | segmentation, | ||
float * | best_rating, | ||
GenericVector< int > * | best_segmentation | ||
) |
void tesseract::Tesseract::SearchWords | ( | PointerVector< WERD_RES > * | words | ) |
Definition at line 250 of file linerec.cpp.
int tesseract::Tesseract::SegmentPage | ( | const STRING * | input_file, |
BLOCK_LIST * | blocks, | ||
Tesseract * | osd_tess, | ||
OSResults * | osr | ||
) |
Segment the page according to the current value of tessedit_pageseg_mode. pix_binary_ is used as the source image and should not be nullptr. On return the blocks list owns all the constructed page layout.
Definition at line 100 of file pagesegmain.cpp.
bool tesseract::Tesseract::SelectGoodDiacriticOutlines | ( | int | pass, |
float | certainty_threshold, | ||
PAGE_RES_IT * | pr_it, | ||
C_BLOB * | blob, | ||
const GenericVector< C_OUTLINE *> & | outlines, | ||
int | num_outlines, | ||
GenericVector< bool > * | ok_outlines | ||
) |
Definition at line 1152 of file control.cpp.
void tesseract::Tesseract::set_done | ( | WERD_RES * | word, |
int16_t | pass | ||
) |
|
inline |
Definition at line 207 of file tesseractclass.h.
|
inline |
Definition at line 213 of file tesseractclass.h.
|
inline |
Definition at line 237 of file tesseractclass.h.
|
inline |
Definition at line 244 of file tesseractclass.h.
void tesseract::Tesseract::set_unlv_suspects | ( | WERD_RES * | word | ) |
Definition at line 280 of file output.cpp.
void tesseract::Tesseract::set_word_fonts | ( | WERD_RES * | word | ) |
set_word_fonts
Get the fonts for the word.
Definition at line 1981 of file control.cpp.
void tesseract::Tesseract::SetBlackAndWhitelist | ( | ) |
Definition at line 609 of file tesseractclass.cpp.
void tesseract::Tesseract::SetEquationDetect | ( | EquationDetect * | detector | ) |
Definition at line 586 of file tesseractclass.cpp.
|
inline |
Definition at line 259 of file tesseractclass.h.
void tesseract::Tesseract::SetupAllWordsPassN | ( | int | pass_n, |
const TBOX * | target_word_box, | ||
const char * | word_config, | ||
PAGE_RES * | page_res, | ||
GenericVector< WordData > * | words | ||
) |
If tesseract is to be run, sets the words up ready for it.
Definition at line 159 of file control.cpp.
PAGE_RES* tesseract::Tesseract::SetupApplyBoxes | ( | const GenericVector< TBOX > & | boxes, |
BLOCK_LIST * | block_list | ||
) |
ColumnFinder * tesseract::Tesseract::SetupPageSegAndDetectOrientation | ( | PageSegMode | pageseg_mode, |
BLOCK_LIST * | blocks, | ||
Tesseract * | osd_tess, | ||
OSResults * | osr, | ||
TO_BLOCK_LIST * | to_blocks, | ||
Pix ** | photo_mask_pix, | ||
Pix ** | music_mask_pix | ||
) |
Sets up auto page segmentation, determines the orientation, and corrects it. Somewhat arbitrary chunk of functionality, factored out of AutoPageSeg to facilitate testing. photo_mask_pix is a pointer to a nullptr pointer that will be filled on return with the Leptonica photo mask, which must be pixDestroyed by the caller. to_blocks is an empty list that will be filled with (usually a single) block that is used during layout analysis. This ugly API is required because of the possibility of a UNLV zone file. TODO(rays) clean this up. See AutoPageSeg for other arguments. The returned ColumnFinder must be deleted after use.
Definition at line 271 of file pagesegmain.cpp.
void tesseract::Tesseract::SetupUniversalFontIds | ( | ) |
Definition at line 441 of file tessedit.cpp.
void tesseract::Tesseract::SetupWordPassN | ( | int | pass_n, |
WordData * | word | ||
) |
Definition at line 182 of file control.cpp.
void tesseract::Tesseract::SetupWordScripts | ( | BLOCK_LIST * | blocks | ) |
|
inline |
Definition at line 241 of file tesseractclass.h.
void tesseract::Tesseract::split_and_recog_word | ( | WERD_RES * | word | ) |
Definition at line 138 of file tfacepp.cpp.
void tesseract::Tesseract::split_word | ( | WERD_RES * | word, |
int | split_pt, | ||
WERD_RES ** | right_piece, | ||
BlamerBundle ** | orig_blamer_bundle | ||
) | const |
Definition at line 176 of file tfacepp.cpp.
bool tesseract::Tesseract::SubAndSuperscriptFix | ( | WERD_RES * | word | ) |
Attempt to split off any high (or low) bits at the ends of the word with poor certainty and recognize them separately. If the certainty gets much better and other sanity checks pass, accept.
This superscript fix is meant to be called in the second pass of recognition when we have tried once and already have a preliminary answer for word.
Definition at line 102 of file superscript.cpp.
bool tesseract::Tesseract::terrible_word_crunch | ( | WERD_RES * | word, |
GARBAGE_LEVEL | garbage_level | ||
) |
Definition at line 504 of file docqual.cpp.
bool tesseract::Tesseract::tess_acceptable_word | ( | WERD_RES * | word | ) |
Definition at line 62 of file tessbox.cpp.
void tesseract::Tesseract::tess_add_doc_word | ( | WERD_CHOICE * | word_choice | ) |
Definition at line 72 of file tessbox.cpp.
void tesseract::Tesseract::tess_segment_pass_n | ( | int | pass_n, |
WERD_RES * | word | ||
) |
Definition at line 32 of file tessbox.cpp.
bool tesseract::Tesseract::TestNewNormalization | ( | int | original_misfits, |
float | baseline_shift, | ||
float | new_x_ht, | ||
WERD_RES * | word, | ||
BLOCK * | block, | ||
ROW * | row | ||
) |
Definition at line 1538 of file control.cpp.
|
inline |
Definition at line 263 of file tesseractclass.h.
void tesseract::Tesseract::TidyUp | ( | PAGE_RES * | page_res | ) |
void tesseract::Tesseract::tilde_crunch | ( | PAGE_RES_IT & | page_res_it | ) |
Definition at line 418 of file docqual.cpp.
void tesseract::Tesseract::tilde_delete | ( | PAGE_RES_IT & | page_res_it | ) |
Definition at line 590 of file docqual.cpp.
Definition at line 1504 of file control.cpp.
void tesseract::Tesseract::TrainFromBoxes | ( | const GenericVector< TBOX > & | boxes, |
const GenericVector< STRING > & | texts, | ||
BLOCK_LIST * | block_list, | ||
DocumentData * | training_data | ||
) |
Definition at line 74 of file linerec.cpp.
void tesseract::Tesseract::TrainLineRecognizer | ( | const STRING & | input_imagename, |
const STRING & | output_basename, | ||
BLOCK_LIST * | block_list | ||
) |
Definition at line 43 of file linerec.cpp.
WERD_RES * tesseract::Tesseract::TrySuperscriptSplits | ( | int | num_chopped_leading, |
float | leading_certainty, | ||
ScriptPos | leading_pos, | ||
int | num_chopped_trailing, | ||
float | trailing_certainty, | ||
ScriptPos | trailing_pos, | ||
WERD_RES * | word, | ||
bool * | is_good, | ||
int * | retry_rebuild_leading, | ||
int * | retry_rebuild_trailing | ||
) |
Try splitting off the given number of (chopped) blobs from the front and back of the given word and recognizing the pieces.
[in] | num_chopped_leading | how many chopped blobs from the left end of the word to chop off and try recognizing as a superscript (or subscript) |
[in] | leading_certainty | the (minimum) certainty had by the characters in the original leading section. |
[in] | leading_pos | "super" or "sub" (for debugging) |
[in] | num_chopped_trailing | how many chopped blobs from the right end of the word to chop off and try recognizing as a superscript (or subscript) |
[in] | trailing_certainty | the (minimum) certainty had by the characters in the original trailing section. |
[in] | trailing_pos | "super" or "sub" (for debugging) |
[in] | word | the word to try to chop up. |
[out] | is_good | do we believe our result? |
[out] | retry_rebuild_leading,retry_rebuild_trailing | If non-zero, and !is_good, then the caller may have luck trying to split the returned word with this number of (rebuilt) leading and trailing blobs / unichars. |
Definition at line 383 of file superscript.cpp.
Definition at line 116 of file docqual.cpp.
void tesseract::Tesseract::unrej_good_quality_words | ( | PAGE_RES_IT & | page_res_it | ) |
Definition at line 161 of file docqual.cpp.
bool tesseract::Tesseract::word_adaptable | ( | WERD_RES * | word, |
uint16_t | mode | ||
) |
Definition at line 35 of file adaptions.cpp.
bool tesseract::Tesseract::word_blank_and_set_display | ( | PAGE_RES_IT * | pr_its | ) |
Definition at line 715 of file pgedit.cpp.
bool tesseract::Tesseract::word_bln_display | ( | PAGE_RES_IT * | pr_it | ) |
Normalize word and display in word window
Definition at line 727 of file pgedit.cpp.
Definition at line 61 of file docqual.cpp.
void tesseract::Tesseract::word_char_quality | ( | WERD_RES * | word, |
ROW * | row, | ||
int16_t * | match_count, | ||
int16_t * | accepted_match_count | ||
) |
Definition at line 93 of file docqual.cpp.
bool tesseract::Tesseract::word_contains_non_1_digit | ( | const char * | word, |
const char * | word_lengths | ||
) |
Definition at line 514 of file reject.cpp.
CRUNCH_MODE tesseract::Tesseract::word_deletable | ( | WERD_RES * | word, |
int16_t & | delete_mode | ||
) |
Definition at line 895 of file docqual.cpp.
bool tesseract::Tesseract::word_display | ( | PAGE_RES_IT * | pr_it | ) |
word_display() Word Processor
Display a word according to its display modes
Definition at line 759 of file pgedit.cpp.
bool tesseract::Tesseract::word_dumper | ( | PAGE_RES_IT * | pr_it | ) |
Dump members to the debug window
Definition at line 920 of file pgedit.cpp.
int16_t tesseract::Tesseract::word_outline_errs | ( | WERD_RES * | word | ) |
Definition at line 73 of file docqual.cpp.
bool tesseract::Tesseract::word_set_display | ( | PAGE_RES_IT * | pr_it | ) |
word_set_display() Word processor
Display word according to current display mode settings
Definition at line 944 of file pgedit.cpp.
int16_t tesseract::Tesseract::worst_noise_blob | ( | WERD_RES * | word_res, |
float * | worst_noise_score | ||
) |
Definition at line 710 of file fixspace.cpp.
void tesseract::Tesseract::write_results | ( | PAGE_RES_IT & | page_res_it, |
char | newline_type, | ||
bool | force_eol | ||
) |
Definition at line 105 of file output.cpp.
int tesseract::Tesseract::applybox_debug = 1 |
"Debug level"
Definition at line 850 of file tesseractclass.h.
char* tesseract::Tesseract::applybox_exposure_pattern = ".exp" |
"Exposure value follows this pattern in the image" " filename. The names of the image files are expected" " to be in the form [lang].[fontname].exp[num].tif"
Definition at line 855 of file tesseractclass.h.
bool tesseract::Tesseract::applybox_learn_chars_and_char_frags_mode = false |
"Learn both character fragments (as is done in the" " special low exposure mode) as well as unfragmented" " characters."
Definition at line 859 of file tesseractclass.h.
bool tesseract::Tesseract::applybox_learn_ngrams_mode = false |
"Each bounding box is assumed to contain ngrams. Only" " learn the ngrams whose outlines overlap horizontally."
Definition at line 862 of file tesseractclass.h.
int tesseract::Tesseract::applybox_page = 0 |
"Page number to apply boxes from"
Definition at line 851 of file tesseractclass.h.
int tesseract::Tesseract::bidi_debug = 0 |
"Debug level for BiDi"
Definition at line 849 of file tesseractclass.h.
bool tesseract::Tesseract::bland_unrej = false |
"unrej potential with no checks"
Definition at line 963 of file tesseractclass.h.
char* tesseract::Tesseract::chs_leading_punct = "('`\"" |
"Leading punctuation"
Definition at line 902 of file tesseractclass.h.
char* tesseract::Tesseract::chs_trailing_punct1 = ").,;:?!" |
"1st Trailing punctuation"
Definition at line 903 of file tesseractclass.h.
char* tesseract::Tesseract::chs_trailing_punct2 = ")'`\"" |
"2nd Trailing punctuation"
Definition at line 904 of file tesseractclass.h.
char* tesseract::Tesseract::conflict_set_I_l_1 = "Il1[]" |
"Il1 conflict set"
Definition at line 1083 of file tesseractclass.h.
bool tesseract::Tesseract::crunch_accept_ok = true |
"Use acceptability in okstring"
Definition at line 992 of file tesseractclass.h.
int tesseract::Tesseract::crunch_debug = 0 |
"As it says"
Definition at line 1001 of file tesseractclass.h.
double tesseract::Tesseract::crunch_del_cert = -10.0 |
"POTENTIAL crunch cert lt this"
Definition at line 981 of file tesseractclass.h.
double tesseract::Tesseract::crunch_del_high_word = 1.5 |
"Del if word gt xht x this above bl"
Definition at line 986 of file tesseractclass.h.
double tesseract::Tesseract::crunch_del_low_word = 0.5 |
"Del if word gt xht x this below bl"
Definition at line 987 of file tesseractclass.h.
double tesseract::Tesseract::crunch_del_max_ht = 3.0 |
"Del if word ht gt xht x this"
Definition at line 983 of file tesseractclass.h.
double tesseract::Tesseract::crunch_del_min_ht = 0.7 |
"Del if word ht lt xht x this"
Definition at line 982 of file tesseractclass.h.
double tesseract::Tesseract::crunch_del_min_width = 3.0 |
"Del if word width lt xht x this"
Definition at line 984 of file tesseractclass.h.
double tesseract::Tesseract::crunch_del_rating = 60 |
"POTENTIAL crunch rating lt this"
Definition at line 980 of file tesseractclass.h.
bool tesseract::Tesseract::crunch_early_convert_bad_unlv_chs = false |
"Take out ~^ early?"
Definition at line 971 of file tesseractclass.h.
bool tesseract::Tesseract::crunch_early_merge_tess_fails = true |
"Before word crunch?"
Definition at line 970 of file tesseractclass.h.
bool tesseract::Tesseract::crunch_include_numerals = false |
"Fiddle alpha figures"
Definition at line 995 of file tesseractclass.h.
bool tesseract::Tesseract::crunch_leave_accept_strings = false |
"Don't pot crunch sensible strings"
Definition at line 994 of file tesseractclass.h.
int tesseract::Tesseract::crunch_leave_lc_strings = 4 |
"Don't crunch words with long lower case strings"
Definition at line 997 of file tesseractclass.h.
bool tesseract::Tesseract::crunch_leave_ok_strings = true |
"Don't touch sensible strings"
Definition at line 991 of file tesseractclass.h.
int tesseract::Tesseract::crunch_leave_uc_strings = 4 |
"Don't crunch words with long upper case strings"
Definition at line 999 of file tesseractclass.h.
int tesseract::Tesseract::crunch_long_repetitions = 3 |
"Crunch words with long repetitions"
Definition at line 1000 of file tesseractclass.h.
double tesseract::Tesseract::crunch_poor_garbage_cert = -9.0 |
"crunch garbage cert lt this"
Definition at line 975 of file tesseractclass.h.
double tesseract::Tesseract::crunch_poor_garbage_rate = 60 |
"crunch garbage rating lt this"
Definition at line 976 of file tesseractclass.h.
bool tesseract::Tesseract::crunch_pot_garbage = true |
"POTENTIAL crunch garbage"
Definition at line 979 of file tesseractclass.h.
int tesseract::Tesseract::crunch_pot_indicators = 1 |
"How many potential indicators needed"
Definition at line 990 of file tesseractclass.h.
double tesseract::Tesseract::crunch_pot_poor_cert = -8.0 |
"POTENTIAL crunch cert lt this"
Definition at line 978 of file tesseractclass.h.
double tesseract::Tesseract::crunch_pot_poor_rate = 40 |
"POTENTIAL crunch rating lt this"
Definition at line 977 of file tesseractclass.h.
int tesseract::Tesseract::crunch_rating_max = 10 |
"For adj length in rating per ch"
Definition at line 989 of file tesseractclass.h.
double tesseract::Tesseract::crunch_small_outlines_size = 0.6 |
"Small if lt xht x this"
Definition at line 988 of file tesseractclass.h.
bool tesseract::Tesseract::crunch_terrible_garbage = true |
"As it says"
Definition at line 973 of file tesseractclass.h.
double tesseract::Tesseract::crunch_terrible_rating = 80.0 |
"crunch rating lt this"
Definition at line 972 of file tesseractclass.h.
bool tesseract::Tesseract::debug_acceptable_wds = false |
"Dump word pass/fail chk"
Definition at line 901 of file tesseractclass.h.
int tesseract::Tesseract::debug_fix_space_level = 0 |
"Contextual fixspace debug"
Definition at line 1007 of file tesseractclass.h.
int tesseract::Tesseract::debug_noise_removal = 0 |
"Debug reassignment of small outlines"
Definition at line 885 of file tesseractclass.h.
int tesseract::Tesseract::debug_x_ht_level = 0 |
"Reestimate debug"
Definition at line 900 of file tesseractclass.h.
bool tesseract::Tesseract::docqual_excuse_outline_errs = false |
"Allow outline errs in unrejection?"
Definition at line 931 of file tesseractclass.h.
bool tesseract::Tesseract::enable_noise_removal = true |
"Remove and conditionally reassign small outlines when they" " confuse layout analysis, determining diacritics vs noise"
Definition at line 884 of file tesseractclass.h.
char* tesseract::Tesseract::file_type = ".tif" |
"Filename extension"
Definition at line 1090 of file tesseractclass.h.
int tesseract::Tesseract::fixsp_done_mode = 1 |
"What constitutes done for spacing"
Definition at line 1006 of file tesseractclass.h.
int tesseract::Tesseract::fixsp_non_noise_limit = 1 |
"How many non-noise blbs either side?"
Definition at line 1003 of file tesseractclass.h.
double tesseract::Tesseract::fixsp_small_outlines_size = 0.28 |
"Small if lt xht x this"
Definition at line 1004 of file tesseractclass.h.
bool tesseract::Tesseract::hocr_font_info = false |
"Add font info to hocr output"
Definition at line 969 of file tesseractclass.h.
bool tesseract::Tesseract::interactive_display_mode = false |
"Run interactively?"
Definition at line 1089 of file tesseractclass.h.
int tesseract::Tesseract::jpg_quality = 85 |
"Set JPEG quality level"
Definition at line 1044 of file tesseractclass.h.
int tesseract::Tesseract::lstm_choice_mode = 0 |
"Allows including alternative symbol choices in the hOCR output. " "Valid input values are 0, 1 and 2. 0 is the default value. " "With 1 the alternative symbol choices per timestep are included. " "With 2 the alternative symbol choices are accumulated per character."
Definition at line 1125 of file tesseractclass.h.
bool tesseract::Tesseract::lstm_use_matrix = 1 |
"Use ratings matrix/beam search with lstm"
Definition at line 927 of file tesseractclass.h.
int tesseract::Tesseract::min_characters_to_try = 50 |
"Specify minimum characters to try during OSD"
Definition at line 1047 of file tesseractclass.h.
double tesseract::Tesseract::min_orientation_margin = 7.0 |
"Min acceptable orientation margin"
Definition at line 1099 of file tesseractclass.h.
int tesseract::Tesseract::min_sane_x_ht_pixels = 8 |
"Reject any x-ht lt or eq to this"
Definition at line 1084 of file tesseractclass.h.
int tesseract::Tesseract::multilang_debug_level = 0 |
"Print multilang debug info."
Definition at line 922 of file tesseractclass.h.
double tesseract::Tesseract::noise_cert_basechar = -8.0 |
"Hingepoint for base char certainty"
Definition at line 888 of file tesseractclass.h.
double tesseract::Tesseract::noise_cert_disjoint = -2.5 |
"Hingepoint for disjoint certainty"
Definition at line 891 of file tesseractclass.h.
double tesseract::Tesseract::noise_cert_factor = 0.375 |
"Scaling on certainty diff from Hingepoint"
Definition at line 897 of file tesseractclass.h.
double tesseract::Tesseract::noise_cert_punc = -2.5 |
"Threshold for new punc char certainty"
Definition at line 894 of file tesseractclass.h.
int tesseract::Tesseract::noise_maxperblob = 8 |
"Max diacritics to apply to a blob"
Definition at line 898 of file tesseractclass.h.
int tesseract::Tesseract::noise_maxperword = 16 |
"Max diacritics to apply to a word"
Definition at line 899 of file tesseractclass.h.
char* tesseract::Tesseract::numeric_punctuation = ".," |
"Punct. chs expected WITHIN numbers"
Definition at line 1009 of file tesseractclass.h.
int tesseract::Tesseract::ocr_devanagari_split_strategy = tesseract::ShiroRekhaSplitter::NO_SPLIT |
"Whether to use the top-line splitting process for Devanagari " "documents while performing ocr."
Definition at line 844 of file tesseractclass.h.
char* tesseract::Tesseract::ok_repeated_ch_non_alphanum_wds = "-?*\075" |
"Allow NN to unrej"
Definition at line 1082 of file tesseractclass.h.
char* tesseract::Tesseract::outlines_2 = "ij!?%\":;" |
"Non standard number of outlines"
Definition at line 929 of file tesseractclass.h.
char* tesseract::Tesseract::outlines_odd = "%| " |
"Non standard number of outlines"
Definition at line 928 of file tesseractclass.h.
char* tesseract::Tesseract::page_separator = "\f" |
"Page separator (default is form feed control character)"
Definition at line 1120 of file tesseractclass.h.
int tesseract::Tesseract::pageseg_devanagari_split_strategy = tesseract::ShiroRekhaSplitter::NO_SPLIT |
"Whether to use the top-line splitting process for Devanagari " "documents while performing page-segmentation."
Definition at line 840 of file tesseractclass.h.
int tesseract::Tesseract::paragraph_debug_level = 0 |
"Print paragraph debug info."
Definition at line 923 of file tesseractclass.h.
bool tesseract::Tesseract::paragraph_text_based = true |
"Run paragraph detection on the post-text-recognition " "(more accurate)"
Definition at line 926 of file tesseractclass.h.
bool tesseract::Tesseract::poly_allow_detailed_fx = false |
"Allow feature extractors to see the original outline"
Definition at line 1103 of file tesseractclass.h.
bool tesseract::Tesseract::preserve_interword_spaces = false |
"Preserve multiple interword spaces"
Definition at line 1118 of file tesseractclass.h.
double tesseract::Tesseract::quality_blob_pc = 0.0 |
"good_quality_doc gte good blobs limit"
Definition at line 906 of file tesseractclass.h.
double tesseract::Tesseract::quality_char_pc = 0.95 |
"good_quality_doc gte good char limit"
Definition at line 909 of file tesseractclass.h.
int tesseract::Tesseract::quality_min_initial_alphas_reqd = 2 |
"alphas in a good word"
Definition at line 910 of file tesseractclass.h.
double tesseract::Tesseract::quality_outline_pc = 1.0 |
"good_quality_doc lte outline error limit"
Definition at line 908 of file tesseractclass.h.
double tesseract::Tesseract::quality_rej_pc = 0.08 |
"good_quality_doc lte rejection limit"
Definition at line 905 of file tesseractclass.h.
double tesseract::Tesseract::quality_rowrej_pc = 1.1 |
"good_quality_doc gte good char limit"
Definition at line 965 of file tesseractclass.h.
bool tesseract::Tesseract::rej_1Il_trust_permuter_type = true |
"Don't double check"
Definition at line 1073 of file tesseractclass.h.
bool tesseract::Tesseract::rej_1Il_use_dict_word = false |
"Use dictword test"
Definition at line 1072 of file tesseractclass.h.
bool tesseract::Tesseract::rej_alphas_in_number_perm = false |
"Extend permuter check"
Definition at line 1078 of file tesseractclass.h.
bool tesseract::Tesseract::rej_trust_doc_dawg = false |
"Use DOC dawg in 1Il conf. detector"
Definition at line 1071 of file tesseractclass.h.
bool tesseract::Tesseract::rej_use_good_perm = true |
"Individual rejection control"
Definition at line 1076 of file tesseractclass.h.
bool tesseract::Tesseract::rej_use_sensible_wd = false |
"Extend permuter check"
Definition at line 1077 of file tesseractclass.h.
bool tesseract::Tesseract::rej_use_tess_accepted = true |
"Individual rejection control"
Definition at line 1074 of file tesseractclass.h.
bool tesseract::Tesseract::rej_use_tess_blanks = true |
"Individual rejection control"
Definition at line 1075 of file tesseractclass.h.
double tesseract::Tesseract::rej_whole_of_mostly_reject_word_fract = 0.85 |
"if >this fract"
Definition at line 1079 of file tesseractclass.h.
double tesseract::Tesseract::subscript_max_y_top = 0.5 |
"Maximum top of a character measured as a multiple of x-height " "above the baseline for us to reconsider whether it's a " "subscript."
Definition at line 1028 of file tesseractclass.h.
double tesseract::Tesseract::superscript_bettered_certainty = 0.97 |
"What reduction in " "badness do we think sufficient to choose a superscript over " "what we'd thought. For example, a value of 0.6 means we want " "to reduce badness of certainty by 40%"
Definition at line 1020 of file tesseractclass.h.
int tesseract::Tesseract::superscript_debug = 0 |
"Debug level for sub & superscript fixer"
Definition at line 1013 of file tesseractclass.h.
double tesseract::Tesseract::superscript_min_y_bottom = 0.3 |
"Minimum bottom of a character measured as a multiple of " "x-height above the baseline for us to reconsider whether it's " "a superscript."
Definition at line 1032 of file tesseractclass.h.
double tesseract::Tesseract::superscript_scaledown_ratio = 0.4 |
"A superscript scaled down more than this is unbelievably " "small. For example, 0.3 means we expect the font size to " "be no smaller than 30% of the text line font size."
Definition at line 1024 of file tesseractclass.h.
double tesseract::Tesseract::superscript_worse_certainty = 2.0 |
"How many times worse " "certainty does a superscript position glyph need to be for us " "to try classifying it as a char with a different baseline?"
Definition at line 1016 of file tesseractclass.h.
double tesseract::Tesseract::suspect_accept_rating = -999.9 |
"Accept good rating limit"
Definition at line 1056 of file tesseractclass.h.
bool tesseract::Tesseract::suspect_constrain_1Il = false |
"UNLV keep 1Il chars rejected"
Definition at line 1054 of file tesseractclass.h.
int tesseract::Tesseract::suspect_level = 99 |
"Suspect marker level"
Definition at line 1050 of file tesseractclass.h.
double tesseract::Tesseract::suspect_rating_per_ch = 999.9 |
"Don't touch bad rating limit"
Definition at line 1055 of file tesseractclass.h.
int tesseract::Tesseract::suspect_short_words = 2 |
"Don't Suspect dict wds longer than this"
Definition at line 1053 of file tesseractclass.h.
int tesseract::Tesseract::suspect_space_level = 100 |
"Min suspect level for rejecting spaces"
Definition at line 1052 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_adaption_debug = false |
"Generate and print debug information for adaption"
Definition at line 848 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_ambigs_training = false |
"Perform training for ambiguities"
Definition at line 836 of file tesseractclass.h.
int tesseract::Tesseract::tessedit_bigram_debug = 0 |
"Amount of debug output for bigram " "correction."
Definition at line 881 of file tesseractclass.h.
char* tesseract::Tesseract::tessedit_char_blacklist = "" |
"Blacklist of chars not to recognize"
Definition at line 830 of file tesseractclass.h.
char* tesseract::Tesseract::tessedit_char_unblacklist = "" |
"List of chars to override tessedit_char_blacklist"
Definition at line 834 of file tesseractclass.h.
char* tesseract::Tesseract::tessedit_char_whitelist = "" |
"Whitelist of chars to recognize"
Definition at line 832 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_consistent_reps = true |
"Force all rep chars the same"
Definition at line 1063 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_create_boxfile = false |
"Output text with boxes"
Definition at line 1085 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_create_hocr = false |
"Write .html hOCR output file"
Definition at line 1039 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_create_pdf = false |
"Write .pdf output file"
Definition at line 1041 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_create_tsv = false |
"Write .tsv output file"
Definition at line 1040 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_create_txt = false |
"Write .txt output file"
Definition at line 1038 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_debug_block_rejection = false |
"Block and Row stats"
Definition at line 875 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_debug_doc_rejection = false |
"Page stats"
Definition at line 960 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_debug_fonts = false |
"Output font info per char"
Definition at line 874 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_debug_quality_metrics = false |
"Output data to debug file"
Definition at line 962 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_display_outwords = false |
"Draw output words"
Definition at line 863 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_dont_blkrej_good_wds = false |
"Use word segmentation quality metric"
Definition at line 949 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_dont_rowrej_good_wds = false |
"Use word segmentation quality metric"
Definition at line 951 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_dump_choices = false |
"Dump char choices"
Definition at line 864 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_dump_pageseg_images = false |
"Dump intermediate images made during page segmentation"
Definition at line 821 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_enable_bigram_correction = true |
"Enable correction based on the word bigram dictionary."
Definition at line 877 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_enable_dict_correction = false |
"Enable single word correction based on the dictionary."
Definition at line 879 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_enable_doc_dict = true |
"Add words to the document dictionary"
Definition at line 873 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_fix_fuzzy_spaces = true |
"Try to improve fuzzy spaces"
Definition at line 867 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_fix_hyphens = true |
"Crunch double hyphens?"
Definition at line 870 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_flip_0O = true |
"Contextual 0O O0 flips"
Definition at line 1066 of file tesseractclass.h.
double tesseract::Tesseract::tessedit_good_doc_still_rowrej_wd = 1.1 |
"rej good doc wd if more than this fraction rejected"
Definition at line 957 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_good_quality_unrej = true |
"Reduce rejection on good docs"
Definition at line 933 of file tesseractclass.h.
int tesseract::Tesseract::tessedit_image_border = 2 |
"Rej blbs near image edge limit"
Definition at line 1080 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_init_config_only = false |
"Only initialize with the config file. Useful if the instance is " "not going to be used for OCR but say only for layout analysis."
Definition at line 1106 of file tesseractclass.h.
char* tesseract::Tesseract::tessedit_load_sublangs = "" |
"List of languages to load with this one"
Definition at line 1093 of file tesseractclass.h.
double tesseract::Tesseract::tessedit_lower_flip_hyphen = 1.5 |
"Aspect ratio dot/hyphen test"
Definition at line 1068 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_make_boxes_from_boxes = false |
"Generate more boxes from boxed chars"
Definition at line 817 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_matcher_log = false |
"Log matcher activity"
Definition at line 916 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_minimal_rej_pass1 = false |
"Do minimal rejection on pass 1 output"
Definition at line 914 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_minimal_rejection = false |
"Only reject tess failures"
Definition at line 1057 of file tesseractclass.h.
int tesseract::Tesseract::tessedit_ocr_engine_mode = tesseract::OEM_DEFAULT |
"Which OCR engine(s) to run (Tesseract, LSTM, both). Defaults" " to loading and running the most accurate available."
Definition at line 828 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_override_permuter = true |
"According to dict_word"
Definition at line 1091 of file tesseractclass.h.
int tesseract::Tesseract::tessedit_page_number = -1 |
"-1 -> All pages, else specific page to process"
Definition at line 1087 of file tesseractclass.h.
int tesseract::Tesseract::tessedit_pageseg_mode = PSM_SINGLE_BLOCK |
"Page seg mode: 0=osd only, 1=auto+osd, 2=auto, 3=col, 4=block," " 5=line, 6=word, 7=char" " (Values from PageSegMode enum in publictypes.h)"
Definition at line 825 of file tesseractclass.h.
int tesseract::Tesseract::tessedit_parallelize = 0 |
"Run in parallel where possible"
Definition at line 1116 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_prefer_joined_punct = false |
"Reward punctuation joins"
Definition at line 1005 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_preserve_blk_rej_perfect_wds = true |
"Only rej partially rejected words in block rejection"
Definition at line 945 of file tesseractclass.h.
int tesseract::Tesseract::tessedit_preserve_min_wd_len = 2 |
"Only preserve wds longer than this"
Definition at line 953 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_preserve_row_rej_perfect_wds = true |
"Only rej partially rejected words in row rejection"
Definition at line 947 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_redo_xheight = true |
"Check/Correct x-height"
Definition at line 871 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_reject_bad_qual_wds = true |
"Reject all bad quality wds"
Definition at line 959 of file tesseractclass.h.
double tesseract::Tesseract::tessedit_reject_block_percent = 45.00 |
"%rej allowed before rej whole block"
Definition at line 938 of file tesseractclass.h.
double tesseract::Tesseract::tessedit_reject_doc_percent = 65.00 |
"%rej allowed before rej whole doc"
Definition at line 936 of file tesseractclass.h.
int tesseract::Tesseract::tessedit_reject_mode = 0 |
"Rejection algorithm"
Definition at line 1064 of file tesseractclass.h.
double tesseract::Tesseract::tessedit_reject_row_percent = 40.00 |
"%rej allowed before rej whole row"
Definition at line 940 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_rejection_debug = false |
"Adaption debug"
Definition at line 1065 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_resegment_from_boxes = false |
"Take segmentation and labeling from box file"
Definition at line 811 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_resegment_from_line_boxes = false |
"Conversion of word/line box file to char box file"
Definition at line 813 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_row_rej_good_docs = true |
"Apply row rejection to good docs"
Definition at line 955 of file tesseractclass.h.
int tesseract::Tesseract::tessedit_tess_adaption_mode = 0x27 |
"Adaptation decision algorithm for tess"
Definition at line 912 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_test_adaption = false |
"Test adaption criteria"
Definition at line 915 of file tesseractclass.h.
int tesseract::Tesseract::tessedit_test_adaption_mode = 3 |
"Adaptation decision algorithm for tess"
Definition at line 918 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_timing_debug = false |
"Print timing stats"
Definition at line 865 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_train_from_boxes = false |
"Generate training data from boxed chars"
Definition at line 815 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_train_line_recognizer = false |
"Break input into lines and remap boxes if present"
Definition at line 819 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_unrej_any_wd = false |
"Don't bother with word plausibility"
Definition at line 869 of file tesseractclass.h.
double tesseract::Tesseract::tessedit_upper_flip_hyphen = 1.8 |
"Aspect ratio dot/hyphen test"
Definition at line 1070 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_use_primary_params_model = false |
"In multilingual mode use params model of the primary language"
Definition at line 1095 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_use_reject_spaces = true |
"Reject spaces?"
Definition at line 934 of file tesseractclass.h.
double tesseract::Tesseract::tessedit_whole_wd_rej_row_percent = 70.00 |
"Number of row rejects in whole word rejects" "which prevents whole row rejection"
Definition at line 943 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_word_for_word = false |
"Make output have exactly one word per WERD"
Definition at line 1060 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_write_block_separators = false |
"Write block separators in output"
Definition at line 1034 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_write_images = false |
"Capture the image from the IPE"
Definition at line 1088 of file tesseractclass.h.
char* tesseract::Tesseract::tessedit_write_params_to_file = "" |
"Write all parameters to the given file."
Definition at line 846 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_write_rep_codes = false |
"Write repetition char code"
Definition at line 1036 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_write_unlv = false |
"Write .unlv output file"
Definition at line 1037 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_zero_kelvin_rejection = false |
"Don't reject ANYTHING AT ALL"
Definition at line 1062 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_zero_rejection = false |
"Don't reject ANYTHING"
Definition at line 1058 of file tesseractclass.h.
bool tesseract::Tesseract::test_pt = false |
"Test for point"
Definition at line 919 of file tesseractclass.h.
double tesseract::Tesseract::test_pt_x = 99999.99 |
"xcoord"
Definition at line 920 of file tesseractclass.h.
double tesseract::Tesseract::test_pt_y = 99999.99 |
"ycoord"
Definition at line 921 of file tesseractclass.h.
bool tesseract::Tesseract::textonly_pdf = false |
"Create PDF with only one invisible text layer"
Definition at line 1043 of file tesseractclass.h.
bool tesseract::Tesseract::textord_equation_detect = false |
"Turn on equation detector"
Definition at line 1107 of file tesseractclass.h.
double tesseract::Tesseract::textord_tabfind_aligned_gap_fraction = 0.75 |
"Fraction of height used as a minimum gap for aligned blobs."
Definition at line 1115 of file tesseractclass.h.
bool tesseract::Tesseract::textord_tabfind_force_vertical_text = false |
"Force using vertical text page mode"
Definition at line 1110 of file tesseractclass.h.
bool tesseract::Tesseract::textord_tabfind_show_vlines = false |
"Debug line finding"
Definition at line 1100 of file tesseractclass.h.
bool tesseract::Tesseract::textord_tabfind_vertical_text = true |
"Enable vertical detection"
Definition at line 1108 of file tesseractclass.h.
double tesseract::Tesseract::textord_tabfind_vertical_text_ratio = 0.5 |
"Fraction of textlines deemed vertical to use vertical page " "mode"
Definition at line 1113 of file tesseractclass.h.
bool tesseract::Tesseract::textord_use_cjk_fp_model = FALSE |
"Use CJK fixed pitch model"
Definition at line 1101 of file tesseractclass.h.
bool tesseract::Tesseract::unlv_tilde_crunching = false |
"Mark v.bad words for tilde crunch"
Definition at line 967 of file tesseractclass.h.
char* tesseract::Tesseract::unrecognised_char = "|" |
"Output char for unidentified blobs"
Definition at line 1049 of file tesseractclass.h.
int tesseract::Tesseract::user_defined_dpi = 0 |
"Specify DPI for input image"
Definition at line 1045 of file tesseractclass.h.
int tesseract::Tesseract::x_ht_acceptance_tolerance = 8 |
"Max allowed deviation of blob top outside of font data"
Definition at line 1011 of file tesseractclass.h.
int tesseract::Tesseract::x_ht_min_change = 8 |
"Min change in xht before actually trying it"
Definition at line 1012 of file tesseractclass.h.