tessapi/3.x/a00736_source.html

 // File:        tesseractclass.h

 // Description: The Tesseract class. It holds/owns everything needed

 //              to run Tesseract on a single language, and also a set of

 //              sub-Tesseracts to run sub-languages. For thread safety, *every*

 //              global variable goes in here, directly, or indirectly.

 //              This makes it safe to run multiple Tesseracts in different

 //              threads in parallel, and keeps the different language

 //              instances separate.

 // Author:      Ray Smith

 // Created:     Fri Mar 07 08:17:01 PST 2008

 //

 // (C) Copyright 2008, Google Inc.

 // Licensed under the Apache License, Version 2.0 (the "License");

 // you may not use this file except in compliance with the License.

 // You may obtain a copy of the License at

 // http://www.apache.org/licenses/LICENSE-2.0

 // Unless required by applicable law or agreed to in writing, software

 // distributed under the License is distributed on an "AS IS" BASIS,

 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

 // See the License for the specific language governing permissions and

 // limitations under the License.

 //


 #ifndef TESSERACT_CCMAIN_TESSERACTCLASS_H__

 #define TESSERACT_CCMAIN_TESSERACTCLASS_H__


 #include "allheaders.h"

 #include "control.h"

 #include "docqual.h"

 #include "devanagari_processing.h"

 #include "genericvector.h"

 #include "params.h"

 #include "ocrclass.h"

 #include "textord.h"

 #include "wordrec.h"


 class BLOB_CHOICE_LIST_CLIST;

 class BLOCK_LIST;

 class CharSamp;

 struct OSResults;

 class PAGE_RES;

 class PAGE_RES_IT;

 struct Pix;

 class ROW;

 class SVMenuNode;

 class TBOX;

 class TO_BLOCK_LIST;

 class WERD;

 class WERD_CHOICE;

 class WERD_RES;


 // Top-level class for all tesseract global instance data.

 // This class either holds or points to all data used by an instance

 // of Tesseract, including the memory allocator. When this is

 // complete, Tesseract will be thread-safe. UNTIL THEN, IT IS NOT!

 //

 // NOTE to developers: Do not create cyclic dependencies through this class!

 // The directory dependency tree must remain a tree! The keep this clean,

 // lower-level code (eg in ccutil, the bottom level) must never need to

 // know about the content of a higher-level directory.

 // The following scheme will grant the easiest access to lower-level

 // global members without creating a cyclic dependency:

 //

 // Class Hierarchy (^ = inheritance):

 //

 //             CCUtil (ccutil/ccutil.h)

 //                         ^      Members include: UNICHARSET

 //            CUtil (cutil/cutil_class.h)

 //                         ^       Members include: TBLOB*, TEXTBLOCK*

 //           CCStruct (ccstruct/ccstruct.h)

 //                         ^       Members include: Image

 //           Classify (classify/classify.h)

 //                         ^       Members include: Dict

 //             WordRec (wordrec/wordrec.h)

 //                         ^       Members include: WERD*, DENORM*

 //        Tesseract (ccmain/tesseractclass.h)

 //                                 Members include: Pix*, CubeRecoContext*,

 //                                 TesseractCubeCombiner*

 //

 // Other important classes:

 //

 //  TessBaseAPI (api/baseapi.h)

 //                                 Members include: BLOCK_LIST*, PAGE_RES*,

 //                                 Tesseract*, ImageThresholder*

 //  Dict (dict/dict.h)

 //                                 Members include: Image* (private)

 //

 // NOTE: that each level contains members that correspond to global

 // data that is defined (and used) at that level, not necessarily where

 // the type is defined so for instance:

 // BOOL_VAR_H(textord_show_blobs, false, "Display unsorted blobs");

 // goes inside the Textord class, not the cc_util class.


 namespace tesseract {


 class ColumnFinder;

 #ifndef ANDROID_BUILD

 class CubeLineObject;

 class CubeObject;

 class CubeRecoContext;

 #endif

 class EquationDetect;

 class Tesseract;

 #ifndef ANDROID_BUILD

 class TesseractCubeCombiner;

 #endif


 // A collection of various variables for statistics and debugging.

 struct TesseractStats {

   TesseractStats()

     : adaption_word_number(0),

       doc_blob_quality(0),

       doc_outline_errs(0),

       doc_char_quality(0),

       good_char_count(0),

       doc_good_char_quality(0),

       word_count(0),

       dict_words(0),

       tilde_crunch_written(false),

       last_char_was_newline(true),

       last_char_was_tilde(false),

       write_results_empty_block(true) {}


   inT32 adaption_word_number;

   inT16 doc_blob_quality;

   inT16 doc_outline_errs;

   inT16 doc_char_quality;

   inT16 good_char_count;

   inT16 doc_good_char_quality;

   inT32 word_count;  // count of word in the document

   inT32 dict_words;  // number of dicitionary words in the document

   STRING dump_words_str;  // accumulator used by dump_words()

   // Flags used by write_results()

   bool tilde_crunch_written;

   bool last_char_was_newline;

   bool last_char_was_tilde;

   bool write_results_empty_block;

 };


 // Struct to hold all the pointers to relevant data for processing a word.

 struct WordData {

   WordData() : word(NULL), row(NULL), block(NULL), prev_word(NULL) {}

   explicit WordData(const PAGE_RES_IT& page_res_it)

     : word(page_res_it.word()), row(page_res_it.row()->row),

       block(page_res_it.block()->block), prev_word(NULL) {}

   WordData(BLOCK* block_in, ROW* row_in, WERD_RES* word_res)

     : word(word_res), row(row_in), block(block_in), prev_word(NULL) {}


   WERD_RES* word;

   ROW* row;

   BLOCK* block;

   WordData* prev_word;

   PointerVector<WERD_RES> lang_words;

 };


 // Definition of a Tesseract WordRecognizer. The WordData provides the context

 // of row/block, in_word holds an initialized, possibly pre-classified word,

 // that the recognizer may or may not consume (but if so it sets *in_word=NULL)

 // and produces one or more output words in out_words, which may be the

 // consumed in_word, or may be generated independently.

 // This api allows both a conventional tesseract classifier to work, or a

 // line-level classifier that generates multiple words from a merged input.

 typedef void (Tesseract::*WordRecognizer)(const WordData& word_data,

                                           WERD_RES** in_word,

                                           PointerVector<WERD_RES>* out_words);


 class Tesseract : public Wordrec {

  public:

   Tesseract();

   ~Tesseract();


   // Clear as much used memory as possible without resetting the adaptive

   // classifier or losing any other classifier data.

   void Clear();

   // Clear all memory of adaption for this and all subclassifiers.

   void ResetAdaptiveClassifier();

   // Clear the document dictionary for this and all subclassifiers.

   void ResetDocumentDictionary();


   // Set the equation detector.

   void SetEquationDetect(EquationDetect* detector);


   // Simple accessors.

   const FCOORD& reskew() const {

     return reskew_;

   }

   // Destroy any existing pix and return a pointer to the pointer.

   Pix** mutable_pix_binary() {

     Clear();

     return &pix_binary_;

   }

   Pix* pix_binary() const {

     return pix_binary_;

   }

   Pix* pix_grey() const {

     return pix_grey_;

   }

   void set_pix_grey(Pix* grey_pix) {

     pixDestroy(&pix_grey_);

     pix_grey_ = grey_pix;

   }

   // Returns a pointer to a Pix representing the best available image of the

   // page. The image will be 8-bit grey if the input was grey or color. Note

   // that in grey 0 is black and 255 is white. If the input was binary, then

   // the returned Pix will be binary. Note that here black is 1 and white is 0.

   // To tell the difference pixGetDepth() will return 8 or 1.

   // In either case, the return value is a borrowed Pix, and should not be

   // deleted or pixDestroyed.

   Pix* BestPix() const {

     return pix_grey_ != NULL ? pix_grey_ : pix_binary_;

   }

   void set_pix_thresholds(Pix* thresholds) {

     pixDestroy(&pix_thresholds_);

     pix_thresholds_ = thresholds;

   }

   int source_resolution() const {

     return source_resolution_;

   }

   void set_source_resolution(int ppi) {

     source_resolution_ = ppi;

   }

   int ImageWidth() const {

     return pixGetWidth(pix_binary_);

   }

   int ImageHeight() const {

     return pixGetHeight(pix_binary_);

   }

   Pix* scaled_color() const {

     return scaled_color_;

   }

   int scaled_factor() const {

     return scaled_factor_;

   }

   void SetScaledColor(int factor, Pix* color) {

     scaled_factor_ = factor;

     scaled_color_ = color;

   }

   const Textord& textord() const {

     return textord_;

   }

   Textord* mutable_textord() {

     return &textord_;

   }


   bool right_to_left() const {

     return right_to_left_;

   }

   int num_sub_langs() const {

     return sub_langs_.size();

   }

   Tesseract* get_sub_lang(int index) const {

     return sub_langs_[index];

   }

   // Returns true if any language uses Tesseract (as opposed to cube).

   bool AnyTessLang() const {

     if (tessedit_ocr_engine_mode != OEM_CUBE_ONLY) return true;

     for (int i = 0; i < sub_langs_.size(); ++i) {

       if (sub_langs_[i]->tessedit_ocr_engine_mode != OEM_CUBE_ONLY)

         return true;

     }

     return false;

   }


   void SetBlackAndWhitelist();


   // Perform steps to prepare underlying binary image/other data structures for

   // page segmentation. Uses the strategy specified in the global variable

   // pageseg_devanagari_split_strategy for perform splitting while preparing for

   // page segmentation.

   void PrepareForPageseg();


   // Perform steps to prepare underlying binary image/other data structures for

   // Tesseract OCR. The current segmentation is required by this method.

   // Uses the strategy specified in the global variable

   // ocr_devanagari_split_strategy for performing splitting while preparing for

   // Tesseract ocr.

   void PrepareForTessOCR(BLOCK_LIST* block_list,

                          Tesseract* osd_tess, OSResults* osr);


   int SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,

                   Tesseract* osd_tess, OSResults* osr);

   void SetupWordScripts(BLOCK_LIST* blocks);

   int AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks,

                   TO_BLOCK_LIST* to_blocks, BLOBNBOX_LIST* diacritic_blobs,

                   Tesseract* osd_tess, OSResults* osr);

   ColumnFinder* SetupPageSegAndDetectOrientation(

       PageSegMode pageseg_mode, BLOCK_LIST* blocks, Tesseract* osd_tess,

       OSResults* osr, TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix,

       Pix** music_mask_pix);

   // par_control.cpp

   void PrerecAllWordsPar(const GenericVector<WordData>& words);


   bool ProcessTargetWord(const TBOX& word_box, const TBOX& target_word_box,

                          const char* word_config, int pass);

   // Sets up the words ready for whichever engine is to be run

   void SetupAllWordsPassN(int pass_n,

                           const TBOX* target_word_box,

                           const char* word_config,

                           PAGE_RES* page_res,

                           GenericVector<WordData>* words);

   // Sets up the single word ready for whichever engine is to be run.

   void SetupWordPassN(int pass_n, WordData* word);

   // Runs word recognition on all the words.

   bool RecogAllWordsPassN(int pass_n, ETEXT_DESC* monitor,

                           PAGE_RES_IT* pr_it,

                           GenericVector<WordData>* words);

   bool recog_all_words(PAGE_RES* page_res,

                        ETEXT_DESC* monitor,

                        const TBOX* target_word_box,

                        const char* word_config,

                        int dopasses);

   void rejection_passes(PAGE_RES* page_res,

                         ETEXT_DESC* monitor,

                         const TBOX* target_word_box,

                         const char* word_config);

   void bigram_correction_pass(PAGE_RES *page_res);

   void blamer_pass(PAGE_RES* page_res);

   // Sets script positions and detects smallcaps on all output words.

   void script_pos_pass(PAGE_RES* page_res);

   // Helper to recognize the word using the given (language-specific) tesseract.

   // Returns positive if this recognizer found more new best words than the

   // number kept from best_words.

   int RetryWithLanguage(const WordData& word_data,

                         WordRecognizer recognizer,

                         WERD_RES** in_word,

                         PointerVector<WERD_RES>* best_words);

   // Moves good-looking "noise"/diacritics from the reject list to the main

   // blob list on the current word. Returns true if anything was done, and

   // sets make_next_word_fuzzy if blob(s) were added to the end of the word.

   bool ReassignDiacritics(int pass, PAGE_RES_IT* pr_it,

                           bool* make_next_word_fuzzy);

   // Attempts to put noise/diacritic outlines into the blobs that they overlap.

   // Input: a set of noisy outlines that probably belong to the real_word.

   // Output: outlines that overlapped blobs are set to NULL and put back into

   // the word, either in the blobs or in the reject list.

   void AssignDiacriticsToOverlappingBlobs(

       const GenericVector<C_OUTLINE*>& outlines, int pass, WERD* real_word,

       PAGE_RES_IT* pr_it, GenericVector<bool>* word_wanted,

       GenericVector<bool>* overlapped_any_blob,

       GenericVector<C_BLOB*>* target_blobs);

   // Attempts to assign non-overlapping outlines to their nearest blobs or

   // make new blobs out of them.

   void AssignDiacriticsToNewBlobs(const GenericVector<C_OUTLINE*>& outlines,

                                   int pass, WERD* real_word, PAGE_RES_IT* pr_it,

                                   GenericVector<bool>* word_wanted,

                                   GenericVector<C_BLOB*>* target_blobs);

   // Starting with ok_outlines set to indicate which outlines overlap the blob,

   // chooses the optimal set (approximately) and returns true if any outlines

   // are desired, in which case ok_outlines indicates which ones.

   bool SelectGoodDiacriticOutlines(int pass, float certainty_threshold,

                                    PAGE_RES_IT* pr_it, C_BLOB* blob,

                                    const GenericVector<C_OUTLINE*>& outlines,

                                    int num_outlines,

                                    GenericVector<bool>* ok_outlines);

   // Classifies the given blob plus the outlines flagged by ok_outlines, undoes

   // the inclusion of the outlines, and returns the certainty of the raw choice.

   float ClassifyBlobPlusOutlines(const GenericVector<bool>& ok_outlines,

                                  const GenericVector<C_OUTLINE*>& outlines,

                                  int pass_n, PAGE_RES_IT* pr_it, C_BLOB* blob,

                                  STRING* best_str);

   // Classifies the given blob (part of word_data->word->word) as an individual

   // word, using languages, chopper etc, returning only the certainty of the

   // best raw choice, and undoing all the work done to fake out the word.

   float ClassifyBlobAsWord(int pass_n, PAGE_RES_IT* pr_it, C_BLOB* blob,

                            STRING* best_str, float* c2);

   void classify_word_and_language(int pass_n, PAGE_RES_IT* pr_it,

                                   WordData* word_data);

   void classify_word_pass1(const WordData& word_data,

                            WERD_RES** in_word,

                            PointerVector<WERD_RES>* out_words);

   void recog_pseudo_word(PAGE_RES* page_res,  // blocks to check

                          TBOX &selection_box);


   void fix_rep_char(PAGE_RES_IT* page_res_it);


   ACCEPTABLE_WERD_TYPE acceptable_word_string(const UNICHARSET& char_set,

                                               const char *s,

                                               const char *lengths);

   void match_word_pass_n(int pass_n, WERD_RES *word, ROW *row, BLOCK* block);

   void classify_word_pass2(const WordData& word_data,

                            WERD_RES** in_word,

                            PointerVector<WERD_RES>* out_words);

   void ReportXhtFixResult(bool accept_new_word, float new_x_ht,

                           WERD_RES* word, WERD_RES* new_word);

   bool RunOldFixXht(WERD_RES *word, BLOCK* block, ROW *row);

   bool TrainedXheightFix(WERD_RES *word, BLOCK* block, ROW *row);

   // Runs recognition with the test baseline shift and x-height and returns true

   // if there was an improvement in recognition result.

   bool TestNewNormalization(int original_misfits, float baseline_shift,

                             float new_x_ht, WERD_RES *word, BLOCK* block,

                             ROW *row);

   BOOL8 recog_interactive(PAGE_RES_IT* pr_it);


   // Set fonts of this word.

   void set_word_fonts(WERD_RES *word);

   void font_recognition_pass(PAGE_RES* page_res);

   void dictionary_correction_pass(PAGE_RES* page_res);

   BOOL8 check_debug_pt(WERD_RES *word, int location);


   bool SubAndSuperscriptFix(WERD_RES *word_res);

   void GetSubAndSuperscriptCandidates(const WERD_RES *word,

                                       int *num_rebuilt_leading,

                                       ScriptPos *leading_pos,

                                       float *leading_certainty,

                                       int *num_rebuilt_trailing,

                                       ScriptPos *trailing_pos,

                                       float *trailing_certainty,

                                       float *avg_certainty,

                                       float *unlikely_threshold);

   WERD_RES *TrySuperscriptSplits(int num_chopped_leading,

                                  float leading_certainty,

                                  ScriptPos leading_pos,

                                  int num_chopped_trailing,

                                  float trailing_certainty,

                                  ScriptPos trailing_pos,

                                  WERD_RES *word,

                                  bool *is_good,

                                  int *retry_leading,

                                  int *retry_trailing);

   bool BelievableSuperscript(bool debug,

                              const WERD_RES &word,

                              float certainty_threshold,

                              int *left_ok,

                              int *right_ok) const;


 #ifndef ANDROID_BUILD

   bool init_cube_objects(bool load_combiner,

                          TessdataManager *tessdata_manager);

   // Iterates through tesseract's results and calls cube on each word,

   // combining the results with the existing tesseract result.

   void run_cube_combiner(PAGE_RES *page_res);

   // Recognizes a single word using (only) cube. Compatible with

   // Tesseract's classify_word_pass1/classify_word_pass2.

   void cube_word_pass1(BLOCK* block, ROW *row, WERD_RES *word);

   // Cube recognizer to recognize a single word as with classify_word_pass1

   // but also returns the cube object in case the combiner is needed.

   CubeObject* cube_recognize_word(BLOCK* block, WERD_RES* word);

   // Combines the cube and tesseract results for a single word, leaving the

   // result in tess_word.

   void cube_combine_word(CubeObject* cube_obj, WERD_RES* cube_word,

                         WERD_RES* tess_word);

   // Call cube on the current word, and write the result to word.

   // Sets up a fake result  and returns false if something goes wrong.

   bool cube_recognize(CubeObject *cube_obj, BLOCK* block, WERD_RES *word);

   void fill_werd_res(const BoxWord& cube_box_word,

                      const char* cube_best_str,

                      WERD_RES* tess_werd_res);

   bool extract_cube_state(CubeObject* cube_obj, int* num_chars,

                           Boxa** char_boxes, CharSamp*** char_samples);

   bool create_cube_box_word(Boxa *char_boxes, int num_chars,

                             TBOX word_box, BoxWord* box_word);

 #endif


   void output_pass(PAGE_RES_IT &page_res_it, const TBOX *target_word_box);

   void write_results(PAGE_RES_IT &page_res_it,  // full info

                      char newline_type,         // type of newline

                      BOOL8 force_eol            // override tilde crunch?

                     );

   void set_unlv_suspects(WERD_RES *word);

   UNICHAR_ID get_rep_char(WERD_RES *word);  // what char is repeated?

   BOOL8 acceptable_number_string(const char *s,

                                  const char *lengths);

   inT16 count_alphanums(const WERD_CHOICE &word);

   inT16 count_alphas(const WERD_CHOICE &word);

   void read_config_file(const char *filename, SetParamConstraint constraint);

   // Initialize for potentially a set of languages defined by the language

   // string and recursively any additional languages required by any language

   // traineddata file (via tessedit_load_sublangs in its config) that is loaded.

   // See init_tesseract_internal for args.

   int init_tesseract(const char *arg0,

                      const char *textbase,

                      const char *language,

                      OcrEngineMode oem,

                      char **configs,

                      int configs_size,

                      const GenericVector<STRING> *vars_vec,

                      const GenericVector<STRING> *vars_values,

                      bool set_only_init_params);

   int init_tesseract(const char *datapath,

                      const char *language,

                      OcrEngineMode oem) {

     return init_tesseract(datapath, NULL, language, oem,

                           NULL, 0, NULL, NULL, false);

   }

   // Common initialization for a single language.

   // arg0 is the datapath for the tessdata directory, which could be the

   // path of the tessdata directory with no trailing /, or (if tessdata

   // lives in the same directory as the executable, the path of the executable,

   // hence the name arg0.

   // textbase is an optional output file basename (used only for training)

   // language is the language code to load.

   // oem controls which engine(s) will operate on the image

   // configs (argv) is an array of config filenames to load variables from.

   // May be NULL.

   // configs_size (argc) is the number of elements in configs.

   // vars_vec is an optional vector of variables to set.

   // vars_values is an optional corresponding vector of values for the variables

   // in vars_vec.

   // If set_only_init_params is true, then only the initialization variables

   // will be set.

   int init_tesseract_internal(const char *arg0,

                               const char *textbase,

                               const char *language,

                               OcrEngineMode oem,

                               char **configs,

                               int configs_size,

                               const GenericVector<STRING> *vars_vec,

                               const GenericVector<STRING> *vars_values,

                               bool set_only_init_params);


   // Set the universal_id member of each font to be unique among all

   // instances of the same font loaded.

   void SetupUniversalFontIds();


   int init_tesseract_lm(const char *arg0,

                         const char *textbase,

                         const char *language);


   void recognize_page(STRING& image_name);

   void end_tesseract();


   bool init_tesseract_lang_data(const char *arg0,

                                 const char *textbase,

                                 const char *language,

                                 OcrEngineMode oem,

                                 char **configs,

                                 int configs_size,

                                 const GenericVector<STRING> *vars_vec,

                                 const GenericVector<STRING> *vars_values,

                                 bool set_only_init_params);


   void ParseLanguageString(const char* lang_str,

                            GenericVector<STRING>* to_load,

                            GenericVector<STRING>* not_to_load);


   SVMenuNode *build_menu_new();

   #ifndef GRAPHICS_DISABLED

   void pgeditor_main(int width, int height, PAGE_RES* page_res);

   #endif  // GRAPHICS_DISABLED

   void process_image_event( // action in image win

                            const SVEvent &event);

   BOOL8 process_cmd_win_event(                 // UI command semantics

                               inT32 cmd_event,  // which menu item?

                               char *new_value   // any prompt data

                              );

   void debug_word(PAGE_RES* page_res, const TBOX &selection_box);

   void do_re_display(

       BOOL8 (tesseract::Tesseract::*word_painter)(PAGE_RES_IT* pr_it));

   BOOL8 word_display(PAGE_RES_IT* pr_it);

   BOOL8 word_bln_display(PAGE_RES_IT* pr_it);

   BOOL8 word_blank_and_set_display(PAGE_RES_IT* pr_its);

   BOOL8 word_set_display(PAGE_RES_IT* pr_it);

   // #ifndef GRAPHICS_DISABLED

   BOOL8 word_dumper(PAGE_RES_IT* pr_it);

   // #endif  // GRAPHICS_DISABLED

   void blob_feature_display(PAGE_RES* page_res, const TBOX& selection_box);

   // make rej map for word

   void make_reject_map(WERD_RES *word, ROW *row, inT16 pass);

   BOOL8 one_ell_conflict(WERD_RES *word_res, BOOL8 update_map);

   inT16 first_alphanum_index(const char *word,

                              const char *word_lengths);

   inT16 first_alphanum_offset(const char *word,

                               const char *word_lengths);

   inT16 alpha_count(const char *word,

                     const char *word_lengths);

   BOOL8 word_contains_non_1_digit(const char *word,

                                   const char *word_lengths);

   void dont_allow_1Il(WERD_RES *word);

   inT16 count_alphanums(  //how many alphanums

                         WERD_RES *word);

   void flip_0O(WERD_RES *word);

   BOOL8 non_0_digit(const UNICHARSET& ch_set, UNICHAR_ID unichar_id);

   BOOL8 non_O_upper(const UNICHARSET& ch_set, UNICHAR_ID unichar_id);

   BOOL8 repeated_nonalphanum_wd(WERD_RES *word, ROW *row);

   void nn_match_word(  //Match a word

                      WERD_RES *word,

                      ROW *row);

   void nn_recover_rejects(WERD_RES *word, ROW *row);

   void set_done(  //set done flag

                 WERD_RES *word,

                 inT16 pass);

   inT16 safe_dict_word(const WERD_RES *werd_res);  // is best_choice in dict?

   void flip_hyphens(WERD_RES *word);

   void reject_I_1_L(WERD_RES *word);

   void reject_edge_blobs(WERD_RES *word);

   void reject_mostly_rejects(WERD_RES *word);

   BOOL8 word_adaptable(  //should we adapt?

                        WERD_RES *word,

                        uinT16 mode);


   void recog_word_recursive(WERD_RES* word);

   void recog_word(WERD_RES *word);

   void split_and_recog_word(WERD_RES* word);

   void split_word(WERD_RES *word,

                   int split_pt,

                   WERD_RES **right_piece,

                   BlamerBundle **orig_blamer_bundle) const;

   void join_words(WERD_RES *word,

                   WERD_RES *word2,

                   BlamerBundle *orig_bb) const;

   BOOL8 digit_or_numeric_punct(WERD_RES *word, int char_position);

   inT16 eval_word_spacing(WERD_RES_LIST &word_res_list);

   void match_current_words(WERD_RES_LIST &words, ROW *row, BLOCK* block);

   inT16 fp_eval_word_spacing(WERD_RES_LIST &word_res_list);

   void fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK* block);

   void fix_fuzzy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK* block);

   void fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, BLOCK* block);

   void fix_fuzzy_spaces(                      //find fuzzy words

                         ETEXT_DESC *monitor,  //progress monitor

                         inT32 word_count,     //count of words in doc

                         PAGE_RES *page_res);

   void dump_words(WERD_RES_LIST &perm, inT16 score,

                   inT16 mode, BOOL8 improved);

   BOOL8 fixspace_thinks_word_done(WERD_RES *word);

   inT16 worst_noise_blob(WERD_RES *word_res, float *worst_noise_score);

   float blob_noise_score(TBLOB *blob);

   void break_noisiest_blob_word(WERD_RES_LIST &words);

   GARBAGE_LEVEL garbage_word(WERD_RES *word, BOOL8 ok_dict_word);

   BOOL8 potential_word_crunch(WERD_RES *word,

                               GARBAGE_LEVEL garbage_level,

                               BOOL8 ok_dict_word);

   void tilde_crunch(PAGE_RES_IT &page_res_it);

   void unrej_good_quality_words(  //unreject potential

                                 PAGE_RES_IT &page_res_it);

   void doc_and_block_rejection(  //reject big chunks

                                PAGE_RES_IT &page_res_it,

                                BOOL8 good_quality_doc);

   void quality_based_rejection(PAGE_RES_IT &page_res_it,

                                BOOL8 good_quality_doc);

   void convert_bad_unlv_chs(WERD_RES *word_res);

   void tilde_delete(PAGE_RES_IT &page_res_it);

   inT16 word_blob_quality(WERD_RES *word, ROW *row);

   void word_char_quality(WERD_RES *word, ROW *row, inT16 *match_count,

                          inT16 *accepted_match_count);

   void unrej_good_chs(WERD_RES *word, ROW *row);

   inT16 count_outline_errs(char c, inT16 outline_count);

   inT16 word_outline_errs(WERD_RES *word);

   BOOL8 terrible_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_level);

   CRUNCH_MODE word_deletable(WERD_RES *word, inT16 &delete_mode);

   inT16 failure_count(WERD_RES *word);

   BOOL8 noise_outlines(TWERD *word);

   void

   process_selected_words (

       PAGE_RES* page_res, // blocks to check

       //function to call

       TBOX & selection_box,

       BOOL8 (tesseract::Tesseract::*word_processor)(PAGE_RES_IT* pr_it));

   void tess_add_doc_word(                          //test acceptability

                          WERD_CHOICE *word_choice  //after context

                         );

   void tess_segment_pass_n(int pass_n, WERD_RES *word);

   bool tess_acceptable_word(WERD_RES *word);


   // Applies the box file based on the image name fname, and resegments

   // the words in the block_list (page), with:

   // blob-mode: one blob per line in the box file, words as input.

   // word/line-mode: one blob per space-delimited unit after the #, and one word

   // per line in the box file. (See comment above for box file format.)

   // If find_segmentation is true, (word/line mode) then the classifier is used

   // to re-segment words/lines to match the space-delimited truth string for

   // each box. In this case, the input box may be for a word or even a whole

   // text line, and the output words will contain multiple blobs corresponding

   // to the space-delimited input string.

   // With find_segmentation false, no classifier is needed, but the chopper

   // can still be used to correctly segment touching characters with the help

   // of the input boxes.

   // In the returned PAGE_RES, the WERD_RES are setup as they would be returned

   // from normal classification, ie. with a word, chopped_word, rebuild_word,

   // seam_array, denorm, box_word, and best_state, but NO best_choice or

   // raw_choice, as they would require a UNICHARSET, which we aim to avoid.

   // Instead, the correct_text member of WERD_RES is set, and this may be later

   // converted to a best_choice using CorrectClassifyWords. CorrectClassifyWords

   // is not required before calling ApplyBoxTraining.

   PAGE_RES* ApplyBoxes(const STRING& fname, bool find_segmentation,

                        BLOCK_LIST *block_list);


   // Any row xheight that is significantly different from the median is set

   // to the median.

   void PreenXHeights(BLOCK_LIST *block_list);


   // Builds a PAGE_RES from the block_list in the way required for ApplyBoxes:

   // All fuzzy spaces are removed, and all the words are maximally chopped.

   PAGE_RES* SetupApplyBoxes(const GenericVector<TBOX>& boxes,

                             BLOCK_LIST *block_list);

   // Tests the chopper by exhaustively running chop_one_blob.

   // The word_res will contain filled chopped_word, seam_array, denorm,

   // box_word and best_state for the maximally chopped word.

   void MaximallyChopWord(const GenericVector<TBOX>& boxes,

                          BLOCK* block, ROW* row, WERD_RES* word_res);

   // Gather consecutive blobs that match the given box into the best_state

   // and corresponding correct_text.

   // Fights over which box owns which blobs are settled by pre-chopping and

   // applying the blobs to box or next_box with the least non-overlap.

   // Returns false if the box was in error, which can only be caused by

   // failing to find an appropriate blob for a box.

   // This means that occasionally, blobs may be incorrectly segmented if the

   // chopper fails to find a suitable chop point.

   bool ResegmentCharBox(PAGE_RES* page_res, const TBOX *prev_box,

                         const TBOX& box, const TBOX& next_box,

                         const char* correct_text);

   // Consume all source blobs that strongly overlap the given box,

   // putting them into a new word, with the correct_text label.

   // Fights over which box owns which blobs are settled by

   // applying the blobs to box or next_box with the least non-overlap.

   // Returns false if the box was in error, which can only be caused by

   // failing to find an overlapping blob for a box.

   bool ResegmentWordBox(BLOCK_LIST *block_list,

                         const TBOX& box, const TBOX& next_box,

                         const char* correct_text);

   // Resegments the words by running the classifier in an attempt to find the

   // correct segmentation that produces the required string.

   void ReSegmentByClassification(PAGE_RES* page_res);

   // Converts the space-delimited string of utf8 text to a vector of UNICHAR_ID.

   // Returns false if an invalid UNICHAR_ID is encountered.

   bool ConvertStringToUnichars(const char* utf8,

                                GenericVector<UNICHAR_ID>* class_ids);

   // Resegments the word to achieve the target_text from the classifier.

   // Returns false if the re-segmentation fails.

   // Uses brute-force combination of upto kMaxGroupSize adjacent blobs, and

   // applies a full search on the classifier results to find the best classified

   // segmentation. As a compromise to obtain better recall, 1-1 ambigiguity

   // substitutions ARE used.

   bool FindSegmentation(const GenericVector<UNICHAR_ID>& target_text,

                         WERD_RES* word_res);

   // Recursive helper to find a match to the target_text (from text_index

   // position) in the choices (from choices_pos position).

   // Choices is an array of GenericVectors, of length choices_length, with each

   // element representing a starting position in the word, and the

   // GenericVector holding classification results for a sequence of consecutive

   // blobs, with index 0 being a single blob, index 1 being 2 blobs etc.

   void SearchForText(const GenericVector<BLOB_CHOICE_LIST*>* choices,

                      int choices_pos, int choices_length,

                      const GenericVector<UNICHAR_ID>& target_text,

                      int text_index,

                      float rating, GenericVector<int>* segmentation,

                      float* best_rating, GenericVector<int>* best_segmentation);

   // Counts up the labelled words and the blobs within.

   // Deletes all unused or emptied words, counting the unused ones.

   // Resets W_BOL and W_EOL flags correctly.

   // Builds the rebuild_word and rebuilds the box_word.

   void TidyUp(PAGE_RES* page_res);

   // Logs a bad box by line in the box file and box coords.

   void ReportFailedBox(int boxfile_lineno, TBOX box, const char *box_ch,

                        const char *err_msg);

   // Creates a fake best_choice entry in each WERD_RES with the correct text.

   void CorrectClassifyWords(PAGE_RES* page_res);

   // Call LearnWord to extract features for labelled blobs within each word.

   // Features are stored in an internal buffer.

   void ApplyBoxTraining(const STRING& fontname, PAGE_RES* page_res);


   // Returns the number of misfit blob tops in this word.

   int CountMisfitTops(WERD_RES *word_res);

   // Returns a new x-height in pixels (original image coords) that is

   // maximally compatible with the result in word_res.

   // Returns 0.0f if no x-height is found that is better than the current

   // estimate.

   float ComputeCompatibleXheight(WERD_RES *word_res, float* baseline_shift);

   // TODO(ocr-team): Find and remove obsolete parameters.

   BOOL_VAR_H(tessedit_resegment_from_boxes, false,

              "Take segmentation and labeling from box file");

   BOOL_VAR_H(tessedit_resegment_from_line_boxes, false,

               "Conversion of word/line box file to char box file");

   BOOL_VAR_H(tessedit_train_from_boxes, false,

              "Generate training data from boxed chars");

   BOOL_VAR_H(tessedit_make_boxes_from_boxes, false,

              "Generate more boxes from boxed chars");

   BOOL_VAR_H(tessedit_dump_pageseg_images, false,

              "Dump intermediate images made during page segmentation");

   INT_VAR_H(tessedit_pageseg_mode, PSM_SINGLE_BLOCK,

             "Page seg mode: 0=osd only, 1=auto+osd, 2=auto, 3=col, 4=block,"

             " 5=line, 6=word, 7=char"

             " (Values from PageSegMode enum in publictypes.h)");

   INT_VAR_H(tessedit_ocr_engine_mode, tesseract::OEM_TESSERACT_ONLY,

             "Which OCR engine(s) to run (Tesseract, Cube, both). Defaults"

             " to loading and running only Tesseract (no Cube, no combiner)."

             " (Values from OcrEngineMode enum in tesseractclass.h)");

   STRING_VAR_H(tessedit_char_blacklist, "",

                "Blacklist of chars not to recognize");

   STRING_VAR_H(tessedit_char_whitelist, "",

                "Whitelist of chars to recognize");

   STRING_VAR_H(tessedit_char_unblacklist, "",

                "List of chars to override tessedit_char_blacklist");

   BOOL_VAR_H(tessedit_ambigs_training, false,

              "Perform training for ambiguities");

   INT_VAR_H(pageseg_devanagari_split_strategy,

             tesseract::ShiroRekhaSplitter::NO_SPLIT,

             "Whether to use the top-line splitting process for Devanagari "

             "documents while performing page-segmentation.");

   INT_VAR_H(ocr_devanagari_split_strategy,

             tesseract::ShiroRekhaSplitter::NO_SPLIT,

             "Whether to use the top-line splitting process for Devanagari "

             "documents while performing ocr.");

   STRING_VAR_H(tessedit_write_params_to_file, "",

                "Write all parameters to the given file.");

   BOOL_VAR_H(tessedit_adaption_debug, false,

              "Generate and print debug information for adaption");

   INT_VAR_H(bidi_debug, 0, "Debug level for BiDi");

   INT_VAR_H(applybox_debug, 1, "Debug level");

   INT_VAR_H(applybox_page, 0, "Page number to apply boxes from");

   STRING_VAR_H(applybox_exposure_pattern, ".exp",

                "Exposure value follows this pattern in the image"

                " filename. The name of the image files are expected"

                " to be in the form [lang].[fontname].exp[num].tif");

   BOOL_VAR_H(applybox_learn_chars_and_char_frags_mode, false,

              "Learn both character fragments (as is done in the"

              " special low exposure mode) as well as unfragmented"

              " characters.");

   BOOL_VAR_H(applybox_learn_ngrams_mode, false,

              "Each bounding box is assumed to contain ngrams. Only"

              " learn the ngrams whose outlines overlap horizontally.");

   BOOL_VAR_H(tessedit_display_outwords, false, "Draw output words");

   BOOL_VAR_H(tessedit_dump_choices, false, "Dump char choices");

   BOOL_VAR_H(tessedit_timing_debug, false, "Print timing stats");

   BOOL_VAR_H(tessedit_fix_fuzzy_spaces, true,

              "Try to improve fuzzy spaces");

   BOOL_VAR_H(tessedit_unrej_any_wd, false,

              "Dont bother with word plausibility");

   BOOL_VAR_H(tessedit_fix_hyphens, true, "Crunch double hyphens?");

   BOOL_VAR_H(tessedit_redo_xheight, true, "Check/Correct x-height");

   BOOL_VAR_H(tessedit_enable_doc_dict, true,

              "Add words to the document dictionary");

   BOOL_VAR_H(tessedit_debug_fonts, false, "Output font info per char");

   BOOL_VAR_H(tessedit_debug_block_rejection, false, "Block and Row stats");

   BOOL_VAR_H(tessedit_enable_bigram_correction, true,

              "Enable correction based on the word bigram dictionary.");

   BOOL_VAR_H(tessedit_enable_dict_correction, false,

              "Enable single word correction based on the dictionary.");

   INT_VAR_H(tessedit_bigram_debug, 0, "Amount of debug output for bigram "

             "correction.");

   BOOL_VAR_H(enable_noise_removal, true,

              "Remove and conditionally reassign small outlines when they"

              " confuse layout analysis, determining diacritics vs noise");

   INT_VAR_H(debug_noise_removal, 0, "Debug reassignment of small outlines");

   // Worst (min) certainty, for which a diacritic is allowed to make the base

   // character worse and still be included.

   double_VAR_H(noise_cert_basechar, -8.0, "Hingepoint for base char certainty");

   // Worst (min) certainty, for which a non-overlapping diacritic is allowed to

   // make the base character worse and still be included.

   double_VAR_H(noise_cert_disjoint, -2.5, "Hingepoint for disjoint certainty");

   // Worst (min) certainty, for which a diacritic is allowed to make a new

   // stand-alone blob.

   double_VAR_H(noise_cert_punc, -2.5, "Threshold for new punc char certainty");

   // Factor of certainty margin for adding diacritics to not count as worse.

   double_VAR_H(noise_cert_factor, 0.375,

                "Scaling on certainty diff from Hingepoint");

   INT_VAR_H(noise_maxperblob, 8, "Max diacritics to apply to a blob");

   INT_VAR_H(noise_maxperword, 16, "Max diacritics to apply to a word");

   INT_VAR_H(debug_x_ht_level, 0, "Reestimate debug");

   BOOL_VAR_H(debug_acceptable_wds, false, "Dump word pass/fail chk");

   STRING_VAR_H(chs_leading_punct, "('`\"", "Leading punctuation");

   STRING_VAR_H(chs_trailing_punct1, ").,;:?!", "1st Trailing punctuation");

   STRING_VAR_H(chs_trailing_punct2, ")'`\"", "2nd Trailing punctuation");

   double_VAR_H(quality_rej_pc, 0.08, "good_quality_doc lte rejection limit");

   double_VAR_H(quality_blob_pc, 0.0, "good_quality_doc gte good blobs limit");

   double_VAR_H(quality_outline_pc, 1.0,

                "good_quality_doc lte outline error limit");

   double_VAR_H(quality_char_pc, 0.95, "good_quality_doc gte good char limit");

   INT_VAR_H(quality_min_initial_alphas_reqd, 2, "alphas in a good word");

   INT_VAR_H(tessedit_tess_adaption_mode, 0x27,

             "Adaptation decision algorithm for tess");

   BOOL_VAR_H(tessedit_minimal_rej_pass1, false,

              "Do minimal rejection on pass 1 output");

   BOOL_VAR_H(tessedit_test_adaption, false, "Test adaption criteria");

   BOOL_VAR_H(tessedit_matcher_log, false, "Log matcher activity");

   INT_VAR_H(tessedit_test_adaption_mode, 3,

             "Adaptation decision algorithm for tess");

   BOOL_VAR_H(test_pt, false, "Test for point");

   double_VAR_H(test_pt_x, 99999.99, "xcoord");

   double_VAR_H(test_pt_y, 99999.99, "ycoord");

   INT_VAR_H(paragraph_debug_level, 0, "Print paragraph debug info.");

   BOOL_VAR_H(paragraph_text_based, true,

              "Run paragraph detection on the post-text-recognition "

              "(more accurate)");

   INT_VAR_H(cube_debug_level, 1, "Print cube debug info.");

   STRING_VAR_H(outlines_odd, "%| ", "Non standard number of outlines");

   STRING_VAR_H(outlines_2, "ij!?%\":;", "Non standard number of outlines");

   BOOL_VAR_H(docqual_excuse_outline_errs, false,

              "Allow outline errs in unrejection?");

   BOOL_VAR_H(tessedit_good_quality_unrej, true,

              "Reduce rejection on good docs");

   BOOL_VAR_H(tessedit_use_reject_spaces, true, "Reject spaces?");

   double_VAR_H(tessedit_reject_doc_percent, 65.00,

                "%rej allowed before rej whole doc");

   double_VAR_H(tessedit_reject_block_percent, 45.00,

                "%rej allowed before rej whole block");

   double_VAR_H(tessedit_reject_row_percent, 40.00,

                "%rej allowed before rej whole row");

   double_VAR_H(tessedit_whole_wd_rej_row_percent, 70.00,

                "Number of row rejects in whole word rejects"

                "which prevents whole row rejection");

   BOOL_VAR_H(tessedit_preserve_blk_rej_perfect_wds, true,

              "Only rej partially rejected words in block rejection");

   BOOL_VAR_H(tessedit_preserve_row_rej_perfect_wds, true,

              "Only rej partially rejected words in row rejection");

   BOOL_VAR_H(tessedit_dont_blkrej_good_wds, false,

              "Use word segmentation quality metric");

   BOOL_VAR_H(tessedit_dont_rowrej_good_wds, false,

              "Use word segmentation quality metric");

   INT_VAR_H(tessedit_preserve_min_wd_len, 2,

             "Only preserve wds longer than this");

   BOOL_VAR_H(tessedit_row_rej_good_docs, true,

              "Apply row rejection to good docs");

   double_VAR_H(tessedit_good_doc_still_rowrej_wd, 1.1,

                "rej good doc wd if more than this fraction rejected");

   BOOL_VAR_H(tessedit_reject_bad_qual_wds, true,

              "Reject all bad quality wds");

   BOOL_VAR_H(tessedit_debug_doc_rejection, false, "Page stats");

   BOOL_VAR_H(tessedit_debug_quality_metrics, false,

              "Output data to debug file");

   BOOL_VAR_H(bland_unrej, false, "unrej potential with no chekcs");

   double_VAR_H(quality_rowrej_pc, 1.1,

                "good_quality_doc gte good char limit");

   BOOL_VAR_H(unlv_tilde_crunching, true,

              "Mark v.bad words for tilde crunch");

   BOOL_VAR_H(hocr_font_info, false,

              "Add font info to hocr output");

   BOOL_VAR_H(crunch_early_merge_tess_fails, true, "Before word crunch?");

   BOOL_VAR_H(crunch_early_convert_bad_unlv_chs, false, "Take out ~^ early?");

   double_VAR_H(crunch_terrible_rating, 80.0, "crunch rating lt this");

   BOOL_VAR_H(crunch_terrible_garbage, true, "As it says");

   double_VAR_H(crunch_poor_garbage_cert, -9.0,

                "crunch garbage cert lt this");

   double_VAR_H(crunch_poor_garbage_rate, 60, "crunch garbage rating lt this");

   double_VAR_H(crunch_pot_poor_rate, 40, "POTENTIAL crunch rating lt this");

   double_VAR_H(crunch_pot_poor_cert, -8.0, "POTENTIAL crunch cert lt this");

   BOOL_VAR_H(crunch_pot_garbage, true, "POTENTIAL crunch garbage");

   double_VAR_H(crunch_del_rating, 60, "POTENTIAL crunch rating lt this");

   double_VAR_H(crunch_del_cert, -10.0, "POTENTIAL crunch cert lt this");

   double_VAR_H(crunch_del_min_ht, 0.7, "Del if word ht lt xht x this");

   double_VAR_H(crunch_del_max_ht, 3.0, "Del if word ht gt xht x this");

   double_VAR_H(crunch_del_min_width, 3.0, "Del if word width lt xht x this");

   double_VAR_H(crunch_del_high_word, 1.5,

                "Del if word gt xht x this above bl");

   double_VAR_H(crunch_del_low_word, 0.5, "Del if word gt xht x this below bl");

   double_VAR_H(crunch_small_outlines_size, 0.6, "Small if lt xht x this");

   INT_VAR_H(crunch_rating_max, 10, "For adj length in rating per ch");

   INT_VAR_H(crunch_pot_indicators, 1, "How many potential indicators needed");

   BOOL_VAR_H(crunch_leave_ok_strings, true, "Dont touch sensible strings");

   BOOL_VAR_H(crunch_accept_ok, true, "Use acceptability in okstring");

   BOOL_VAR_H(crunch_leave_accept_strings, false,

              "Dont pot crunch sensible strings");

   BOOL_VAR_H(crunch_include_numerals, false, "Fiddle alpha figures");

   INT_VAR_H(crunch_leave_lc_strings, 4,

             "Dont crunch words with long lower case strings");

   INT_VAR_H(crunch_leave_uc_strings, 4,

             "Dont crunch words with long lower case strings");

   INT_VAR_H(crunch_long_repetitions, 3, "Crunch words with long repetitions");

   INT_VAR_H(crunch_debug, 0, "As it says");

   INT_VAR_H(fixsp_non_noise_limit, 1,

             "How many non-noise blbs either side?");

   double_VAR_H(fixsp_small_outlines_size, 0.28, "Small if lt xht x this");

   BOOL_VAR_H(tessedit_prefer_joined_punct, false, "Reward punctation joins");

   INT_VAR_H(fixsp_done_mode, 1, "What constitues done for spacing");

   INT_VAR_H(debug_fix_space_level, 0, "Contextual fixspace debug");

   STRING_VAR_H(numeric_punctuation, ".,",

                "Punct. chs expected WITHIN numbers");

   INT_VAR_H(x_ht_acceptance_tolerance, 8,

             "Max allowed deviation of blob top outside of font data");

   INT_VAR_H(x_ht_min_change, 8, "Min change in xht before actually trying it");

   INT_VAR_H(superscript_debug, 0, "Debug level for sub & superscript fixer");

   double_VAR_H(superscript_worse_certainty, 2.0, "How many times worse "

                "certainty does a superscript position glyph need to be for us "

                "to try classifying it as a char with a different baseline?");

   double_VAR_H(superscript_bettered_certainty, 0.97, "What reduction in "

                "badness do we think sufficient to choose a superscript over "

                "what we'd thought.  For example, a value of 0.6 means we want "

                "to reduce badness of certainty by 40%");

   double_VAR_H(superscript_scaledown_ratio, 0.4,

                "A superscript scaled down more than this is unbelievably "

                "small.  For example, 0.3 means we expect the font size to "

                "be no smaller than 30% of the text line font size.");

   double_VAR_H(subscript_max_y_top, 0.5,

                "Maximum top of a character measured as a multiple of x-height "

                "above the baseline for us to reconsider whether it's a "

                "subscript.");

   double_VAR_H(superscript_min_y_bottom, 0.3,

               "Minimum bottom of a character measured as a multiple of "

               "x-height above the baseline for us to reconsider whether it's "

               "a superscript.");

   BOOL_VAR_H(tessedit_write_block_separators, false,

              "Write block separators in output");

   BOOL_VAR_H(tessedit_write_rep_codes, false,

              "Write repetition char code");

   BOOL_VAR_H(tessedit_write_unlv, false, "Write .unlv output file");

   BOOL_VAR_H(tessedit_create_txt, true, "Write .txt output file");

   BOOL_VAR_H(tessedit_create_hocr, false, "Write .html hOCR output file");

   BOOL_VAR_H(tessedit_create_pdf, false, "Write .pdf output file");

   STRING_VAR_H(unrecognised_char, "|",

                "Output char for unidentified blobs");

   INT_VAR_H(suspect_level, 99, "Suspect marker level");

   INT_VAR_H(suspect_space_level, 100,

             "Min suspect level for rejecting spaces");

   INT_VAR_H(suspect_short_words, 2,

             "Dont Suspect dict wds longer than this");

   BOOL_VAR_H(suspect_constrain_1Il, false, "UNLV keep 1Il chars rejected");

   double_VAR_H(suspect_rating_per_ch, 999.9, "Dont touch bad rating limit");

   double_VAR_H(suspect_accept_rating, -999.9, "Accept good rating limit");

   BOOL_VAR_H(tessedit_minimal_rejection, false, "Only reject tess failures");

   BOOL_VAR_H(tessedit_zero_rejection, false, "Dont reject ANYTHING");

   BOOL_VAR_H(tessedit_word_for_word, false,

              "Make output have exactly one word per WERD");

   BOOL_VAR_H(tessedit_zero_kelvin_rejection, false,

              "Dont reject ANYTHING AT ALL");

   BOOL_VAR_H(tessedit_consistent_reps, true, "Force all rep chars the same");

   INT_VAR_H(tessedit_reject_mode, 0, "Rejection algorithm");

   BOOL_VAR_H(tessedit_rejection_debug, false, "Adaption debug");

   BOOL_VAR_H(tessedit_flip_0O, true, "Contextual 0O O0 flips");

   double_VAR_H(tessedit_lower_flip_hyphen, 1.5,

                "Aspect ratio dot/hyphen test");

   double_VAR_H(tessedit_upper_flip_hyphen, 1.8,

                "Aspect ratio dot/hyphen test");

   BOOL_VAR_H(rej_trust_doc_dawg, false, "Use DOC dawg in 11l conf. detector");

   BOOL_VAR_H(rej_1Il_use_dict_word, false, "Use dictword test");

   BOOL_VAR_H(rej_1Il_trust_permuter_type, true, "Dont double check");

   BOOL_VAR_H(rej_use_tess_accepted, true, "Individual rejection control");

   BOOL_VAR_H(rej_use_tess_blanks, true, "Individual rejection control");

   BOOL_VAR_H(rej_use_good_perm, true, "Individual rejection control");

   BOOL_VAR_H(rej_use_sensible_wd, false, "Extend permuter check");

   BOOL_VAR_H(rej_alphas_in_number_perm, false, "Extend permuter check");

   double_VAR_H(rej_whole_of_mostly_reject_word_fract, 0.85, "if >this fract");

   INT_VAR_H(tessedit_image_border, 2, "Rej blbs near image edge limit");

   STRING_VAR_H(ok_repeated_ch_non_alphanum_wds, "-?*\075",

                "Allow NN to unrej");

   STRING_VAR_H(conflict_set_I_l_1, "Il1[]", "Il1 conflict set");

   INT_VAR_H(min_sane_x_ht_pixels, 8, "Reject any x-ht lt or eq than this");

   BOOL_VAR_H(tessedit_create_boxfile, false, "Output text with boxes");

   INT_VAR_H(tessedit_page_number, -1,

             "-1 -> All pages, else specifc page to process");

   BOOL_VAR_H(tessedit_write_images, false, "Capture the image from the IPE");

   BOOL_VAR_H(interactive_display_mode, false, "Run interactively?");

   STRING_VAR_H(file_type, ".tif", "Filename extension");

   BOOL_VAR_H(tessedit_override_permuter, true, "According to dict_word");

   INT_VAR_H(tessdata_manager_debug_level, 0,

             "Debug level for TessdataManager functions.");

   STRING_VAR_H(tessedit_load_sublangs, "",

                "List of languages to load with this one");

   BOOL_VAR_H(tessedit_use_primary_params_model, false,

              "In multilingual mode use params model of the primary language");

   // Min acceptable orientation margin (difference in scores between top and 2nd

   // choice in OSResults::orientations) to believe the page orientation.

   double_VAR_H(min_orientation_margin, 7.0,

                "Min acceptable orientation margin");

   BOOL_VAR_H(textord_tabfind_show_vlines, false, "Debug line finding");

   BOOL_VAR_H(textord_use_cjk_fp_model, FALSE, "Use CJK fixed pitch model");

   BOOL_VAR_H(poly_allow_detailed_fx, false,

              "Allow feature extractors to see the original outline");

   BOOL_VAR_H(tessedit_init_config_only, false,

              "Only initialize with the config file. Useful if the instance is "

              "not going to be used for OCR but say only for layout analysis.");

   BOOL_VAR_H(textord_equation_detect, false, "Turn on equation detector");

   BOOL_VAR_H(textord_tabfind_vertical_text, true, "Enable vertical detection");

   BOOL_VAR_H(textord_tabfind_force_vertical_text, false,

              "Force using vertical text page mode");

   double_VAR_H(textord_tabfind_vertical_text_ratio, 0.5,

                "Fraction of textlines deemed vertical to use vertical page "

                "mode");

   double_VAR_H(textord_tabfind_aligned_gap_fraction, 0.75,

                "Fraction of height used as a minimum gap for aligned blobs.");

   INT_VAR_H(tessedit_parallelize, 0, "Run in parallel where possible");

   BOOL_VAR_H(preserve_interword_spaces, false,

              "Preserve multiple interword spaces");

   BOOL_VAR_H(include_page_breaks, false,

              "Include page separator string in output text after each "

              "image/page.");

   STRING_VAR_H(page_separator, "\f",

                "Page separator (default is form feed control character)");


   // The following parameters were deprecated and removed from their original

   // locations. The parameters are temporarily kept here to give Tesseract

   // users a chance to updated their [lang].traineddata and config files

   // without introducing failures during Tesseract initialization.

   // TODO(ocr-team): remove these parameters from the code once we are

   // reasonably sure that Tesseract users have updated their data files.

   //

   // BEGIN DEPRECATED PARAMETERS

   BOOL_VAR_H(textord_tabfind_vertical_horizontal_mix, true,

              "find horizontal lines such as headers in vertical page mode");

   INT_VAR_H(tessedit_ok_mode, 5, "Acceptance decision algorithm");

   BOOL_VAR_H(load_fixed_length_dawgs, true,  "Load fixed length"

              " dawgs (e.g. for non-space delimited languages)");

   INT_VAR_H(segment_debug, 0, "Debug the whole segmentation process");

   BOOL_VAR_H(permute_debug, 0, "char permutation debug");

   double_VAR_H(bestrate_pruning_factor, 2.0, "Multiplying factor of"

                " current best rate to prune other hypotheses");

   BOOL_VAR_H(permute_script_word, 0,

              "Turn on word script consistency permuter");

   BOOL_VAR_H(segment_segcost_rating, 0,

              "incorporate segmentation cost in word rating?");

   double_VAR_H(segment_reward_script, 0.95,

                "Score multipler for script consistency within a word. "

                "Being a 'reward' factor, it should be <= 1. "

                "Smaller value implies bigger reward.");

   BOOL_VAR_H(permute_fixed_length_dawg, 0,

              "Turn on fixed-length phrasebook search permuter");

   BOOL_VAR_H(permute_chartype_word, 0,

              "Turn on character type (property) consistency permuter");

   double_VAR_H(segment_reward_chartype, 0.97,

                "Score multipler for char type consistency within a word. ");

   double_VAR_H(segment_reward_ngram_best_choice, 0.99,

                "Score multipler for ngram permuter's best choice"

                " (only used in the Han script path).");

   BOOL_VAR_H(ngram_permuter_activated, false,

              "Activate character-level n-gram-based permuter");

   BOOL_VAR_H(permute_only_top, false, "Run only the top choice permuter");

   INT_VAR_H(language_model_fixed_length_choices_depth, 3,

             "Depth of blob choice lists to explore"

             " when fixed length dawgs are on");

   BOOL_VAR_H(use_new_state_cost, FALSE,

              "use new state cost heuristics for segmentation state evaluation");

   double_VAR_H(heuristic_segcost_rating_base, 1.25,

                "base factor for adding segmentation cost into word rating."

                "It's a multiplying factor, the larger the value above 1, "

                "the bigger the effect of segmentation cost.");

   double_VAR_H(heuristic_weight_rating, 1,

                "weight associated with char rating in combined cost of state");

   double_VAR_H(heuristic_weight_width, 1000.0,

                "weight associated with width evidence in combined cost of"

                " state");

   double_VAR_H(heuristic_weight_seamcut, 0,

                "weight associated with seam cut in combined cost of state");

   double_VAR_H(heuristic_max_char_wh_ratio, 2.0,

                "max char width-to-height ratio allowed in segmentation");

   BOOL_VAR_H(enable_new_segsearch, false,

              "Enable new segmentation search path.");

   double_VAR_H(segsearch_max_fixed_pitch_char_wh_ratio, 2.0,

                "Maximum character width-to-height ratio for"

                "fixed pitch fonts");

   // END DEPRECATED PARAMETERS


   FILE *init_recog_training(const STRING &fname);

   void recog_training_segmented(const STRING &fname,

                                 PAGE_RES *page_res,

                                 volatile ETEXT_DESC *monitor,

                                 FILE *output_file);

   void ambigs_classify_and_output(const char *label,

                                   PAGE_RES_IT* pr_it,

                                   FILE *output_file);


 #ifndef ANDROID_BUILD

   inline CubeRecoContext *GetCubeRecoContext() { return cube_cntxt_; }

 #endif


  private:

   // The filename of a backup config file. If not null, then we currently

   // have a temporary debug config file loaded, and backup_config_file_

   // will be loaded, and set to null when debug is complete.

   const char* backup_config_file_;

   // The filename of a config file to read when processing a debug word.

   STRING word_config_;

   // Image used for input to layout analysis and tesseract recognition.

   // May be modified by the ShiroRekhaSplitter to eliminate the top-line.

   Pix* pix_binary_;

   // Unmodified image used for input to cube. Always valid.

   Pix* cube_binary_;

   // Grey-level input image if the input was not binary, otherwise NULL.

   Pix* pix_grey_;

   // Thresholds that were used to generate the thresholded image from grey.

   Pix* pix_thresholds_;

   // Input image resolution after any scaling. The resolution is not well

   // transmitted by operations on Pix, so we keep an independent record here.

   int source_resolution_;

   // The shiro-rekha splitter object which is used to split top-lines in

   // Devanagari words to provide a better word and grapheme segmentation.

   ShiroRekhaSplitter splitter_;

   // Page segmentation/layout

   Textord textord_;

   // True if the primary language uses right_to_left reading order.

   bool right_to_left_;

   Pix* scaled_color_;

   int scaled_factor_;

   FCOORD deskew_;

   FCOORD reskew_;

   TesseractStats stats_;

   // Sub-languages to be tried in addition to this.

   GenericVector<Tesseract*> sub_langs_;

   // Most recently used Tesseract out of this and sub_langs_. The default

   // language for the next word.

   Tesseract* most_recently_used_;

   // The size of the font table, ie max possible font id + 1.

   int font_table_size_;

 #ifndef ANDROID_BUILD

   // Cube objects.

   CubeRecoContext* cube_cntxt_;

   TesseractCubeCombiner *tess_cube_combiner_;

 #endif

   // Equation detector. Note: this pointer is NOT owned by the class.

   EquationDetect* equ_detect_;

 };


 }  // namespace tesseract


 #endif  // TESSERACT_CCMAIN_TESSERACTCLASS_H__

tesseract::Tesseract::tessedit_dump_pageseg_images
bool tessedit_dump_pageseg_images
Definition: tesseractclass.h:787

tesseract::Tesseract::superscript_scaledown_ratio
double superscript_scaledown_ratio
Definition: tesseractclass.h:990

TBLOB
Definition: blobs.h:261

tesseract::Tesseract::ResegmentCharBox
bool ResegmentCharBox(PAGE_RES *page_res, const TBOX *prev_box, const TBOX &box, const TBOX &next_box, const char *correct_text)
Definition: applybox.cpp:340

tesseract::Tesseract::test_pt_x
double test_pt_x
Definition: tesseractclass.h:887

tesseract::Tesseract::convert_bad_unlv_chs
void convert_bad_unlv_chs(WERD_RES *word_res)
Definition: docqual.cpp:663

tesseract::Tesseract::run_cube_combiner
void run_cube_combiner(PAGE_RES *page_res)
Definition: cube_control.cpp:193

tesseract::Tesseract::docqual_excuse_outline_errs
bool docqual_excuse_outline_errs
Definition: tesseractclass.h:897

tesseract::Tesseract
Definition: tesseractclass.h:170

tesseract::Tesseract::quality_blob_pc
double quality_blob_pc
Definition: tesseractclass.h:873

tesseract::Tesseract::TrainedXheightFix
bool TrainedXheightFix(WERD_RES *word, BLOCK *block, ROW *row)
Definition: control.cpp:1402

tesseract::Tesseract::count_alphas
inT16 count_alphas(const WERD_CHOICE &word)
Definition: output.cpp:400

tesseract::Tesseract::AssignDiacriticsToNewBlobs
void AssignDiacriticsToNewBlobs(const GenericVector< C_OUTLINE * > &outlines, int pass, WERD *real_word, PAGE_RES_IT *pr_it, GenericVector< bool > *word_wanted, GenericVector< C_BLOB * > *target_blobs)
Definition: control.cpp:1029

tesseract::Tesseract::first_alphanum_offset
inT16 first_alphanum_offset(const char *word, const char *word_lengths)
Definition: reject.cpp:482

tesseract::Tesseract::CorrectClassifyWords
void CorrectClassifyWords(PAGE_RES *page_res)
Definition: applybox.cpp:772

tesseract::Tesseract::word_dumper
BOOL8 word_dumper(PAGE_RES_IT *pr_it)
Definition: pgedit.cpp:922

tesseract::TesseractStats::last_char_was_tilde
bool last_char_was_tilde
Definition: tesseractclass.h:139

tesseract::TesseractStats::adaption_word_number
inT32 adaption_word_number
Definition: tesseractclass.h:127

tesseract::Tesseract::break_noisiest_blob_word
void break_noisiest_blob_word(WERD_RES_LIST &words)
Definition: fixspace.cpp:616

tesseract::Tesseract::right_to_left
bool right_to_left() const
Definition: tesseractclass.h:248

tesseract::Tesseract::match_word_pass_n
void match_word_pass_n(int pass_n, WERD_RES *word, ROW *row, BLOCK *block)
Definition: control.cpp:1549

tesseract::Tesseract::reject_mostly_rejects
void reject_mostly_rejects(WERD_RES *word)
Definition: reject.cpp:573

tesseract::Tesseract::recog_interactive
BOOL8 recog_interactive(PAGE_RES_IT *pr_it)
Definition: control.cpp:84

tesseract::Tesseract::mutable_textord
Textord * mutable_textord()
Definition: tesseractclass.h:244

tesseract::Tesseract::word_outline_errs
inT16 word_outline_errs(WERD_RES *word)
Definition: docqual.cpp:77

tesseract::Tesseract::x_ht_acceptance_tolerance
int x_ht_acceptance_tolerance
Definition: tesseractclass.h:977

tesseract::Tesseract::RetryWithLanguage
int RetryWithLanguage(const WordData &word_data, WordRecognizer recognizer, WERD_RES **in_word, PointerVector< WERD_RES > *best_words)
Definition: control.cpp:869

tesseract::Tesseract::worst_noise_blob
inT16 worst_noise_blob(WERD_RES *word_res, float *worst_noise_score)
Definition: fixspace.cpp:681

tesseract::Tesseract::Tesseract
Tesseract()
Definition: tesseractclass.cpp:57

WERD_RES
Definition: pageres.h:155

tesseract::Tesseract::tessedit_display_outwords
bool tessedit_display_outwords
Definition: tesseractclass.h:830

tesseract::Tesseract::numeric_punctuation
char * numeric_punctuation
Definition: tesseractclass.h:975

tesseract::Tesseract::tessedit_train_from_boxes
bool tessedit_train_from_boxes
Definition: tesseractclass.h:783

tesseract::Tesseract::crunch_include_numerals
bool crunch_include_numerals
Definition: tesseractclass.h:961

control.h

tesseract::Tesseract::word_adaptable
BOOL8 word_adaptable(WERD_RES *word, uinT16 mode)
Definition: adaptions.cpp:45

tesseract::Tesseract::rej_use_good_perm
bool rej_use_good_perm
Definition: tesseractclass.h:1036

tesseract::Tesseract::classify_word_pass2
void classify_word_pass2(const WordData &word_data, WERD_RES **in_word, PointerVector< WERD_RES > *out_words)
Definition: control.cpp:1488

tesseract::Tesseract::noise_maxperword
int noise_maxperword
Definition: tesseractclass.h:866

tesseract::Tesseract::crunch_terrible_rating
double crunch_terrible_rating
Definition: tesseractclass.h:938

tesseract::Tesseract::crunch_del_max_ht
double crunch_del_max_ht
Definition: tesseractclass.h:949

tesseract::Tesseract::recog_training_segmented
void recog_training_segmented(const STRING &fname, PAGE_RES *page_res, volatile ETEXT_DESC *monitor, FILE *output_file)
Definition: recogtraining.cpp:79

tesseract::Tesseract::suspect_short_words
int suspect_short_words
Definition: tesseractclass.h:1013

devanagari_processing.h

tesseract::Tesseract::make_reject_map
void make_reject_map(WERD_RES *word, ROW *row, inT16 pass)

tesseract::Tesseract::fill_werd_res
void fill_werd_res(const BoxWord &cube_box_word, const char *cube_best_str, WERD_RES *tess_werd_res)
Definition: cube_control.cpp:413

tesseract::TesseractStats::dict_words
inT32 dict_words
Definition: tesseractclass.h:134

tesseract::Tesseract::ClassifyBlobPlusOutlines
float ClassifyBlobPlusOutlines(const GenericVector< bool > &ok_outlines, const GenericVector< C_OUTLINE * > &outlines, int pass_n, PAGE_RES_IT *pr_it, C_BLOB *blob, STRING *best_str)
Definition: control.cpp:1190

tesseract::Tesseract::potential_word_crunch
BOOL8 potential_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_level, BOOL8 ok_dict_word)
Definition: docqual.cpp:545

tesseract::Tesseract::x_ht_min_change
int x_ht_min_change
Definition: tesseractclass.h:978

tesseract::Tesseract::one_ell_conflict
BOOL8 one_ell_conflict(WERD_RES *word_res, BOOL8 update_map)
Definition: reject.cpp:292

tesseract::Tesseract::paragraph_text_based
bool paragraph_text_based
Definition: tesseractclass.h:892

tesseract::Tesseract::applybox_learn_ngrams_mode
bool applybox_learn_ngrams_mode
Definition: tesseractclass.h:829

tesseract::Tesseract::SelectGoodDiacriticOutlines
bool SelectGoodDiacriticOutlines(int pass, float certainty_threshold, PAGE_RES_IT *pr_it, C_BLOB *blob, const GenericVector< C_OUTLINE * > &outlines, int num_outlines, GenericVector< bool > *ok_outlines)
Definition: control.cpp:1105

tesseract::Textord
Definition: textord.h:68

tesseract::Tesseract::ResetDocumentDictionary
void ResetDocumentDictionary()
Definition: tesseractclass.cpp:668

tesseract::SetParamConstraint
SetParamConstraint
Definition: params.h:36

tesseract::Tesseract::set_pix_thresholds
void set_pix_thresholds(Pix *thresholds)
Definition: tesseractclass.h:215

tesseract::Tesseract::debug_word
void debug_word(PAGE_RES *page_res, const TBOX &selection_box)
Definition: pgedit.cpp:641

tesseract::Tesseract::rejection_passes
void rejection_passes(PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, const char *word_config)
Definition: control.cpp:590

tesseract::Tesseract::dont_allow_1Il
void dont_allow_1Il(WERD_RES *word)
Definition: reject.cpp:526

tesseract::Tesseract::create_cube_box_word
bool create_cube_box_word(Boxa *char_boxes, int num_chars, TBOX word_box, BoxWord *box_word)
Definition: cube_control.cpp:116

tesseract::Tesseract::SubAndSuperscriptFix
bool SubAndSuperscriptFix(WERD_RES *word_res)
Definition: superscript.cpp:101

tesseract::Tesseract::tessedit_create_boxfile
bool tessedit_create_boxfile
Definition: tesseractclass.h:1045

tesseract::Tesseract::noise_cert_basechar
double noise_cert_basechar
Definition: tesseractclass.h:855

PAGE_RES_IT
Definition: pageres.h:656

tesseract::Tesseract::segment_reward_chartype
double segment_reward_chartype
Definition: tesseractclass.h:1117

tesseract::Tesseract::outlines_odd
char * outlines_odd
Definition: tesseractclass.h:894

tesseract::Tesseract::tessedit_zero_kelvin_rejection
bool tessedit_zero_kelvin_rejection
Definition: tesseractclass.h:1022

tesseract::Tesseract::bestrate_pruning_factor
double bestrate_pruning_factor
Definition: tesseractclass.h:1103

tesseract::Tesseract::tessedit_test_adaption
bool tessedit_test_adaption
Definition: tesseractclass.h:882

INT_VAR_H
#define INT_VAR_H(name, val, comment)
Definition: params.h:265

tesseract::Tesseract::fixsp_done_mode
int fixsp_done_mode
Definition: tesseractclass.h:972

tesseract::Tesseract::outlines_2
char * outlines_2
Definition: tesseractclass.h:895

tesseract::Tesseract::test_pt_y
double test_pt_y
Definition: tesseractclass.h:888

tesseract::Tesseract::SetBlackAndWhitelist
void SetBlackAndWhitelist()
Definition: tesseractclass.cpp:675

tesseract::Tesseract::ImageHeight
int ImageHeight() const
Definition: tesseractclass.h:228

tesseract::Tesseract::segment_reward_ngram_best_choice
double segment_reward_ngram_best_choice
Definition: tesseractclass.h:1120

tesseract::Tesseract::ok_repeated_ch_non_alphanum_wds
char * ok_repeated_ch_non_alphanum_wds
Definition: tesseractclass.h:1042

tesseract::CubeRecoContext
Definition: cube_reco_context.h:43

tesseract::TesseractStats::doc_char_quality
inT16 doc_char_quality
Definition: tesseractclass.h:130

tesseract::Tesseract::tessedit_fix_hyphens
bool tessedit_fix_hyphens
Definition: tesseractclass.h:837

tesseract::Tesseract::mutable_pix_binary
Pix ** mutable_pix_binary()
Definition: tesseractclass.h:191

tesseract::Tesseract::page_separator
char * page_separator
Definition: tesseractclass.h:1085

tesseract::Tesseract::tessedit_ok_mode
int tessedit_ok_mode
Definition: tesseractclass.h:1097

tesseract::WordData::prev_word
WordData * prev_word
Definition: tesseractclass.h:155

tesseract::Tesseract::ClassifyBlobAsWord
float ClassifyBlobAsWord(int pass_n, PAGE_RES_IT *pr_it, C_BLOB *blob, STRING *best_str, float *c2)
Definition: control.cpp:1232

tesseract::Tesseract::word_display
BOOL8 word_display(PAGE_RES_IT *pr_it)
Definition: pgedit.cpp:761

tesseract::Tesseract::ResetAdaptiveClassifier
void ResetAdaptiveClassifier()
Definition: tesseractclass.cpp:660

tesseract::Tesseract::noise_cert_disjoint
double noise_cert_disjoint
Definition: tesseractclass.h:858

tesseract::Tesseract::quality_min_initial_alphas_reqd
int quality_min_initial_alphas_reqd
Definition: tesseractclass.h:877

tesseract::WordData::lang_words
PointerVector< WERD_RES > lang_words
Definition: tesseractclass.h:156

tesseract::ColumnFinder
Definition: colfind.h:52

tesseract::Tesseract::ReassignDiacritics
bool ReassignDiacritics(int pass, PAGE_RES_IT *pr_it, bool *make_next_word_fuzzy)
Definition: control.cpp:910

tesseract::Tesseract::tessedit_debug_block_rejection
bool tessedit_debug_block_rejection
Definition: tesseractclass.h:842

ocrclass.h

tesseract::Tesseract::tessedit_write_params_to_file
char * tessedit_write_params_to_file
Definition: tesseractclass.h:813

tesseract::Tesseract::textord_equation_detect
bool textord_equation_detect
Definition: tesseractclass.h:1069

tesseract::ScriptPos
ScriptPos
Definition: ratngs.h:260

tesseract::Tesseract::crunch_long_repetitions
int crunch_long_repetitions
Definition: tesseractclass.h:966

tesseract::Tesseract::blamer_pass
void blamer_pass(PAGE_RES *page_res)
Definition: control.cpp:686

tesseract::Tesseract::tessedit_minimal_rej_pass1
bool tessedit_minimal_rej_pass1
Definition: tesseractclass.h:881

tesseract::Tesseract::tessedit_write_unlv
bool tessedit_write_unlv
Definition: tesseractclass.h:1003

tesseract::TesseractStats::doc_good_char_quality
inT16 doc_good_char_quality
Definition: tesseractclass.h:132

tesseract::Tesseract::output_pass
void output_pass(PAGE_RES_IT &page_res_it, const TBOX *target_word_box)
Definition: output.cpp:68

tesseract::Tesseract::tessedit_consistent_reps
bool tessedit_consistent_reps
Definition: tesseractclass.h:1023

tesseract::Tesseract::recognize_page
void recognize_page(STRING &image_name)

tesseract::Tesseract::quality_rowrej_pc
double quality_rowrej_pc
Definition: tesseractclass.h:931

tesseract::TessdataManager
Definition: tessdatamanager.h:133

tesseract::Tesseract::recog_word
void recog_word(WERD_RES *word)
Definition: tfacepp.cpp:46

tesseract::PointerVector< WERD_RES >

tesseract::Tesseract::tessedit_char_blacklist
char * tessedit_char_blacklist
Definition: tesseractclass.h:797

BOOL8
unsigned char BOOL8
Definition: host.h:113

tesseract::Tesseract::rej_alphas_in_number_perm
bool rej_alphas_in_number_perm
Definition: tesseractclass.h:1038

tesseract::Tesseract::split_word
void split_word(WERD_RES *word, int split_pt, WERD_RES **right_piece, BlamerBundle **orig_blamer_bundle) const
Definition: tfacepp.cpp:182

tesseract::PageSegMode
PageSegMode
Definition: publictypes.h:151

tesseract::WordData::word
WERD_RES * word
Definition: tesseractclass.h:152

tesseract::Tesseract::tessedit_timing_debug
bool tessedit_timing_debug
Definition: tesseractclass.h:832

tesseract::Tesseract::permute_debug
bool permute_debug
Definition: tesseractclass.h:1101

tesseract::Tesseract::tessedit_reject_mode
int tessedit_reject_mode
Definition: tesseractclass.h:1024

tesseract::Tesseract::set_pix_grey
void set_pix_grey(Pix *grey_pix)
Definition: tesseractclass.h:201

tesseract::Tesseract::ComputeCompatibleXheight
float ComputeCompatibleXheight(WERD_RES *word_res, float *baseline_shift)
Definition: fixxht.cpp:101

tesseract::Tesseract::chs_leading_punct
char * chs_leading_punct
Definition: tesseractclass.h:869

tesseract::Tesseract::set_source_resolution
void set_source_resolution(int ppi)
Definition: tesseractclass.h:222

tesseract::Tesseract::tessedit_create_txt
bool tessedit_create_txt
Definition: tesseractclass.h:1004

tesseract::CCUtil::tessdata_manager
TessdataManager tessdata_manager
Definition: ccutil.h:71

tesseract::Tesseract::tessedit_use_reject_spaces
bool tessedit_use_reject_spaces
Definition: tesseractclass.h:900

tesseract::TesseractStats::doc_outline_errs
inT16 doc_outline_errs
Definition: tesseractclass.h:129

WERD_CHOICE
Definition: ratngs.h:271

tesseract::Tesseract::ReportXhtFixResult
void ReportXhtFixResult(bool accept_new_word, float new_x_ht, WERD_RES *word, WERD_RES *new_word)
Definition: control.cpp:1381

tesseract::Tesseract::crunch_leave_ok_strings
bool crunch_leave_ok_strings
Definition: tesseractclass.h:957

tesseract::Tesseract::tilde_crunch
void tilde_crunch(PAGE_RES_IT &page_res_it)
Definition: docqual.cpp:421

tesseract::TesseractStats::doc_blob_quality
inT16 doc_blob_quality
Definition: tesseractclass.h:128

mode
CMD_EVENTS mode
Definition: pgedit.cpp:116

tesseract::Tesseract::get_sub_lang
Tesseract * get_sub_lang(int index) const
Definition: tesseractclass.h:254

ETEXT_DESC
Definition: ocrclass.h:112

tesseract::Tesseract::debug_fix_space_level
int debug_fix_space_level
Definition: tesseractclass.h:973

tesseract::Tesseract::PrepareForPageseg
void PrepareForPageseg()
Definition: tesseractclass.cpp:690

tesseract::Tesseract::source_resolution
int source_resolution() const
Definition: tesseractclass.h:219

SVEvent
Definition: scrollview.h:61

tesseract::Tesseract::init_recog_training
FILE * init_recog_training(const STRING &fname)
Definition: recogtraining.cpp:36

tesseract::Tesseract::quality_rej_pc
double quality_rej_pc
Definition: tesseractclass.h:872

tesseract::Tesseract::tessedit_write_images
bool tessedit_write_images
Definition: tesseractclass.h:1048

tesseract::WordData::WordData
WordData(const PAGE_RES_IT &page_res_it)
Definition: tesseractclass.h:146

tesseract::Tesseract::ApplyBoxes
PAGE_RES * ApplyBoxes(const STRING &fname, bool find_segmentation, BLOCK_LIST *block_list)
Definition: applybox.cpp:117

tesseract::Tesseract::AssignDiacriticsToOverlappingBlobs
void AssignDiacriticsToOverlappingBlobs(const GenericVector< C_OUTLINE * > &outlines, int pass, WERD *real_word, PAGE_RES_IT *pr_it, GenericVector< bool > *word_wanted, GenericVector< bool > *overlapped_any_blob, GenericVector< C_BLOB * > *target_blobs)
Definition: control.cpp:976

tesseract::Tesseract::hocr_font_info
bool hocr_font_info
Definition: tesseractclass.h:935

tesseract::Tesseract::GetSubAndSuperscriptCandidates
void GetSubAndSuperscriptCandidates(const WERD_RES *word, int *num_rebuilt_leading, ScriptPos *leading_pos, float *leading_certainty, int *num_rebuilt_trailing, ScriptPos *trailing_pos, float *trailing_certainty, float *avg_certainty, float *unlikely_threshold)
Definition: superscript.cpp:253

tesseract::Tesseract::tessedit_word_for_word
bool tessedit_word_for_word
Definition: tesseractclass.h:1020

tesseract::Tesseract::rej_1Il_trust_permuter_type
bool rej_1Il_trust_permuter_type
Definition: tesseractclass.h:1033

tesseract::Tesseract::ParseLanguageString
void ParseLanguageString(const char *lang_str, GenericVector< STRING > *to_load, GenericVector< STRING > *not_to_load)
Definition: tessedit.cpp:249

tesseract::Tesseract::recog_all_words
bool recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, const char *word_config, int dopasses)
Definition: control.cpp:287

tesseract::Tesseract::SegmentPage
int SegmentPage(const STRING *input_file, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr)
Definition: pagesegmain.cpp:109

tesseract::Tesseract::superscript_debug
int superscript_debug
Definition: tesseractclass.h:979

tesseract::Tesseract::tessedit_pageseg_mode
int tessedit_pageseg_mode
Definition: tesseractclass.h:791

tesseract::WordData::WordData
WordData()
Definition: tesseractclass.h:145

tesseract::Tesseract::tessedit_rejection_debug
bool tessedit_rejection_debug
Definition: tesseractclass.h:1025

tesseract::Tesseract::safe_dict_word
inT16 safe_dict_word(const WERD_RES *werd_res)
Definition: reject.cpp:607

tesseract::Tesseract::bidi_debug
int bidi_debug
Definition: tesseractclass.h:816

tesseract::Tesseract::CountMisfitTops
int CountMisfitTops(WERD_RES *word_res)
Definition: fixxht.cpp:69

tesseract::Tesseract::eval_word_spacing
inT16 eval_word_spacing(WERD_RES_LIST &word_res_list)
Definition: fixspace.cpp:240

tesseract::Tesseract::tessedit_create_hocr
bool tessedit_create_hocr
Definition: tesseractclass.h:1005

tesseract::Tesseract::SetScaledColor
void SetScaledColor(int factor, Pix *color)
Definition: tesseractclass.h:237

STRING_VAR_H
#define STRING_VAR_H(name, val, comment)
Definition: params.h:271

tesseract-c_api-demo.filename
string filename
Definition: tesseract-c_api-demo.py:29

tesseract::Tesseract::split_and_recog_word
void split_and_recog_word(WERD_RES *word)
Definition: tfacepp.cpp:144

tesseract::Tesseract::write_results
void write_results(PAGE_RES_IT &page_res_it, char newline_type, BOOL8 force_eol)
Definition: output.cpp:132

tesseract::WordData
Definition: tesseractclass.h:144

ROW
Definition: ocrrow.h:32

tesseract::ShiroRekhaSplitter::NO_SPLIT
Definition: devanagari_processing.h:68

tesseract::Tesseract::script_pos_pass
void script_pos_pass(PAGE_RES *page_res)
Definition: control.cpp:710

tesseract::TesseractStats::good_char_count
inT16 good_char_count
Definition: tesseractclass.h:131

tesseract::Tesseract::suspect_accept_rating
double suspect_accept_rating
Definition: tesseractclass.h:1016

tesseract::Tesseract::load_fixed_length_dawgs
bool load_fixed_length_dawgs
Definition: tesseractclass.h:1099

tesseract::Tesseract::debug_noise_removal
int debug_noise_removal
Definition: tesseractclass.h:852

tesseract::Tesseract::count_alphanums
inT16 count_alphanums(const WERD_CHOICE &word)
Definition: output.cpp:410

tesseract::Tesseract::scaled_color
Pix * scaled_color() const
Definition: tesseractclass.h:231

tesseract::Tesseract::tessedit_reject_bad_qual_wds
bool tessedit_reject_bad_qual_wds
Definition: tesseractclass.h:925

tesseract::Tesseract::tessedit_enable_dict_correction
bool tessedit_enable_dict_correction
Definition: tesseractclass.h:846

tesseract::Tesseract::unrej_good_quality_words
void unrej_good_quality_words(PAGE_RES_IT &page_res_it)
Definition: docqual.cpp:163

tesseract::Tesseract::ngram_permuter_activated
bool ngram_permuter_activated
Definition: tesseractclass.h:1122

tesseract::Tesseract::tessedit_good_quality_unrej
bool tessedit_good_quality_unrej
Definition: tesseractclass.h:899

tesseract::Tesseract::tessedit_whole_wd_rej_row_percent
double tessedit_whole_wd_rej_row_percent
Definition: tesseractclass.h:909

tesseract::Tesseract::word_deletable
CRUNCH_MODE word_deletable(WERD_RES *word, inT16 &delete_mode)
Definition: docqual.cpp:898

tesseract::Tesseract::process_selected_words
void process_selected_words(PAGE_RES *page_res, TBOX &selection_box, BOOL8(tesseract::Tesseract::*word_processor)(PAGE_RES_IT *pr_it))
Definition: pagewalk.cpp:30

tesseract::Tesseract::ImageWidth
int ImageWidth() const
Definition: tesseractclass.h:225

tesseract::Tesseract::quality_char_pc
double quality_char_pc
Definition: tesseractclass.h:876

tesseract::Tesseract::enable_new_segsearch
bool enable_new_segsearch
Definition: tesseractclass.h:1143

tesseract::Tesseract::crunch_small_outlines_size
double crunch_small_outlines_size
Definition: tesseractclass.h:954

tesseract::Tesseract::tessedit_reject_block_percent
double tessedit_reject_block_percent
Definition: tesseractclass.h:904

tesseract::WordData::block
BLOCK * block
Definition: tesseractclass.h:154

tesseract::Tesseract::segment_reward_script
double segment_reward_script
Definition: tesseractclass.h:1111

tesseract::Tesseract::textord_tabfind_vertical_text_ratio
double textord_tabfind_vertical_text_ratio
Definition: tesseractclass.h:1075

tesseract::Tesseract::FindSegmentation
bool FindSegmentation(const GenericVector< UNICHAR_ID > &target_text, WERD_RES *word_res)
Definition: applybox.cpp:559

tesseract::Tesseract::applybox_debug
int applybox_debug
Definition: tesseractclass.h:817

tesseract::Tesseract::noise_cert_punc
double noise_cert_punc
Definition: tesseractclass.h:861

tesseract::Tesseract::tessedit_char_unblacklist
char * tessedit_char_unblacklist
Definition: tesseractclass.h:801

tesseract::Tesseract::alpha_count
inT16 alpha_count(const char *word, const char *word_lengths)
Definition: reject.cpp:495

tesseract::Tesseract::quality_based_rejection
void quality_based_rejection(PAGE_RES_IT &page_res_it, BOOL8 good_quality_doc)
Definition: docqual.cpp:140

tesseract::Tesseract::TidyUp
void TidyUp(PAGE_RES *page_res)
Definition: applybox.cpp:706

tesseract::Tesseract::tessedit_zero_rejection
bool tessedit_zero_rejection
Definition: tesseractclass.h:1018

tesseract::Tesseract::crunch_pot_indicators
int crunch_pot_indicators
Definition: tesseractclass.h:956

tesseract::Tesseract::fix_fuzzy_space_list
void fix_fuzzy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK *block)
Definition: fixspace.cpp:145

tesseract::TesseractStats::TesseractStats
TesseractStats()
Definition: tesseractclass.h:113

SVMenuNode
Definition: svmnode.h:35

tesseract::Tesseract::tessedit_dont_blkrej_good_wds
bool tessedit_dont_blkrej_good_wds
Definition: tesseractclass.h:915

tesseract::Tesseract::crunch_early_convert_bad_unlv_chs
bool crunch_early_convert_bad_unlv_chs
Definition: tesseractclass.h:937

tesseract::Tesseract::paragraph_debug_level
int paragraph_debug_level
Definition: tesseractclass.h:889

tesseract::Tesseract::scaled_factor
int scaled_factor() const
Definition: tesseractclass.h:234

tesseract::TesseractStats
Definition: tesseractclass.h:112

tesseract::WordData::row
ROW * row
Definition: tesseractclass.h:153

tesseract::Tesseract::pageseg_devanagari_split_strategy
int pageseg_devanagari_split_strategy
Definition: tesseractclass.h:807

tesseract::Tesseract::GetCubeRecoContext
CubeRecoContext * GetCubeRecoContext()
Definition: tesseractclass.h:1160

tesseract::Tesseract::first_alphanum_index
inT16 first_alphanum_index(const char *word, const char *word_lengths)
Definition: reject.cpp:469

tesseract::Tesseract::noise_cert_factor
double noise_cert_factor
Definition: tesseractclass.h:864

tesseract::Tesseract::acceptable_number_string
BOOL8 acceptable_number_string(const char *s, const char *lengths)
Definition: output.cpp:421

tesseract::Tesseract::reject_edge_blobs
void reject_edge_blobs(WERD_RES *word)
Definition: reject.cpp:263

wordrec.h

tesseract::Tesseract::textord_tabfind_vertical_horizontal_mix
bool textord_tabfind_vertical_horizontal_mix
Definition: tesseractclass.h:1096

tesseract::Tesseract::failure_count
inT16 failure_count(WERD_RES *word)
Definition: docqual.cpp:969

tesseract::Tesseract::count_outline_errs
inT16 count_outline_errs(char c, inT16 outline_count)
Definition: docqual.cpp:128

tesseract::Tesseract::word_set_display
BOOL8 word_set_display(PAGE_RES_IT *pr_it)
Definition: pgedit.cpp:946

tesseract::TesseractStats::word_count
inT32 word_count
Definition: tesseractclass.h:133

tesseract::Tesseract::check_debug_pt
BOOL8 check_debug_pt(WERD_RES *word, int location)
Definition: control.cpp:1767

tesseract::Tesseract::tessedit_fix_fuzzy_spaces
bool tessedit_fix_fuzzy_spaces
Definition: tesseractclass.h:834

tesseract::Tesseract::word_char_quality
void word_char_quality(WERD_RES *word, ROW *row, inT16 *match_count, inT16 *accepted_match_count)
Definition: docqual.cpp:97

tesseract::OEM_TESSERACT_ONLY
Definition: publictypes.h:257

tesseract::Tesseract::ProcessTargetWord
bool ProcessTargetWord(const TBOX &word_box, const TBOX &target_word_box, const char *word_config, int pass)
Definition: control.cpp:118

CRUNCH_MODE
CRUNCH_MODE
Definition: pageres.h:145

tesseract::Tesseract::PrepareForTessOCR
void PrepareForTessOCR(BLOCK_LIST *block_list, Tesseract *osd_tess, OSResults *osr)
Definition: tesseractclass.cpp:726

tesseract::Tesseract::tessedit_test_adaption_mode
int tessedit_test_adaption_mode
Definition: tesseractclass.h:885

tesseract::Tesseract::rej_use_sensible_wd
bool rej_use_sensible_wd
Definition: tesseractclass.h:1037

tesseract::Tesseract::process_image_event
void process_image_event(const SVEvent &event)
Definition: pgedit.cpp:565

tesseract::Tesseract::ReportFailedBox
void ReportFailedBox(int boxfile_lineno, TBOX box, const char *box_ch, const char *err_msg)
Definition: applybox.cpp:764

tesseract::Tesseract::tessedit_enable_bigram_correction
bool tessedit_enable_bigram_correction
Definition: tesseractclass.h:844

tesseract::Tesseract::num_sub_langs
int num_sub_langs() const
Definition: tesseractclass.h:251

tesseract::Tesseract::tessedit_resegment_from_line_boxes
bool tessedit_resegment_from_line_boxes
Definition: tesseractclass.h:781

tesseract::Tesseract::interactive_display_mode
bool interactive_display_mode
Definition: tesseractclass.h:1049

tesseract::Tesseract::crunch_pot_garbage
bool crunch_pot_garbage
Definition: tesseractclass.h:945

tesseract::Tesseract::segment_segcost_rating
bool segment_segcost_rating
Definition: tesseractclass.h:1107

tesseract::Tesseract::tessedit_tess_adaption_mode
int tessedit_tess_adaption_mode
Definition: tesseractclass.h:879

PAGE_RES
Definition: pageres.h:58

tesseract::Tesseract::join_words
void join_words(WERD_RES *word, WERD_RES *word2, BlamerBundle *orig_bb) const
Definition: tfacepp.cpp:240

tesseract::Tesseract::permute_chartype_word
bool permute_chartype_word
Definition: tesseractclass.h:1115

BLOCK
Definition: ocrblock.h:30

tesseract::Tesseract::conflict_set_I_l_1
char * conflict_set_I_l_1
Definition: tesseractclass.h:1043

tesseract::Tesseract::crunch_rating_max
int crunch_rating_max
Definition: tesseractclass.h:955

tesseract::Tesseract::extract_cube_state
bool extract_cube_state(CubeObject *cube_obj, int *num_chars, Boxa **char_boxes, CharSamp ***char_samples)
Definition: cube_control.cpp:65

tesseract::Tesseract::nn_match_word
void nn_match_word(WERD_RES *word, ROW *row)

tesseract::Tesseract::word_blank_and_set_display
BOOL8 word_blank_and_set_display(PAGE_RES_IT *pr_its)
Definition: pgedit.cpp:717

ACCEPTABLE_WERD_TYPE
ACCEPTABLE_WERD_TYPE
Definition: control.h:34

tesseract::Tesseract::permute_only_top
bool permute_only_top
Definition: tesseractclass.h:1123

tesseract::CubeObject
Definition: cube_object.h:90

tesseract::Tesseract::read_config_file
void read_config_file(const char *filename, SetParamConstraint constraint)
Definition: tessedit.cpp:52

tesseract::Tesseract::textord
const Textord & textord() const
Definition: tesseractclass.h:241

tesseract::Tesseract::tessedit_ocr_engine_mode
int tessedit_ocr_engine_mode
Definition: tesseractclass.h:795

tesseract::Tesseract::RunOldFixXht
bool RunOldFixXht(WERD_RES *word, BLOCK *block, ROW *row)

tesseract::Tesseract::poly_allow_detailed_fx
bool poly_allow_detailed_fx
Definition: tesseractclass.h:1065

tesseract::Tesseract::unlv_tilde_crunching
bool unlv_tilde_crunching
Definition: tesseractclass.h:933

textord.h

tesseract::Tesseract::applybox_exposure_pattern
char * applybox_exposure_pattern
Definition: tesseractclass.h:822

tesseract::OEM_CUBE_ONLY
Definition: publictypes.h:258

tesseract::Tesseract::crunch_debug
int crunch_debug
Definition: tesseractclass.h:967

tesseract::Tesseract::heuristic_weight_width
double heuristic_weight_width
Definition: tesseractclass.h:1137

tesseract::Tesseract::permute_script_word
bool permute_script_word
Definition: tesseractclass.h:1105

tesseract::Tesseract::tessedit_flip_0O
bool tessedit_flip_0O
Definition: tesseractclass.h:1026

tesseract::Tesseract::process_cmd_win_event
BOOL8 process_cmd_win_event(inT32 cmd_event, char *new_value)
Definition: pgedit.cpp:397

tesseract::Tesseract::init_cube_objects
bool init_cube_objects(bool load_combiner, TessdataManager *tessdata_manager)
Definition: cube_control.cpp:154

tesseract::Tesseract::classify_word_pass1
void classify_word_pass1(const WordData &word_data, WERD_RES **in_word, PointerVector< WERD_RES > *out_words)
Definition: control.cpp:1344

tesseract::Tesseract::word_contains_non_1_digit
BOOL8 word_contains_non_1_digit(const char *word, const char *word_lengths)
Definition: reject.cpp:509

tesseract::Tesseract::RecogAllWordsPassN
bool RecogAllWordsPassN(int pass_n, ETEXT_DESC *monitor, PAGE_RES_IT *pr_it, GenericVector< WordData > *words)
Definition: control.cpp:207

tesseract::Tesseract::dictionary_correction_pass
void dictionary_correction_pass(PAGE_RES *page_res)
Definition: control.cpp:2015

tesseract::Tesseract::debug_acceptable_wds
bool debug_acceptable_wds
Definition: tesseractclass.h:868

tesseract::Tesseract::file_type
char * file_type
Definition: tesseractclass.h:1050

tesseract::Tesseract::crunch_poor_garbage_cert
double crunch_poor_garbage_cert
Definition: tesseractclass.h:941

tesseract::Tesseract::tessedit_lower_flip_hyphen
double tessedit_lower_flip_hyphen
Definition: tesseractclass.h:1028

tesseract::Tesseract::pgeditor_main
void pgeditor_main(int width, int height, PAGE_RES *page_res)
Definition: pgedit.cpp:337

tesseract::Tesseract::crunch_del_min_ht
double crunch_del_min_ht
Definition: tesseractclass.h:948

tesseract::Tesseract::pix_grey
Pix * pix_grey() const
Definition: tesseractclass.h:198

tesseract::Tesseract::tessedit_unrej_any_wd
bool tessedit_unrej_any_wd
Definition: tesseractclass.h:836

tesseract::Tesseract::fixsp_non_noise_limit
int fixsp_non_noise_limit
Definition: tesseractclass.h:969

tesseract::Tesseract::suspect_space_level
int suspect_space_level
Definition: tesseractclass.h:1011

tesseract::Tesseract::font_recognition_pass
void font_recognition_pass(PAGE_RES *page_res)
Definition: control.cpp:1958

tesseract::Tesseract::tessedit_resegment_from_boxes
bool tessedit_resegment_from_boxes
Definition: tesseractclass.h:779

tesseract::Tesseract::tessedit_enable_doc_dict
bool tessedit_enable_doc_dict
Definition: tesseractclass.h:840

tesseract::Tesseract::crunch_pot_poor_rate
double crunch_pot_poor_rate
Definition: tesseractclass.h:943

tesseract::Tesseract::SearchForText
void SearchForText(const GenericVector< BLOB_CHOICE_LIST * > *choices, int choices_pos, int choices_length, const GenericVector< UNICHAR_ID > &target_text, int text_index, float rating, GenericVector< int > *segmentation, float *best_rating, GenericVector< int > *best_segmentation)
Definition: applybox.cpp:629

tesseract::Tesseract::get_rep_char
UNICHAR_ID get_rep_char(WERD_RES *word)
Definition: output.cpp:285

tesseract::TesseractStats::write_results_empty_block
bool write_results_empty_block
Definition: tesseractclass.h:140

double_VAR_H
#define double_VAR_H(name, val, comment)
Definition: params.h:274

tesseract::Tesseract::garbage_word
GARBAGE_LEVEL garbage_word(WERD_RES *word, BOOL8 ok_dict_word)
Definition: docqual.cpp:683

tesseract::Tesseract::acceptable_word_string
ACCEPTABLE_WERD_TYPE acceptable_word_string(const UNICHARSET &char_set, const char *s, const char *lengths)
Definition: control.cpp:1663

tesseract::Tesseract::tessedit_matcher_log
bool tessedit_matcher_log
Definition: tesseractclass.h:883

tesseract::Tesseract::word_bln_display
BOOL8 word_bln_display(PAGE_RES_IT *pr_it)
Definition: pgedit.cpp:729

tesseract::Tesseract::tessedit_ambigs_training
bool tessedit_ambigs_training
Definition: tesseractclass.h:803

UNICHAR_ID
int UNICHAR_ID
Definition: unichar.h:33

tesseract::Tesseract::cube_debug_level
int cube_debug_level
Definition: tesseractclass.h:893

tesseract::Tesseract::fix_noisy_space_list
void fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK *block)
Definition: fixspace.cpp:570

tesseract::Tesseract::bigram_correction_pass
void bigram_correction_pass(PAGE_RES *page_res)
Definition: control.cpp:442

tesseract::Tesseract::tessedit_debug_fonts
bool tessedit_debug_fonts
Definition: tesseractclass.h:841

params.h

tesseract::Tesseract::init_tesseract_internal
int init_tesseract_internal(const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params)
Definition: tessedit.cpp:389

WERD
Definition: werd.h:60

tesseract::Tesseract::init_tesseract_lang_data
bool init_tesseract_lang_data(const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params)
Definition: tessedit.cpp:83

tesseract::Tesseract::SetupWordPassN
void SetupWordPassN(int pass_n, WordData *word)
Definition: control.cpp:171

tesseract::Tesseract::crunch_leave_uc_strings
int crunch_leave_uc_strings
Definition: tesseractclass.h:965

tesseract::Tesseract::debug_x_ht_level
int debug_x_ht_level
Definition: tesseractclass.h:867

tesseract::Tesseract::BestPix
Pix * BestPix() const
Definition: tesseractclass.h:212

tesseract::Tesseract::tess_segment_pass_n
void tess_segment_pass_n(int pass_n, WERD_RES *word)
Definition: tessbox.cpp:39

tesseract::Tesseract::tessdata_manager_debug_level
int tessdata_manager_debug_level
Definition: tesseractclass.h:1053

tesseract::Tesseract::fixsp_small_outlines_size
double fixsp_small_outlines_size
Definition: tesseractclass.h:970

tesseract::Tesseract::tessedit_reject_row_percent
double tessedit_reject_row_percent
Definition: tesseractclass.h:906

tesseract::Tesseract::cube_word_pass1
void cube_word_pass1(BLOCK *block, ROW *row, WERD_RES *word)
Definition: cube_control.cpp:235

tesseract::Tesseract::word_blob_quality
inT16 word_blob_quality(WERD_RES *word, ROW *row)
Definition: docqual.cpp:65

tesseract::PSM_SINGLE_BLOCK
Assume a single uniform block of text. (Default.)
Definition: publictypes.h:160

tesseract::Tesseract::doc_and_block_rejection
void doc_and_block_rejection(PAGE_RES_IT &page_res_it, BOOL8 good_quality_doc)
Definition: docqual.cpp:235

tesseract::WordData::WordData
WordData(BLOCK *block_in, ROW *row_in, WERD_RES *word_res)
Definition: tesseractclass.h:149

tesseract::Tesseract::enable_noise_removal
bool enable_noise_removal
Definition: tesseractclass.h:851

tesseract::Tesseract::tessedit_debug_doc_rejection
bool tessedit_debug_doc_rejection
Definition: tesseractclass.h:926

tesseract::TesseractCubeCombiner
Definition: tesseract_cube_combiner.h:47

tesseract::Tesseract::init_tesseract
int init_tesseract(const char *datapath, const char *language, OcrEngineMode oem)
Definition: tesseractclass.h:487

tesseract::Tesseract::heuristic_segcost_rating_base
double heuristic_segcost_rating_base
Definition: tesseractclass.h:1132

tesseract::Tesseract::tessedit_load_sublangs
char * tessedit_load_sublangs
Definition: tesseractclass.h:1055

tesseract::Tesseract::set_unlv_suspects
void set_unlv_suspects(WERD_RES *word)
Definition: output.cpp:307

tesseract::Tesseract::terrible_word_crunch
BOOL8 terrible_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_level)
Definition: docqual.cpp:507

tesseract::Tesseract::tessedit_override_permuter
bool tessedit_override_permuter
Definition: tesseractclass.h:1051

tesseract::Tesseract::crunch_early_merge_tess_fails
bool crunch_early_merge_tess_fails
Definition: tesseractclass.h:936

tesseract::Tesseract::Clear
void Clear()
Definition: tesseractclass.cpp:640

tesseract::Tesseract::match_current_words
void match_current_words(WERD_RES_LIST &words, ROW *row, BLOCK *block)
Definition: fixspace.cpp:196

tesseract::Tesseract::SetupAllWordsPassN
void SetupAllWordsPassN(int pass_n, const TBOX *target_word_box, const char *word_config, PAGE_RES *page_res, GenericVector< WordData > *words)
Definition: control.cpp:148

tesseract::Tesseract::min_sane_x_ht_pixels
int min_sane_x_ht_pixels
Definition: tesseractclass.h:1044

tesseract::Tesseract::use_new_state_cost
bool use_new_state_cost
Definition: tesseractclass.h:1128

tesseract::Tesseract::rej_whole_of_mostly_reject_word_fract
double rej_whole_of_mostly_reject_word_fract
Definition: tesseractclass.h:1039

tesseract::Tesseract::tessedit_use_primary_params_model
bool tessedit_use_primary_params_model
Definition: tesseractclass.h:1057

tesseract::Tesseract::AnyTessLang
bool AnyTessLang() const
Definition: tesseractclass.h:258

tesseract::CharSamp
Definition: char_samp.h:39

tesseract::Tesseract::crunch_del_rating
double crunch_del_rating
Definition: tesseractclass.h:946

tesseract::Tesseract::fix_rep_char
void fix_rep_char(PAGE_RES_IT *page_res_it)
Definition: control.cpp:1624

OSResults
Definition: osdetect.h:47

tesseract::Tesseract::blob_noise_score
float blob_noise_score(TBLOB *blob)
Definition: fixspace.cpp:761

tesseract::Tesseract::ResegmentWordBox
bool ResegmentWordBox(BLOCK_LIST *block_list, const TBOX &box, const TBOX &next_box, const char *correct_text)
Definition: applybox.cpp:438

tesseract::Tesseract::ocr_devanagari_split_strategy
int ocr_devanagari_split_strategy
Definition: tesseractclass.h:811

tesseract::Tesseract::AutoPageSeg
int AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks, BLOBNBOX_LIST *diacritic_blobs, Tesseract *osd_tess, OSResults *osr)
Definition: pagesegmain.cpp:232

tesseract::Tesseract::tessedit_row_rej_good_docs
bool tessedit_row_rej_good_docs
Definition: tesseractclass.h:921

tesseract::Tesseract::classify_word_and_language
void classify_word_and_language(int pass_n, PAGE_RES_IT *pr_it, WordData *word_data)
Definition: control.cpp:1268

tesseract::Tesseract::recog_pseudo_word
void recog_pseudo_word(PAGE_RES *page_res, TBOX &selection_box)
Definition: control.cpp:68

docqual.h

tesseract::Tesseract::applybox_page
int applybox_page
Definition: tesseractclass.h:818

tesseract::Tesseract::ApplyBoxTraining
void ApplyBoxTraining(const STRING &fontname, PAGE_RES *page_res)
Definition: applybox.cpp:796

tesseract::Tesseract::blob_feature_display
void blob_feature_display(PAGE_RES *page_res, const TBOX &selection_box)
Definition: pgedit.cpp:960

tesseract::Tesseract::crunch_del_cert
double crunch_del_cert
Definition: tesseractclass.h:947

tesseract::Tesseract::tessedit_create_pdf
bool tessedit_create_pdf
Definition: tesseractclass.h:1006

tesseract::Tesseract::~Tesseract
~Tesseract()
Definition: tesseractclass.cpp:623

tesseract::Tesseract::min_orientation_margin
double min_orientation_margin
Definition: tesseractclass.h:1061

tesseract::Tesseract::tessedit_preserve_row_rej_perfect_wds
bool tessedit_preserve_row_rej_perfect_wds
Definition: tesseractclass.h:913

tesseract::Tesseract::tessedit_upper_flip_hyphen
double tessedit_upper_flip_hyphen
Definition: tesseractclass.h:1030

tesseract::Tesseract::fixspace_thinks_word_done
BOOL8 fixspace_thinks_word_done(WERD_RES *word)
Definition: fixspace.cpp:504

tesseract::Tesseract::fp_eval_word_spacing
inT16 fp_eval_word_spacing(WERD_RES_LIST &word_res_list)
Definition: fixspace.cpp:831

tesseract::Tesseract::set_word_fonts
void set_word_fonts(WERD_RES *word)
Definition: control.cpp:1880

FALSE
#define FALSE
Definition: capi.h:29

tesseract
Definition: baseapi.cpp:83

tesseract::Tesseract::init_tesseract
int init_tesseract(const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params)
Definition: tessedit.cpp:285

tesseract::Tesseract::tessedit_good_doc_still_rowrej_wd
double tessedit_good_doc_still_rowrej_wd
Definition: tesseractclass.h:923

tesseract::Tesseract::TrySuperscriptSplits
WERD_RES * TrySuperscriptSplits(int num_chopped_leading, float leading_certainty, ScriptPos leading_pos, int num_chopped_trailing, float trailing_certainty, ScriptPos trailing_pos, WERD_RES *word, bool *is_good, int *retry_leading, int *retry_trailing)
Definition: superscript.cpp:382

tesseract::Tesseract::non_O_upper
BOOL8 non_O_upper(const UNICHARSET &ch_set, UNICHAR_ID unichar_id)
Definition: reject.cpp:785

GARBAGE_LEVEL
GARBAGE_LEVEL
Definition: docqual.h:25

tesseract::Tesseract::include_page_breaks
bool include_page_breaks
Definition: tesseractclass.h:1083

tesseract::Tesseract::segment_debug
int segment_debug
Definition: tesseractclass.h:1100

tesseract::Tesseract::rej_1Il_use_dict_word
bool rej_1Il_use_dict_word
Definition: tesseractclass.h:1032

tesseract::Wordrec
Definition: wordrec.h:123

tesseract::Tesseract::TestNewNormalization
bool TestNewNormalization(int original_misfits, float baseline_shift, float new_x_ht, WERD_RES *word, BLOCK *block, ROW *row)
Definition: control.cpp:1437

tesseract::Tesseract::unrej_good_chs
void unrej_good_chs(WERD_RES *word, ROW *row)
Definition: docqual.cpp:117

tesseract::Tesseract::SetEquationDetect
void SetEquationDetect(EquationDetect *detector)
Definition: tesseractclass.cpp:654

tesseract::Tesseract::heuristic_max_char_wh_ratio
double heuristic_max_char_wh_ratio
Definition: tesseractclass.h:1141

tesseract::Tesseract::nn_recover_rejects
void nn_recover_rejects(WERD_RES *word, ROW *row)

tesseract::Tesseract::tessedit_init_config_only
bool tessedit_init_config_only
Definition: tesseractclass.h:1068

tesseract::TesseractStats::tilde_crunch_written
bool tilde_crunch_written
Definition: tesseractclass.h:137

tesseract::Tesseract::textord_tabfind_show_vlines
bool textord_tabfind_show_vlines
Definition: tesseractclass.h:1062

tesseract::Tesseract::flip_hyphens
void flip_hyphens(WERD_RES *word)
Definition: reject.cpp:616

tesseract::Tesseract::bland_unrej
bool bland_unrej
Definition: tesseractclass.h:929

tesseract::Tesseract::noise_outlines
BOOL8 noise_outlines(TWERD *word)
Definition: docqual.cpp:981

tesseract::Tesseract::suspect_rating_per_ch
double suspect_rating_per_ch
Definition: tesseractclass.h:1015

tesseract::Tesseract::permute_fixed_length_dawg
bool permute_fixed_length_dawg
Definition: tesseractclass.h:1113

tesseract::Tesseract::tessedit_dont_rowrej_good_wds
bool tessedit_dont_rowrej_good_wds
Definition: tesseractclass.h:917

TBOX
Definition: rect.h:30

tesseract::Tesseract::rej_use_tess_blanks
bool rej_use_tess_blanks
Definition: tesseractclass.h:1035

tesseract::Tesseract::suspect_constrain_1Il
bool suspect_constrain_1Il
Definition: tesseractclass.h:1014

tesseract::Tesseract::do_re_display
void do_re_display(BOOL8(tesseract::Tesseract::*word_painter)(PAGE_RES_IT *pr_it))
Definition: pgedit.cpp:308

tesseract::Tesseract::recog_word_recursive
void recog_word_recursive(WERD_RES *word)
Definition: tfacepp.cpp:110

tesseract::Tesseract::crunch_pot_poor_cert
double crunch_pot_poor_cert
Definition: tesseractclass.h:944

tesseract::Tesseract::rej_use_tess_accepted
bool rej_use_tess_accepted
Definition: tesseractclass.h:1034

tesseract::WordRecognizer
void(Tesseract::* WordRecognizer)(const WordData &word_data, WERD_RES **in_word, PointerVector< WERD_RES > *out_words)
Definition: tesseractclass.h:166

tesseract::Tesseract::tess_acceptable_word
bool tess_acceptable_word(WERD_RES *word)
Definition: tessbox.cpp:69

UNICHARSET
Definition: unicharset.h:139

tesseract::Tesseract::MaximallyChopWord
void MaximallyChopWord(const GenericVector< TBOX > &boxes, BLOCK *block, ROW *row, WERD_RES *word_res)
Definition: applybox.cpp:253

tesseract::Tesseract::SetupUniversalFontIds
void SetupUniversalFontIds()
Definition: tessedit.cpp:439

tesseract::Tesseract::cube_recognize
bool cube_recognize(CubeObject *cube_obj, BLOCK *block, WERD_RES *word)
Definition: cube_control.cpp:326

tesseract::Tesseract::tessedit_reject_doc_percent
double tessedit_reject_doc_percent
Definition: tesseractclass.h:902

tesseract::Tesseract::SetupWordScripts
void SetupWordScripts(BLOCK_LIST *blocks)

tesseract::Tesseract::tessedit_adaption_debug
bool tessedit_adaption_debug
Definition: tesseractclass.h:815

tesseract::TesseractStats::dump_words_str
STRING dump_words_str
Definition: tesseractclass.h:135

tesseract::Tesseract::tessedit_redo_xheight
bool tessedit_redo_xheight
Definition: tesseractclass.h:838

tesseract::Tesseract::tessedit_write_rep_codes
bool tessedit_write_rep_codes
Definition: tesseractclass.h:1002

tesseract::Tesseract::reskew
const FCOORD & reskew() const
Definition: tesseractclass.h:187

tesseract::Tesseract::crunch_del_low_word
double crunch_del_low_word
Definition: tesseractclass.h:953

tesseract::Tesseract::PrerecAllWordsPar
void PrerecAllWordsPar(const GenericVector< WordData > &words)
Definition: par_control.cpp:36

BlamerBundle
Definition: blamer.h:88

tesseract::Tesseract::cube_combine_word
void cube_combine_word(CubeObject *cube_obj, WERD_RES *cube_word, WERD_RES *tess_word)
Definition: cube_control.cpp:283

STRING
Definition: strngs.h:44

tesseract::Tesseract::suspect_level
int suspect_level
Definition: tesseractclass.h:1009

tesseract::Tesseract::tessedit_preserve_min_wd_len
int tessedit_preserve_min_wd_len
Definition: tesseractclass.h:919

tesseract::Tesseract::tessedit_page_number
int tessedit_page_number
Definition: tesseractclass.h:1047

C_BLOB
Definition: stepblob.h:30

tesseract::Tesseract::crunch_del_high_word
double crunch_del_high_word
Definition: tesseractclass.h:952

tesseract::Tesseract::textord_tabfind_vertical_text
bool textord_tabfind_vertical_text
Definition: tesseractclass.h:1070

NULL
#define NULL
Definition: host.h:144

tesseract::Tesseract::crunch_del_min_width
double crunch_del_min_width
Definition: tesseractclass.h:950

tesseract::Tesseract::end_tesseract
void end_tesseract()
Definition: tessedit.cpp:471

tesseract::ShiroRekhaSplitter
Definition: devanagari_processing.h:65

tesseract::Tesseract::tessedit_char_whitelist
char * tessedit_char_whitelist
Definition: tesseractclass.h:799

tesseract::Tesseract::SetupPageSegAndDetectOrientation
ColumnFinder * SetupPageSegAndDetectOrientation(PageSegMode pageseg_mode, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr, TO_BLOCK_LIST *to_blocks, Pix **photo_mask_pix, Pix **music_mask_pix)
Definition: pagesegmain.cpp:309

tesseract::Tesseract::crunch_accept_ok
bool crunch_accept_ok
Definition: tesseractclass.h:958

tesseract::BoxWord
Definition: boxword.h:39

tesseract::Tesseract::fix_sp_fp_word
void fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, BLOCK *block)
Definition: fixspace.cpp:536

tesseract::Tesseract::superscript_worse_certainty
double superscript_worse_certainty
Definition: tesseractclass.h:982

tesseract::Tesseract::pix_binary
Pix * pix_binary() const
Definition: tesseractclass.h:195

tesseract::Tesseract::crunch_leave_lc_strings
int crunch_leave_lc_strings
Definition: tesseractclass.h:963

tesseract::Tesseract::preserve_interword_spaces
bool preserve_interword_spaces
Definition: tesseractclass.h:1080

tesseract::Tesseract::tessedit_dump_choices
bool tessedit_dump_choices
Definition: tesseractclass.h:831

TWERD
Definition: blobs.h:395

tesseract::Tesseract::chs_trailing_punct1
char * chs_trailing_punct1
Definition: tesseractclass.h:870

tesseract::Tesseract::tessedit_write_block_separators
bool tessedit_write_block_separators
Definition: tesseractclass.h:1000

tesseract::OcrEngineMode
OcrEngineMode
Definition: publictypes.h:256

tesseract::Tesseract::SetupApplyBoxes
PAGE_RES * SetupApplyBoxes(const GenericVector< TBOX > &boxes, BLOCK_LIST *block_list)
Definition: applybox.cpp:217

tesseract::Tesseract::test_pt
bool test_pt
Definition: tesseractclass.h:886

tesseract::EquationDetect
Definition: equationdetect.h:42

tesseract::Tesseract::crunch_poor_garbage_rate
double crunch_poor_garbage_rate
Definition: tesseractclass.h:942

tesseract::TesseractStats::last_char_was_newline
bool last_char_was_newline
Definition: tesseractclass.h:138

tesseract::Tesseract::reject_I_1_L
void reject_I_1_L(WERD_RES *word)
Definition: reject.cpp:191

GenericVector
Definition: baseapi.h:41

tesseract::Tesseract::quality_outline_pc
double quality_outline_pc
Definition: tesseractclass.h:875

tesseract::Tesseract::set_done
void set_done(WERD_RES *word, inT16 pass)

tesseract::Tesseract::tessedit_minimal_rejection
bool tessedit_minimal_rejection
Definition: tesseractclass.h:1017

tesseract::Tesseract::tessedit_bigram_debug
int tessedit_bigram_debug
Definition: tesseractclass.h:848

tesseract::Tesseract::PreenXHeights
void PreenXHeights(BLOCK_LIST *block_list)
Definition: applybox.cpp:193

tesseract::Tesseract::ConvertStringToUnichars
bool ConvertStringToUnichars(const char *utf8, GenericVector< UNICHAR_ID > *class_ids)
Definition: applybox.cpp:535

tesseract::Tesseract::repeated_nonalphanum_wd
BOOL8 repeated_nonalphanum_wd(WERD_RES *word, ROW *row)
Definition: reject.cpp:582

tesseract::Tesseract::build_menu_new
SVMenuNode * build_menu_new()
Definition: pgedit.cpp:257

tesseract::Tesseract::crunch_leave_accept_strings
bool crunch_leave_accept_strings
Definition: tesseractclass.h:960

tesseract::Tesseract::tilde_delete
void tilde_delete(PAGE_RES_IT &page_res_it)
Definition: docqual.cpp:593

tesseract::Tesseract::textord_tabfind_force_vertical_text
bool textord_tabfind_force_vertical_text
Definition: tesseractclass.h:1072

tesseract::Tesseract::segsearch_max_fixed_pitch_char_wh_ratio
double segsearch_max_fixed_pitch_char_wh_ratio
Definition: tesseractclass.h:1146

tesseract::Tesseract::BelievableSuperscript
bool BelievableSuperscript(bool debug, const WERD_RES &word, float certainty_threshold, int *left_ok, int *right_ok) const
Definition: superscript.cpp:520

tesseract::Tesseract::cube_recognize_word
CubeObject * cube_recognize_word(BLOCK *block, WERD_RES *word)
Definition: cube_control.cpp:246

tesseract::Tesseract::crunch_terrible_garbage
bool crunch_terrible_garbage
Definition: tesseractclass.h:939

tesseract::Tesseract::tessedit_debug_quality_metrics
bool tessedit_debug_quality_metrics
Definition: tesseractclass.h:928

tesseract::Tesseract::applybox_learn_chars_and_char_frags_mode
bool applybox_learn_chars_and_char_frags_mode
Definition: tesseractclass.h:826

tesseract::Tesseract::noise_maxperblob
int noise_maxperblob
Definition: tesseractclass.h:865

tesseract::Tesseract::tessedit_make_boxes_from_boxes
bool tessedit_make_boxes_from_boxes
Definition: tesseractclass.h:785

BOOL_VAR_H
#define BOOL_VAR_H(name, val, comment)
Definition: params.h:268

tesseract::Tesseract::unrecognised_char
char * unrecognised_char
Definition: tesseractclass.h:1008

tesseract::Tesseract::tessedit_prefer_joined_punct
bool tessedit_prefer_joined_punct
Definition: tesseractclass.h:971

tesseract::Tesseract::chs_trailing_punct2
char * chs_trailing_punct2
Definition: tesseractclass.h:871

tesseract::Tesseract::subscript_max_y_top
double subscript_max_y_top
Definition: tesseractclass.h:994

FCOORD
Definition: points.h:189

tesseract::Tesseract::rej_trust_doc_dawg
bool rej_trust_doc_dawg
Definition: tesseractclass.h:1031

tesseract::Tesseract::heuristic_weight_seamcut
double heuristic_weight_seamcut
Definition: tesseractclass.h:1139

tesseract::Tesseract::ambigs_classify_and_output
void ambigs_classify_and_output(const char *label, PAGE_RES_IT *pr_it, FILE *output_file)
Definition: recogtraining.cpp:203

tesseract::Tesseract::tessedit_preserve_blk_rej_perfect_wds
bool tessedit_preserve_blk_rej_perfect_wds
Definition: tesseractclass.h:911

tesseract::Tesseract::textord_use_cjk_fp_model
bool textord_use_cjk_fp_model
Definition: tesseractclass.h:1063

tesseract::Tesseract::fix_fuzzy_spaces
void fix_fuzzy_spaces(ETEXT_DESC *monitor, inT32 word_count, PAGE_RES *page_res)
Definition: fixspace.cpp:48

tesseract::Tesseract::textord_tabfind_aligned_gap_fraction
double textord_tabfind_aligned_gap_fraction
Definition: tesseractclass.h:1077

tesseract::Tesseract::tessedit_parallelize
int tessedit_parallelize
Definition: tesseractclass.h:1078

tesseract::Tesseract::digit_or_numeric_punct
BOOL8 digit_or_numeric_punct(WERD_RES *word, int char_position)
Definition: fixspace.cpp:344

tesseract::Tesseract::init_tesseract_lm
int init_tesseract_lm(const char *arg0, const char *textbase, const char *language)
Definition: tessedit.cpp:460

tesseract::Tesseract::flip_0O
void flip_0O(WERD_RES *word)
Definition: reject.cpp:673

tesseract::Tesseract::dump_words
void dump_words(WERD_RES_LIST &perm, inT16 score, inT16 mode, BOOL8 improved)
Definition: fixspace.cpp:450

tesseract::Tesseract::superscript_min_y_bottom
double superscript_min_y_bottom
Definition: tesseractclass.h:998

tesseract::Tesseract::ReSegmentByClassification
void ReSegmentByClassification(PAGE_RES *page_res)
Definition: applybox.cpp:509

genericvector.h

tesseract::Tesseract::tessedit_image_border
int tessedit_image_border
Definition: tesseractclass.h:1040

uinT16
unsigned short uinT16
Definition: host.h:101

tesseract::Tesseract::heuristic_weight_rating
double heuristic_weight_rating
Definition: tesseractclass.h:1134

tesseract::Tesseract::superscript_bettered_certainty
double superscript_bettered_certainty
Definition: tesseractclass.h:986

tesseract::Tesseract::non_0_digit
BOOL8 non_0_digit(const UNICHARSET &ch_set, UNICHAR_ID unichar_id)
Definition: reject.cpp:789

inT16
short inT16
Definition: host.h:100

tesseract::Tesseract::language_model_fixed_length_choices_depth
int language_model_fixed_length_choices_depth
Definition: tesseractclass.h:1126

inT32
int inT32
Definition: host.h:102

tesseract::Tesseract::tess_add_doc_word
void tess_add_doc_word(WERD_CHOICE *word_choice)
Definition: tessbox.cpp:79