// tessapi/4.0.0/a00191_source.html

 // File:        tesseractclass.h
 // Description: The Tesseract class. It holds/owns everything needed
 //              to run Tesseract on a single language, and also a set of
 //              sub-Tesseracts to run sub-languages. For thread safety, *every*
 //              global variable goes in here, directly, or indirectly.
 //              This makes it safe to run multiple Tesseracts in different
 //              threads in parallel, and keeps the different language
 //              instances separate.
 // Author:      Ray Smith
 // Created:     Fri Mar 07 08:17:01 PST 2008
 //
 // (C) Copyright 2008, Google Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 // http://www.apache.org/licenses/LICENSE-2.0
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //

 #ifndef TESSERACT_CCMAIN_TESSERACTCLASS_H_
 #define TESSERACT_CCMAIN_TESSERACTCLASS_H_

 #include <cstdint>                  // for int16_t, int32_t, uint16_t
 #include <cstdio>                   // for FILE
 #include "allheaders.h"             // for pixDestroy, pixGetWidth, pixGetHe...
 #include "control.h"                // for ACCEPTABLE_WERD_TYPE
 #include "debugpixa.h"              // for DebugPixa
 #include "devanagari_processing.h"  // for ShiroRekhaSplitter
 #include "docqual.h"                // for GARBAGE_LEVEL
 #include "genericvector.h"          // for GenericVector, PointerVector
 #include "host.h"                   // for BOOL8
 #include "pageres.h"                // for WERD_RES (ptr only), PAGE_RES (pt...
 #include "params.h"                 // for BOOL_VAR_H, BoolParam, DoubleParam
 #include "points.h"                 // for FCOORD
 #include "publictypes.h"            // for OcrEngineMode, PageSegMode, OEM_L...
 #include "ratngs.h"                 // for ScriptPos, WERD_CHOICE (ptr only)
 #include "strngs.h"                 // for STRING
 #include "tessdatamanager.h"        // for TessdataManager
 #include "textord.h"                // for Textord
 #include "unichar.h"                // for UNICHAR_ID
 #include "wordrec.h"                // for Wordrec

 class BLOCK_LIST;
 class ETEXT_DESC;
 struct OSResults;
 class PAGE_RES;
 class PAGE_RES_IT;
 struct Pix;
 class ROW;
 class SVMenuNode;
 class TBOX;
 class TO_BLOCK_LIST;
 class WERD;
 class WERD_CHOICE;
 class WERD_RES;


 // Top-level class for all tesseract global instance data.
 // This class either holds or points to all data used by an instance
 // of Tesseract, including the memory allocator. When this is
 // complete, Tesseract will be thread-safe. UNTIL THEN, IT IS NOT!
 //
 // NOTE to developers: Do not create cyclic dependencies through this class!
 // The directory dependency tree must remain a tree! To keep this clean,
 // lower-level code (eg in ccutil, the bottom level) must never need to
 // know about the content of a higher-level directory.
 // The following scheme will grant the easiest access to lower-level
 // global members without creating a cyclic dependency:
 //
 // Class Hierarchy (^ = inheritance):
 //
 //             CCUtil (ccutil/ccutil.h)
 //                         ^      Members include: UNICHARSET
 //            CUtil (cutil/cutil_class.h)
 //                         ^       Members include: TBLOB*, TEXTBLOCK*
 //           CCStruct (ccstruct/ccstruct.h)
 //                         ^       Members include: Image
 //           Classify (classify/classify.h)
 //                         ^       Members include: Dict
 //             WordRec (wordrec/wordrec.h)
 //                         ^       Members include: WERD*, DENORM*
 //        Tesseract (ccmain/tesseractclass.h)
 //                                 Members include: Pix*
 //
 // Other important classes:
 //
 //  TessBaseAPI (api/baseapi.h)
 //                                 Members include: BLOCK_LIST*, PAGE_RES*,
 //                                 Tesseract*, ImageThresholder*
 //  Dict (dict/dict.h)
 //                                 Members include: Image* (private)
 //
 // NOTE that each level contains members that correspond to global
 // data that is defined (and used) at that level, not necessarily where
 // the type is defined, so for instance:
 // BOOL_VAR_H(textord_show_blobs, false, "Display unsorted blobs");
 // goes inside the Textord class, not the cc_util class.

 namespace tesseract {

 class ColumnFinder;
 class DocumentData;
 class EquationDetect;
 class ImageData;
 class LSTMRecognizer;
 class Tesseract;

 // Counters and flags collected for statistics and debugging purposes.
 // Each scalar member carries its starting value next to its declaration.
 struct TesseractStats {
   TesseractStats() {}

   int32_t adaption_word_number = 0;
   int16_t doc_blob_quality = 0;
   int16_t doc_outline_errs = 0;
   int16_t doc_char_quality = 0;
   int16_t good_char_count = 0;
   int16_t doc_good_char_quality = 0;
   int32_t word_count = 0;  // count of words in the document
   int32_t dict_words = 0;  // number of dictionary words in the document
   STRING dump_words_str;   // accumulator used by dump_words()
   // Flags used by write_results()
   bool tilde_crunch_written = false;
   bool last_char_was_newline = true;
   bool last_char_was_tilde = false;
   bool write_results_empty_block = true;
 };

 // Groups the pointers that are relevant when processing one word: the word
 // result itself plus the row and block it came from. Every pointer member is
 // null unless a constructor sets it; prev_word is never set by a constructor.
 struct WordData {
   // Leaves all pointers null.
   WordData() {}
   // Reads word, row and block from the given page iterator.
   explicit WordData(const PAGE_RES_IT& page_res_it)
       : word(page_res_it.word()),
         row(page_res_it.row()->row),
         block(page_res_it.block()->block) {}
   // Uses the block, row and word supplied by the caller.
   WordData(BLOCK* block_in, ROW* row_in, WERD_RES* word_res)
       : word(word_res), row(row_in), block(block_in) {}

   WERD_RES* word = nullptr;
   ROW* row = nullptr;
   BLOCK* block = nullptr;
   WordData* prev_word = nullptr;
   PointerVector<WERD_RES> lang_words;
 };

 // Definition of a Tesseract WordRecognizer. The WordData provides the context
 // of row/block, in_word holds an initialized, possibly pre-classified word,
 // that the recognizer may or may not consume (but if so it sets *in_word=nullptr)
 // and produces one or more output words in out_words, which may be the
 // consumed in_word, or may be generated independently.
 // This api allows both a conventional tesseract classifier to work, or a
 // line-level classifier that generates multiple words from a merged input.
 typedef void (Tesseract::*WordRecognizer)(const WordData& word_data,
                                           WERD_RES** in_word,
                                           PointerVector<WERD_RES>* out_words);

 class Tesseract : public Wordrec {
  public:
   Tesseract();
   ~Tesseract();

   // Return appropriate dictionary
   Dict& getDict() override;

   // Clear as much used memory as possible without resetting the adaptive
   // classifier or losing any other classifier data.
   void Clear();
   // Clear all memory of adaption for this and all subclassifiers.
   void ResetAdaptiveClassifier();
   // Clear the document dictionary for this and all subclassifiers.
   void ResetDocumentDictionary();

   // Set the equation detector.
   void SetEquationDetect(EquationDetect* detector);

   // Simple accessors.
   // Read-only reference to the reskew_ member.
   const FCOORD& reskew() const { return reskew_; }
   // Calls pixDestroy on whatever binary image is currently held, then hands
   // back the address of the pix_binary_ member (a pointer to the pointer).
   Pix** mutable_pix_binary() {
     Pix** binary_slot = &pix_binary_;
     pixDestroy(binary_slot);
     return binary_slot;
   }
   // Returns the pix_binary_ pointer as currently stored.
   Pix* pix_binary() const { return pix_binary_; }
   // Returns the pix_grey_ pointer as currently stored.
   Pix* pix_grey() const { return pix_grey_; }
   // Stores grey_pix in pix_grey_ after calling pixDestroy on the image that
   // was held before.
   void set_pix_grey(Pix* grey_pix) {
     pixDestroy(&pix_grey_);  // let go of the previous grey image first
     pix_grey_ = grey_pix;
   }
   // Returns the pix_original_ pointer as currently stored.
   Pix* pix_original() const {
     return pix_original_;
   }
   // Takes ownership of the given original_pix. The image held so far is
   // passed to pixDestroy, and each sub-language is then given its own
   // pixClone of original_pix (or nullptr when original_pix is null) through
   // its own set_pix_original().
   void set_pix_original(Pix* original_pix) {
     pixDestroy(&pix_original_);
     pix_original_ = original_pix;
     for (int lang = 0; lang < sub_langs_.size(); ++lang) {
       Pix* lang_pix = nullptr;
       if (original_pix) {
         lang_pix = pixClone(original_pix);
       }
       sub_langs_[lang]->set_pix_original(lang_pix);
     }
   }
   // Returns the Pix that represents the page at the best available
   // resolution, with best available bit depth as the second priority:
   //   1. pix_original_, if its width equals ImageWidth();
   //   2. otherwise pix_grey_, if it is non-null;
   //   3. otherwise pix_binary_.
   // The result can be of any bit depth, but is never color-mapped, as that
   // has always been removed. In grey and color, 0 is black and 255 is white;
   // if the input was binary, black is 1 and white is 0. pixGetDepth() will
   // return 32, 8 or 1 to tell these cases apart.
   // The returned Pix is borrowed and should not be deleted or pixDestroyed.
   Pix* BestPix() const {
     if (pixGetWidth(pix_original_) == ImageWidth()) return pix_original_;
     return pix_grey_ != nullptr ? pix_grey_ : pix_binary_;
   }
   // Stores thresholds in pix_thresholds_ after calling pixDestroy on the
   // image that was held before.
   void set_pix_thresholds(Pix* thresholds) {
     pixDestroy(&pix_thresholds_);  // let go of the previous image first
     pix_thresholds_ = thresholds;
   }
   // Returns the value last stored in source_resolution_.
   int source_resolution() const { return source_resolution_; }
   // Stores ppi in source_resolution_.
   void set_source_resolution(int ppi) { source_resolution_ = ppi; }
   // Width of the binary image, as reported by pixGetWidth(pix_binary_).
   int ImageWidth() const { return pixGetWidth(pix_binary_); }
   // Height of the binary image, as reported by pixGetHeight(pix_binary_).
   int ImageHeight() const { return pixGetHeight(pix_binary_); }
   // Returns the scaled_color_ pointer as currently stored.
   Pix* scaled_color() const { return scaled_color_; }
   // Returns the value last stored in scaled_factor_.
   int scaled_factor() const { return scaled_factor_; }
   // Records the scaled color image together with its scale factor.
   // NOTE(review): unlike set_pix_grey() and set_pix_thresholds(), this does
   // not call pixDestroy on the previous scaled_color_ before overwriting it;
   // confirm who owns that image.
   void SetScaledColor(int factor, Pix* color) {
     scaled_color_ = color;
     scaled_factor_ = factor;
   }
   // Read-only reference to the textord_ member.
   const Textord& textord() const { return textord_; }
   // Address of the textord_ member, for callers that need to modify it.
   Textord* mutable_textord() { return &textord_; }

   // Returns the right_to_left_ flag.
   bool right_to_left() const { return right_to_left_; }
   // Number of entries in sub_langs_.
   int num_sub_langs() const { return sub_langs_.size(); }
   // Returns sub_langs_[index]. The index is passed straight through; this
   // method performs no range check of its own.
   Tesseract* get_sub_lang(int index) const { return sub_langs_[index]; }
   // Returns true if any language uses Tesseract (as opposed to LSTM), i.e.
   // if this instance or any of its sub-languages has a
   // tessedit_ocr_engine_mode other than OEM_LSTM_ONLY.
   bool AnyTessLang() const {
     bool uses_tess = tessedit_ocr_engine_mode != OEM_LSTM_ONLY;
     // Sub-languages are only consulted while no match has been found yet.
     for (int lang = 0; !uses_tess && lang < sub_langs_.size(); ++lang) {
       uses_tess = sub_langs_[lang]->tessedit_ocr_engine_mode != OEM_LSTM_ONLY;
     }
     return uses_tess;
   }
   // Returns true if any language uses the LSTM, i.e. if this instance or any
   // of its sub-languages has a tessedit_ocr_engine_mode other than
   // OEM_TESSERACT_ONLY.
   bool AnyLSTMLang() const {
     bool uses_lstm = tessedit_ocr_engine_mode != OEM_TESSERACT_ONLY;
     // Sub-languages are only consulted while no match has been found yet.
     for (int lang = 0; !uses_lstm && lang < sub_langs_.size(); ++lang) {
       uses_lstm =
           sub_langs_[lang]->tessedit_ocr_engine_mode != OEM_TESSERACT_ONLY;
     }
     return uses_lstm;
   }

   void SetBlackAndWhitelist();

   // Perform steps to prepare underlying binary image/other data structures for
   // page segmentation. Uses the strategy specified in the global variable
   // pageseg_devanagari_split_strategy for performing splitting while preparing
   // for page segmentation.
   void PrepareForPageseg();

   // Perform steps to prepare underlying binary image/other data structures for
   // Tesseract OCR. The current segmentation is required by this method.
   // Uses the strategy specified in the global variable
   // ocr_devanagari_split_strategy for performing splitting while preparing for
   // Tesseract ocr.
   void PrepareForTessOCR(BLOCK_LIST* block_list,
                          Tesseract* osd_tess, OSResults* osr);

   int SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,
                   Tesseract* osd_tess, OSResults* osr);
   void SetupWordScripts(BLOCK_LIST* blocks);
   int AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks,
                   TO_BLOCK_LIST* to_blocks, BLOBNBOX_LIST* diacritic_blobs,
                   Tesseract* osd_tess, OSResults* osr);
   ColumnFinder* SetupPageSegAndDetectOrientation(
       PageSegMode pageseg_mode, BLOCK_LIST* blocks, Tesseract* osd_tess,
       OSResults* osr, TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix,
       Pix** music_mask_pix);
   // par_control.cpp
   void PrerecAllWordsPar(const GenericVector<WordData>& words);

   // Generates training data for training a line recognizer, eg LSTM.
   // Breaks the page into lines, according to the boxes, and writes them to a
   // serialized DocumentData based on output_basename.
   void TrainLineRecognizer(const STRING& input_imagename,
                            const STRING& output_basename,
                            BLOCK_LIST *block_list);
   // Generates training data for training a line recognizer, eg LSTM.
   // Breaks the boxes into lines, normalizes them, converts to ImageData and
   // appends them to the given training_data.
   void TrainFromBoxes(const GenericVector<TBOX>& boxes,
                       const GenericVector<STRING>& texts,
                       BLOCK_LIST *block_list,
                       DocumentData* training_data);

   // Returns an Imagedata containing the image of the given textline,
   // and ground truth boxes/truth text if available in the input.
   // The image is not normalized in any way.
   ImageData* GetLineData(const TBOX& line_box,
                          const GenericVector<TBOX>& boxes,
                          const GenericVector<STRING>& texts,
                          int start_box, int end_box,
                          const BLOCK& block);
   // Helper gets the image of a rectangle, using the block.re_rotation() if
   // needed to get to the image, and rotating the result back to horizontal
   // layout. (CJK characters will be on their left sides) The vertical text flag
   // is set in the returned ImageData if the text was originally vertical, which
   // can be used to invoke a different CJK recognition engine. The revised_box
   // is also returned to enable calculation of output bounding boxes.
   ImageData* GetRectImage(const TBOX& box, const BLOCK& block, int padding,
                           TBOX* revised_box) const;
   // Recognizes a word or group of words, converting to WERD_RES in *words.
   // Analogous to classify_word_pass1, but can handle a group of words as well.
   void LSTMRecognizeWord(const BLOCK& block, ROW *row, WERD_RES *word,
                          PointerVector<WERD_RES>* words);
   // Apply segmentation search to the given set of words, within the constraints
   // of the existing ratings matrix. If there is already a best_choice on a word
   // leaves it untouched and just sets the done/accepted etc flags.
   void SearchWords(PointerVector<WERD_RES>* words);

   bool ProcessTargetWord(const TBOX& word_box, const TBOX& target_word_box,
                          const char* word_config, int pass);
   // Sets up the words ready for whichever engine is to be run
   void SetupAllWordsPassN(int pass_n,
                           const TBOX* target_word_box,
                           const char* word_config,
                           PAGE_RES* page_res,
                           GenericVector<WordData>* words);
   // Sets up the single word ready for whichever engine is to be run.
   void SetupWordPassN(int pass_n, WordData* word);
   // Runs word recognition on all the words.
   bool RecogAllWordsPassN(int pass_n, ETEXT_DESC* monitor,
                           PAGE_RES_IT* pr_it,
                           GenericVector<WordData>* words);
   bool recog_all_words(PAGE_RES* page_res,
                        ETEXT_DESC* monitor,
                        const TBOX* target_word_box,
                        const char* word_config,
                        int dopasses);
   void rejection_passes(PAGE_RES* page_res,
                         ETEXT_DESC* monitor,
                         const TBOX* target_word_box,
                         const char* word_config);
   void bigram_correction_pass(PAGE_RES *page_res);
   void blamer_pass(PAGE_RES* page_res);
   // Sets script positions and detects smallcaps on all output words.
   void script_pos_pass(PAGE_RES* page_res);
   // Helper to recognize the word using the given (language-specific) tesseract.
   // Returns positive if this recognizer found more new best words than the
   // number kept from best_words.
   int RetryWithLanguage(const WordData& word_data, WordRecognizer recognizer,
                         bool debug, WERD_RES** in_word,
                         PointerVector<WERD_RES>* best_words);
   // Moves good-looking "noise"/diacritics from the reject list to the main
   // blob list on the current word. Returns true if anything was done, and
   // sets make_next_word_fuzzy if blob(s) were added to the end of the word.
   bool ReassignDiacritics(int pass, PAGE_RES_IT* pr_it,
                           bool* make_next_word_fuzzy);
   // Attempts to put noise/diacritic outlines into the blobs that they overlap.
   // Input: a set of noisy outlines that probably belong to the real_word.
   // Output: outlines that overlapped blobs are set to nullptr and put back into
   // the word, either in the blobs or in the reject list.
   void AssignDiacriticsToOverlappingBlobs(
       const GenericVector<C_OUTLINE*>& outlines, int pass, WERD* real_word,
       PAGE_RES_IT* pr_it, GenericVector<bool>* word_wanted,
       GenericVector<bool>* overlapped_any_blob,
       GenericVector<C_BLOB*>* target_blobs);
   // Attempts to assign non-overlapping outlines to their nearest blobs or
   // make new blobs out of them.
   void AssignDiacriticsToNewBlobs(const GenericVector<C_OUTLINE*>& outlines,
                                   int pass, WERD* real_word, PAGE_RES_IT* pr_it,
                                   GenericVector<bool>* word_wanted,
                                   GenericVector<C_BLOB*>* target_blobs);
   // Starting with ok_outlines set to indicate which outlines overlap the blob,
   // chooses the optimal set (approximately) and returns true if any outlines
   // are desired, in which case ok_outlines indicates which ones.
   bool SelectGoodDiacriticOutlines(int pass, float certainty_threshold,
                                    PAGE_RES_IT* pr_it, C_BLOB* blob,
                                    const GenericVector<C_OUTLINE*>& outlines,
                                    int num_outlines,
                                    GenericVector<bool>* ok_outlines);
   // Classifies the given blob plus the outlines flagged by ok_outlines, undoes
   // the inclusion of the outlines, and returns the certainty of the raw choice.
   float ClassifyBlobPlusOutlines(const GenericVector<bool>& ok_outlines,
                                  const GenericVector<C_OUTLINE*>& outlines,
                                  int pass_n, PAGE_RES_IT* pr_it, C_BLOB* blob,
                                  STRING* best_str);
   // Classifies the given blob (part of word_data->word->word) as an individual
   // word, using languages, chopper etc, returning only the certainty of the
   // best raw choice, and undoing all the work done to fake out the word.
   float ClassifyBlobAsWord(int pass_n, PAGE_RES_IT* pr_it, C_BLOB* blob,
                            STRING* best_str, float* c2);
   void classify_word_and_language(int pass_n, PAGE_RES_IT* pr_it,
                                   WordData* word_data);
   void classify_word_pass1(const WordData& word_data,
                            WERD_RES** in_word,
                            PointerVector<WERD_RES>* out_words);
   void recog_pseudo_word(PAGE_RES* page_res,  // blocks to check
                          TBOX &selection_box);

   void fix_rep_char(PAGE_RES_IT* page_res_it);

   ACCEPTABLE_WERD_TYPE acceptable_word_string(const UNICHARSET& char_set,
                                               const char *s,
                                               const char *lengths);
   void match_word_pass_n(int pass_n, WERD_RES *word, ROW *row, BLOCK* block);
   void classify_word_pass2(const WordData& word_data,
                            WERD_RES** in_word,
                            PointerVector<WERD_RES>* out_words);
   void ReportXhtFixResult(bool accept_new_word, float new_x_ht,
                           WERD_RES* word, WERD_RES* new_word);
   bool RunOldFixXht(WERD_RES *word, BLOCK* block, ROW *row);
   bool TrainedXheightFix(WERD_RES *word, BLOCK* block, ROW *row);
   // Runs recognition with the test baseline shift and x-height and returns true
   // if there was an improvement in recognition result.
   bool TestNewNormalization(int original_misfits, float baseline_shift,
                             float new_x_ht, WERD_RES *word, BLOCK* block,
                             ROW *row);
   bool recog_interactive(PAGE_RES_IT* pr_it);

   // Set fonts of this word.
   void set_word_fonts(WERD_RES *word);
   void font_recognition_pass(PAGE_RES* page_res);
   void dictionary_correction_pass(PAGE_RES* page_res);
   bool check_debug_pt(WERD_RES* word, int location);

   bool SubAndSuperscriptFix(WERD_RES *word_res);
   void GetSubAndSuperscriptCandidates(const WERD_RES *word,
                                       int *num_rebuilt_leading,
                                       ScriptPos *leading_pos,
                                       float *leading_certainty,
                                       int *num_rebuilt_trailing,
                                       ScriptPos *trailing_pos,
                                       float *trailing_certainty,
                                       float *avg_certainty,
                                       float *unlikely_threshold);
   WERD_RES *TrySuperscriptSplits(int num_chopped_leading,
                                  float leading_certainty,
                                  ScriptPos leading_pos,
                                  int num_chopped_trailing,
                                  float trailing_certainty,
                                  ScriptPos trailing_pos,
                                  WERD_RES *word,
                                  bool *is_good,
                                  int *retry_leading,
                                  int *retry_trailing);
   bool BelievableSuperscript(bool debug,
                              const WERD_RES &word,
                              float certainty_threshold,
                              int *left_ok,
                              int *right_ok) const;


   void output_pass(PAGE_RES_IT &page_res_it, const TBOX *target_word_box);
   void write_results(PAGE_RES_IT& page_res_it,  // full info
                      char newline_type,         // type of newline
                      bool force_eol            // override tilde crunch?
   );
   void set_unlv_suspects(WERD_RES *word);
   UNICHAR_ID get_rep_char(WERD_RES *word);  // what char is repeated?
   bool acceptable_number_string(const char* s,
                                 const char* lengths);
   int16_t count_alphanums(const WERD_CHOICE &word);
   int16_t count_alphas(const WERD_CHOICE &word);
   void read_config_file(const char *filename, SetParamConstraint constraint);
   // Initialize for potentially a set of languages defined by the language
   // string and recursively any additional languages required by any language
   // traineddata file (via tessedit_load_sublangs in its config) that is loaded.
   // See init_tesseract_internal for args.
   int init_tesseract(const char* arg0, const char* textbase,
                      const char* language, OcrEngineMode oem, char** configs,
                      int configs_size, const GenericVector<STRING>* vars_vec,
                      const GenericVector<STRING>* vars_values,
                      bool set_only_init_params, TessdataManager* mgr);
   // Convenience overload taking only a data path, a language and an engine
   // mode. It forwards to the full init_tesseract(), passing datapath as arg0,
   // null for textbase, configs, vars_vec and vars_values, a configs_size of 0
   // and set_only_init_params=false. The TessdataManager handed to that call
   // is a local object that lives only for the duration of this function.
   int init_tesseract(const char *datapath,
                      const char *language,
                      OcrEngineMode oem) {
     TessdataManager mgr;
     const int status = init_tesseract(
         datapath, /*textbase=*/nullptr, language, oem, /*configs=*/nullptr,
         /*configs_size=*/0, /*vars_vec=*/nullptr, /*vars_values=*/nullptr,
         /*set_only_init_params=*/false, &mgr);
     return status;
   }
   // Common initialization for a single language.
   // arg0 is the datapath for the tessdata directory, which could be the
   // path of the tessdata directory with no trailing /, or (if tessdata
   // lives in the same directory as the executable) the path of the executable,
   // hence the name arg0.
   // textbase is an optional output file basename (used only for training)
   // language is the language code to load.
   // oem controls which engine(s) will operate on the image
   // configs (argv) is an array of config filenames to load variables from.
   // May be nullptr.
   // configs_size (argc) is the number of elements in configs.
   // vars_vec is an optional vector of variables to set.
   // vars_values is an optional corresponding vector of values for the variables
   // in vars_vec.
   // If set_only_init_params is true, then only the initialization variables
   // will be set.
   int init_tesseract_internal(const char* arg0, const char* textbase,
                               const char* language, OcrEngineMode oem,
                               char** configs, int configs_size,
                               const GenericVector<STRING>* vars_vec,
                               const GenericVector<STRING>* vars_values,
                               bool set_only_init_params, TessdataManager* mgr);

   // Set the universal_id member of each font to be unique among all
   // instances of the same font loaded.
   void SetupUniversalFontIds();

   int init_tesseract_lm(const char* arg0, const char* textbase,
                         const char* language, TessdataManager* mgr);

   void recognize_page(STRING& image_name);
   void end_tesseract();

   bool init_tesseract_lang_data(const char* arg0, const char* textbase,
                                 const char* language, OcrEngineMode oem,
                                 char** configs, int configs_size,
                                 const GenericVector<STRING>* vars_vec,
                                 const GenericVector<STRING>* vars_values,
                                 bool set_only_init_params,
                                 TessdataManager* mgr);

   void ParseLanguageString(const char* lang_str,
                            GenericVector<STRING>* to_load,
                            GenericVector<STRING>* not_to_load);

   SVMenuNode *build_menu_new();
   #ifndef GRAPHICS_DISABLED
   void pgeditor_main(int width, int height, PAGE_RES* page_res);
   #endif  // GRAPHICS_DISABLED
   void process_image_event( // action in image win
                            const SVEvent &event);
   bool process_cmd_win_event(                 // UI command semantics
           int32_t cmd_event,  // which menu item?
           char* new_value   // any prompt data
   );
   void debug_word(PAGE_RES* page_res, const TBOX &selection_box);
   void do_re_display(
           bool (tesseract::Tesseract::* word_painter)(PAGE_RES_IT* pr_it));
   bool word_display(PAGE_RES_IT* pr_it);
   bool word_bln_display(PAGE_RES_IT* pr_it);
   bool word_blank_and_set_display(PAGE_RES_IT* pr_its);
   bool word_set_display(PAGE_RES_IT* pr_it);
   // #ifndef GRAPHICS_DISABLED
   bool word_dumper(PAGE_RES_IT* pr_it);
   // #endif  // GRAPHICS_DISABLED
   void blob_feature_display(PAGE_RES* page_res, const TBOX& selection_box);
   // make rej map for word
   void make_reject_map(WERD_RES *word, ROW *row, int16_t pass);
   bool one_ell_conflict(WERD_RES* word_res, bool update_map);
   int16_t first_alphanum_index(const char *word,
                              const char *word_lengths);
   int16_t first_alphanum_offset(const char *word,
                               const char *word_lengths);
   int16_t alpha_count(const char *word,
                     const char *word_lengths);
   bool word_contains_non_1_digit(const char* word,
                                  const char* word_lengths);
   void dont_allow_1Il(WERD_RES *word);
   int16_t count_alphanums(  //how many alphanums
                         WERD_RES *word);
   void flip_0O(WERD_RES *word);
   bool non_0_digit(const UNICHARSET& ch_set, UNICHAR_ID unichar_id);
   bool non_O_upper(const UNICHARSET& ch_set, UNICHAR_ID unichar_id);
   bool repeated_nonalphanum_wd(WERD_RES* word, ROW* row);
   void nn_match_word(  //Match a word
                      WERD_RES *word,
                      ROW *row);
   void nn_recover_rejects(WERD_RES *word, ROW *row);
   void set_done(  //set done flag
                 WERD_RES *word,
                 int16_t pass);
   int16_t safe_dict_word(const WERD_RES *werd_res);  // is best_choice in dict?
   void flip_hyphens(WERD_RES *word);
   void reject_I_1_L(WERD_RES *word);
   void reject_edge_blobs(WERD_RES *word);
   void reject_mostly_rejects(WERD_RES *word);
   bool word_adaptable(  //should we adapt?
           WERD_RES* word,
           uint16_t mode);

   void recog_word_recursive(WERD_RES* word);
   void recog_word(WERD_RES *word);
   void split_and_recog_word(WERD_RES* word);
   void split_word(WERD_RES *word,
                   int split_pt,
                   WERD_RES **right_piece,
                   BlamerBundle **orig_blamer_bundle) const;
   void join_words(WERD_RES *word,
                   WERD_RES *word2,
                   BlamerBundle *orig_bb) const;
   bool digit_or_numeric_punct(WERD_RES *word, int char_position);
   int16_t eval_word_spacing(WERD_RES_LIST &word_res_list);
   void match_current_words(WERD_RES_LIST &words, ROW *row, BLOCK* block);
   int16_t fp_eval_word_spacing(WERD_RES_LIST &word_res_list);
   void fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK* block);
   void fix_fuzzy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK* block);
   void fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, BLOCK* block);
   void fix_fuzzy_spaces(                      //find fuzzy words
                         ETEXT_DESC *monitor,  //progress monitor
                         int32_t word_count,     //count of words in doc
                         PAGE_RES *page_res);
   void dump_words(WERD_RES_LIST &perm, int16_t score,
                   int16_t mode, bool improved);
   bool fixspace_thinks_word_done(WERD_RES *word);
   int16_t worst_noise_blob(WERD_RES *word_res, float *worst_noise_score);
   float blob_noise_score(TBLOB *blob);
   void break_noisiest_blob_word(WERD_RES_LIST &words);
   GARBAGE_LEVEL garbage_word(WERD_RES *word, BOOL8 ok_dict_word);
   bool potential_word_crunch(WERD_RES* word,
                              GARBAGE_LEVEL garbage_level,
                              bool ok_dict_word);
   void tilde_crunch(PAGE_RES_IT &page_res_it);
   void unrej_good_quality_words(  //unreject potential
                                 PAGE_RES_IT &page_res_it);
   void doc_and_block_rejection(  //reject big chunks
                                PAGE_RES_IT &page_res_it,
                                bool good_quality_doc);
   void quality_based_rejection(PAGE_RES_IT &page_res_it,
                                bool good_quality_doc);
   void convert_bad_unlv_chs(WERD_RES *word_res);
   void tilde_delete(PAGE_RES_IT &page_res_it);
   int16_t word_blob_quality(WERD_RES *word, ROW *row);
   void word_char_quality(WERD_RES *word, ROW *row, int16_t *match_count,
                          int16_t *accepted_match_count);
   void unrej_good_chs(WERD_RES *word, ROW *row);
   int16_t count_outline_errs(char c, int16_t outline_count);
   int16_t word_outline_errs(WERD_RES *word);
   bool terrible_word_crunch(WERD_RES* word, GARBAGE_LEVEL garbage_level);
   CRUNCH_MODE word_deletable(WERD_RES *word, int16_t &delete_mode);
   int16_t failure_count(WERD_RES *word);
   bool noise_outlines(TWERD* word);
   // Prototypes only; each body is defined elsewhere.
   //   page_res       - blocks to check
   //   word_processor - function to call (the original inline comment sat next
   //                    to selection_box; presumably it describes this
   //                    member-function pointer)
   void process_selected_words(
       PAGE_RES* page_res, TBOX& selection_box,
       bool (tesseract::Tesseract::*word_processor)(PAGE_RES_IT* pr_it));
   // Test acceptability after context.
   void tess_add_doc_word(WERD_CHOICE* word_choice);
   void tess_segment_pass_n(int pass_n, WERD_RES* word);
   bool tess_acceptable_word(WERD_RES* word);

   // Applies the box file based on the image name fname and resegments the
   // words in block_list (the page). Two input layouts are handled:
   //   blob mode:      one blob per line in the box file, words as input.
   //   word/line mode: one blob per space-delimited unit after the #, and one
   //                   word per line in the box file. (The box file format is
   //                   described in a comment that is not part of this
   //                   section.)
   // find_segmentation == true (word/line mode): the classifier is used to
   // re-segment words/lines so that they match the space-delimited truth
   // string for each box. The input box may then be for a word or even a whole
   // text line, and the output words will contain multiple blobs corresponding
   // to the space-delimited input string.
   // find_segmentation == false: no classifier is needed, but the chopper can
   // still be used, with the help of the input boxes, to segment touching
   // characters correctly.
   // In the returned PAGE_RES the WERD_RES are set up as normal classification
   // would return them, i.e. with a word, chopped_word, rebuild_word,
   // seam_array, denorm, box_word and best_state, but with NO best_choice or
   // raw_choice, as those would require a UNICHARSET, which we aim to avoid.
   // The correct_text member of WERD_RES is set instead, and may later be
   // converted to a best_choice using CorrectClassifyWords.
   // CorrectClassifyWords is not required before calling ApplyBoxTraining.
   PAGE_RES* ApplyBoxes(const STRING& fname,
                        bool find_segmentation,
                        BLOCK_LIST* block_list);

   // Sets any row xheight that differs significantly from the median to the
   // median.
   void PreenXHeights(BLOCK_LIST* block_list);

   // Builds, from block_list, a PAGE_RES in the form ApplyBoxes requires: all
   // fuzzy spaces are removed and all the words are maximally chopped.
   PAGE_RES* SetupApplyBoxes(const GenericVector<TBOX>& boxes,
                             BLOCK_LIST* block_list);
   // Tests the chopper by running chop_one_blob exhaustively.
   // word_res will contain a filled chopped_word, seam_array, denorm, box_word
   // and best_state for the maximally chopped word.
   void MaximallyChopWord(const GenericVector<TBOX>& boxes, BLOCK* block,
                          ROW* row, WERD_RES* word_res);
   // Gathers consecutive blobs that match the given box into the best_state
   // and the corresponding correct_text.
   // When boxes fight over which of them owns which blobs, the fight is
   // settled by pre-chopping and applying the blobs to whichever of box or
   // next_box has the least non-overlap.
   // Returns false if the box was in error, which can only be caused by
   // failing to find an appropriate blob for a box. This means that blobs may
   // occasionally be segmented incorrectly if the chopper fails to find a
   // suitable chop point.
   bool ResegmentCharBox(PAGE_RES* page_res,
                         const TBOX* prev_box, const TBOX& box,
                         const TBOX* next_box, const char* correct_text);
   // Consumes all source blobs that strongly overlap the given box and puts
   // them into a new word carrying the correct_text label.
   // When boxes fight over which of them owns which blobs, the blobs go to
   // whichever of box or next_box has the least non-overlap.
   // Returns false if the box was in error, which can only be caused by
   // failing to find an overlapping blob for a box.
   bool ResegmentWordBox(BLOCK_LIST* block_list, const TBOX& box,
                         const TBOX* next_box, const char* correct_text);
   // Resegments the words by running the classifier, in an attempt to find
   // the correct segmentation that produces the required string.
   void ReSegmentByClassification(PAGE_RES* page_res);
   // Converts the space-delimited utf8 string into a vector of UNICHAR_ID.
   // Returns false if an invalid UNICHAR_ID is encountered.
   bool ConvertStringToUnichars(const char* utf8,
                                GenericVector<UNICHAR_ID>* class_ids);
   // Resegments the word so that the classifier yields target_text.
   // Returns false if the re-segmentation fails.
   // Brute-force combines up to kMaxGroupSize adjacent blobs and applies a
   // full search on the classifier results to find the best classified
   // segmentation. As a compromise to obtain better recall, 1-1 ambiguity
   // substitutions ARE used.
   bool FindSegmentation(const GenericVector<UNICHAR_ID>& target_text,
                         WERD_RES* word_res);
   // Recursive helper that looks for a match to target_text (from position
   // text_index) in the choices (from position choices_pos).
   // choices is an array of GenericVectors of length choices_length. Each
   // element represents a starting position in the word, and its
   // GenericVector holds classification results for a sequence of consecutive
   // blobs: index 0 is a single blob, index 1 is 2 blobs, etc.
   void SearchForText(const GenericVector<BLOB_CHOICE_LIST*>* choices,
                      int choices_pos,
                      int choices_length,
                      const GenericVector<UNICHAR_ID>& target_text,
                      int text_index,
                      float rating,
                      GenericVector<int>* segmentation,
                      float* best_rating,
                      GenericVector<int>* best_segmentation);
   // Counts up the labelled words and the blobs within them.
   // Deletes all unused or emptied words, counting the unused ones.
   // Resets the W_BOL and W_EOL flags correctly.
   // Builds the rebuild_word and rebuilds the box_word.
   void TidyUp(PAGE_RES* page_res);
   // Logs a bad box, identified by its line in the box file and its box
   // coords.
   void ReportFailedBox(int boxfile_lineno, TBOX box,
                        const char* box_ch, const char* err_msg);
   // Creates a fake best_choice entry, holding the correct text, in each
   // WERD_RES.
   void CorrectClassifyWords(PAGE_RES* page_res);
   // Calls LearnWord to extract features for the labelled blobs within each
   // word. The features are stored in an internal buffer.
   void ApplyBoxTraining(const STRING& fontname, PAGE_RES* page_res);

   // Returns how many blob tops in this word are misfits.
   int CountMisfitTops(WERD_RES* word_res);
   // Returns a new x-height, in pixels (original image coords), that is
   // maximally compatible with the result in word_res, or 0.0f if no x-height
   // better than the current estimate is found.
   float ComputeCompatibleXheight(WERD_RES* word_res,
                                  float* baseline_shift);
   // Parameter declarations. Each *_VAR_H(name, default, help) line gives a
   // parameter's name, its default value and its help text.
   // TODO(ocr-team): Find and remove obsolete parameters.
   BOOL_VAR_H(tessedit_resegment_from_boxes, false,
              "Take segmentation and labeling from box file");
   BOOL_VAR_H(tessedit_resegment_from_line_boxes, false,
               "Conversion of word/line box file to char box file");
   BOOL_VAR_H(tessedit_train_from_boxes, false,
              "Generate training data from boxed chars");
   BOOL_VAR_H(tessedit_make_boxes_from_boxes, false,
              "Generate more boxes from boxed chars");
   BOOL_VAR_H(tessedit_train_line_recognizer, false,
              "Break input into lines and remap boxes if present");
   BOOL_VAR_H(tessedit_dump_pageseg_images, false,
              "Dump intermediate images made during page segmentation");
   // NOTE(review): the help text below enumerates modes 0-7 (with 4=block)
   // while the default is the PSM_SINGLE_BLOCK enumerator. Verify that the
   // numbering still matches the PageSegMode enum in publictypes.h.
   INT_VAR_H(tessedit_pageseg_mode, PSM_SINGLE_BLOCK,
             "Page seg mode: 0=osd only, 1=auto+osd, 2=auto, 3=col, 4=block,"
             " 5=line, 6=word, 7=char"
             " (Values from PageSegMode enum in publictypes.h)");
   INT_VAR_H(tessedit_ocr_engine_mode, tesseract::OEM_DEFAULT,
             "Which OCR engine(s) to run (Tesseract, LSTM, both). Defaults"
             " to loading and running the most accurate available.");
   STRING_VAR_H(tessedit_char_blacklist, "",
                "Blacklist of chars not to recognize");
   STRING_VAR_H(tessedit_char_whitelist, "",
                "Whitelist of chars to recognize");
   STRING_VAR_H(tessedit_char_unblacklist, "",
                "List of chars to override tessedit_char_blacklist");
   BOOL_VAR_H(tessedit_ambigs_training, false,
              "Perform training for ambiguities");
   // The two Devanagari split strategies share the same default (NO_SPLIT);
   // one applies during page segmentation, the other during OCR.
   INT_VAR_H(pageseg_devanagari_split_strategy,
             tesseract::ShiroRekhaSplitter::NO_SPLIT,
             "Whether to use the top-line splitting process for Devanagari "
             "documents while performing page-segmentation.");
   INT_VAR_H(ocr_devanagari_split_strategy,
             tesseract::ShiroRekhaSplitter::NO_SPLIT,
             "Whether to use the top-line splitting process for Devanagari "
             "documents while performing ocr.");
   STRING_VAR_H(tessedit_write_params_to_file, "",
                "Write all parameters to the given file.");
   BOOL_VAR_H(tessedit_adaption_debug, false,
              "Generate and print debug information for adaption");
   INT_VAR_H(bidi_debug, 0, "Debug level for BiDi");
   // Note: unlike the other debug levels in this run, applybox_debug defaults
   // to 1 rather than 0.
   INT_VAR_H(applybox_debug, 1, "Debug level");
   INT_VAR_H(applybox_page, 0, "Page number to apply boxes from");
   STRING_VAR_H(applybox_exposure_pattern, ".exp",
                "Exposure value follows this pattern in the image"
                " filename. The name of the image files are expected"
                " to be in the form [lang].[fontname].exp[num].tif");
   BOOL_VAR_H(applybox_learn_chars_and_char_frags_mode, false,
              "Learn both character fragments (as is done in the"
              " special low exposure mode) as well as unfragmented"
              " characters.");
   BOOL_VAR_H(applybox_learn_ngrams_mode, false,
              "Each bounding box is assumed to contain ngrams. Only"
              " learn the ngrams whose outlines overlap horizontally.");
   BOOL_VAR_H(tessedit_display_outwords, false, "Draw output words");
   BOOL_VAR_H(tessedit_dump_choices, false, "Dump char choices");
   BOOL_VAR_H(tessedit_timing_debug, false, "Print timing stats");
   BOOL_VAR_H(tessedit_fix_fuzzy_spaces, true,
              "Try to improve fuzzy spaces");
   BOOL_VAR_H(tessedit_unrej_any_wd, false,
              "Don't bother with word plausibility");
   BOOL_VAR_H(tessedit_fix_hyphens, true, "Crunch double hyphens?");
   BOOL_VAR_H(tessedit_redo_xheight, true, "Check/Correct x-height");
   BOOL_VAR_H(tessedit_enable_doc_dict, true,
              "Add words to the document dictionary");
   BOOL_VAR_H(tessedit_debug_fonts, false, "Output font info per char");
   BOOL_VAR_H(tessedit_debug_block_rejection, false, "Block and Row stats");
   BOOL_VAR_H(tessedit_enable_bigram_correction, true,
              "Enable correction based on the word bigram dictionary.");
   BOOL_VAR_H(tessedit_enable_dict_correction, false,
              "Enable single word correction based on the dictionary.");
   INT_VAR_H(tessedit_bigram_debug, 0, "Amount of debug output for bigram "
             "correction.");
   BOOL_VAR_H(enable_noise_removal, true,
              "Remove and conditionally reassign small outlines when they"
              " confuse layout analysis, determining diacritics vs noise");
   INT_VAR_H(debug_noise_removal, 0, "Debug reassignment of small outlines");
   // Noise/diacritic certainty thresholds, followed by word-acceptability and
   // document-quality parameters.
   // Worst (min) certainty, for which a diacritic is allowed to make the base
   // character worse and still be included.
   double_VAR_H(noise_cert_basechar, -8.0, "Hingepoint for base char certainty");
   // Worst (min) certainty, for which a non-overlapping diacritic is allowed to
   // make the base character worse and still be included.
   double_VAR_H(noise_cert_disjoint, -2.5, "Hingepoint for disjoint certainty");
   // Worst (min) certainty, for which a diacritic is allowed to make a new
   // stand-alone blob.
   double_VAR_H(noise_cert_punc, -2.5, "Threshold for new punc char certainty");
   // Factor of certainty margin for adding diacritics to not count as worse.
   double_VAR_H(noise_cert_factor, 0.375,
                "Scaling on certainty diff from Hingepoint");
   INT_VAR_H(noise_maxperblob, 8, "Max diacritics to apply to a blob");
   INT_VAR_H(noise_maxperword, 16, "Max diacritics to apply to a word");
   INT_VAR_H(debug_x_ht_level, 0, "Reestimate debug");
   BOOL_VAR_H(debug_acceptable_wds, false, "Dump word pass/fail chk");
   STRING_VAR_H(chs_leading_punct, "('`\"", "Leading punctuation");
   STRING_VAR_H(chs_trailing_punct1, ").,;:?!", "1st Trailing punctuation");
   STRING_VAR_H(chs_trailing_punct2, ")'`\"", "2nd Trailing punctuation");
   // In the quality_* help texts "lte"/"gte" presumably abbreviate "less than
   // or equal"/"greater than or equal" — confirm against the code that reads
   // these limits.
   double_VAR_H(quality_rej_pc, 0.08, "good_quality_doc lte rejection limit");
   double_VAR_H(quality_blob_pc, 0.0, "good_quality_doc gte good blobs limit");
   double_VAR_H(quality_outline_pc, 1.0,
                "good_quality_doc lte outline error limit");
   double_VAR_H(quality_char_pc, 0.95, "good_quality_doc gte good char limit");
   INT_VAR_H(quality_min_initial_alphas_reqd, 2, "alphas in a good word");
   INT_VAR_H(tessedit_tess_adaption_mode, 0x27,
             "Adaptation decision algorithm for tess");
   BOOL_VAR_H(tessedit_minimal_rej_pass1, false,
              "Do minimal rejection on pass 1 output");
   BOOL_VAR_H(tessedit_test_adaption, false, "Test adaption criteria");
   BOOL_VAR_H(tessedit_matcher_log, false, "Log matcher activity");
   // NOTE(review): this help text is identical to the one on
   // tessedit_tess_adaption_mode above; it may have been copy-pasted — confirm.
   INT_VAR_H(tessedit_test_adaption_mode, 3,
             "Adaptation decision algorithm for tess");
   BOOL_VAR_H(test_pt, false, "Test for point");
   double_VAR_H(test_pt_x, 99999.99, "xcoord");
   double_VAR_H(test_pt_y, 99999.99, "ycoord");
   // Multilang/paragraph debug levels, the paragraph detection mode and the
   // LSTM ratings-matrix switch.
   // NOTE(review): the definition site presumably repeats each default and
   // help text; keep both copies in sync.
   INT_VAR_H(multilang_debug_level, 0, "Print multilang debug info.");
   INT_VAR_H(paragraph_debug_level, 0, "Print paragraph debug info.");
   BOOL_VAR_H(paragraph_text_based, true,
              "Run paragraph detection on the post-text-recognition "
              "(more accurate)");
   // The boolean default is spelled `true`, matching every other BOOL_VAR_H
   // in this header.
   BOOL_VAR_H(lstm_use_matrix, true,
              "Use ratings matrix/beam search with lstm");
   // Outline-count character sets and unrejection switches.
   // NOTE(review): outlines_odd and outlines_2 carry the same help text even
   // though their default character sets differ — confirm what distinguishes
   // them.
   STRING_VAR_H(outlines_odd, "%| ", "Non standard number of outlines");
   STRING_VAR_H(outlines_2, "ij!?%\":;", "Non standard number of outlines");
   BOOL_VAR_H(docqual_excuse_outline_errs, false,
              "Allow outline errs in unrejection?");
   BOOL_VAR_H(tessedit_good_quality_unrej, true,
              "Reduce rejection on good docs");
   BOOL_VAR_H(tessedit_use_reject_spaces, true, "Reject spaces?");
   // Rejection percentages for whole-document, whole-block and whole-row
   // rejection.
   // NOTE(review): the definition site presumably repeats each default and
   // help text; keep both copies in sync.
   double_VAR_H(tessedit_reject_doc_percent, 65.00,
                "%rej allowed before rej whole doc");
   double_VAR_H(tessedit_reject_block_percent, 45.00,
                "%rej allowed before rej whole block");
   double_VAR_H(tessedit_reject_row_percent, 40.00,
                "%rej allowed before rej whole row");
   // The adjacent literals are concatenated by the compiler, so the first one
   // must end with a space to keep "rejects" and "which" apart.
   double_VAR_H(tessedit_whole_wd_rej_row_percent, 70.00,
                "Number of row rejects in whole word rejects "
                "which prevents whole row rejection");
   // Block/row rejection switches followed by the crunch_* thresholds.
   // In the help texts "lt"/"gt" presumably abbreviate "less than"/"greater
   // than" and "xht" the x-height — confirm against the code that reads them.
   BOOL_VAR_H(tessedit_preserve_blk_rej_perfect_wds, true,
              "Only rej partially rejected words in block rejection");
   BOOL_VAR_H(tessedit_preserve_row_rej_perfect_wds, true,
              "Only rej partially rejected words in row rejection");
   // NOTE(review): the block and row variants below share one help text.
   BOOL_VAR_H(tessedit_dont_blkrej_good_wds, false,
              "Use word segmentation quality metric");
   BOOL_VAR_H(tessedit_dont_rowrej_good_wds, false,
              "Use word segmentation quality metric");
   INT_VAR_H(tessedit_preserve_min_wd_len, 2,
             "Only preserve wds longer than this");
   BOOL_VAR_H(tessedit_row_rej_good_docs, true,
              "Apply row rejection to good docs");
   double_VAR_H(tessedit_good_doc_still_rowrej_wd, 1.1,
                "rej good doc wd if more than this fraction rejected");
   BOOL_VAR_H(tessedit_reject_bad_qual_wds, true,
              "Reject all bad quality wds");
   BOOL_VAR_H(tessedit_debug_doc_rejection, false, "Page stats");
   BOOL_VAR_H(tessedit_debug_quality_metrics, false,
              "Output data to debug file");
   BOOL_VAR_H(bland_unrej, false, "unrej potential with no checks");
   // NOTE(review): this help text is the same as quality_char_pc's ("good
   // char limit") although the name refers to row rejection — possibly
   // copy-pasted; confirm.
   double_VAR_H(quality_rowrej_pc, 1.1,
                "good_quality_doc gte good char limit");
   BOOL_VAR_H(unlv_tilde_crunching, false,
              "Mark v.bad words for tilde crunch");
   BOOL_VAR_H(hocr_font_info, false,
              "Add font info to hocr output");
   BOOL_VAR_H(crunch_early_merge_tess_fails, true, "Before word crunch?");
   BOOL_VAR_H(crunch_early_convert_bad_unlv_chs, false, "Take out ~^ early?");
   double_VAR_H(crunch_terrible_rating, 80.0, "crunch rating lt this");
   BOOL_VAR_H(crunch_terrible_garbage, true, "As it says");
   double_VAR_H(crunch_poor_garbage_cert, -9.0,
                "crunch garbage cert lt this");
   double_VAR_H(crunch_poor_garbage_rate, 60, "crunch garbage rating lt this");
   double_VAR_H(crunch_pot_poor_rate, 40, "POTENTIAL crunch rating lt this");
   double_VAR_H(crunch_pot_poor_cert, -8.0, "POTENTIAL crunch cert lt this");
   BOOL_VAR_H(crunch_pot_garbage, true, "POTENTIAL crunch garbage");
   // NOTE(review): the two crunch_del_* entries below reuse the "POTENTIAL
   // crunch ..." help texts of crunch_pot_poor_rate/_cert — confirm whether
   // they should describe deletion instead.
   double_VAR_H(crunch_del_rating, 60, "POTENTIAL crunch rating lt this");
   double_VAR_H(crunch_del_cert, -10.0, "POTENTIAL crunch cert lt this");
   double_VAR_H(crunch_del_min_ht, 0.7, "Del if word ht lt xht x this");
   double_VAR_H(crunch_del_max_ht, 3.0, "Del if word ht gt xht x this");
   double_VAR_H(crunch_del_min_width, 3.0, "Del if word width lt xht x this");
   double_VAR_H(crunch_del_high_word, 1.5,
                "Del if word gt xht x this above bl");
   double_VAR_H(crunch_del_low_word, 0.5, "Del if word gt xht x this below bl");
   double_VAR_H(crunch_small_outlines_size, 0.6, "Small if lt xht x this");
   INT_VAR_H(crunch_rating_max, 10, "For adj length in rating per ch");
   INT_VAR_H(crunch_pot_indicators, 1, "How many potential indicators needed");
   BOOL_VAR_H(crunch_leave_ok_strings, true, "Don't touch sensible strings");
   BOOL_VAR_H(crunch_accept_ok, true, "Use acceptability in okstring");
   BOOL_VAR_H(crunch_leave_accept_strings, false,
              "Don't pot crunch sensible strings");
   BOOL_VAR_H(crunch_include_numerals, false, "Fiddle alpha figures");
   // Remaining crunch_* parameters. The lc/uc pair describes the lower-case
   // and upper-case variants of the same limit.
   // NOTE(review): the definition site presumably repeats each default and
   // help text; keep both copies in sync.
   INT_VAR_H(crunch_leave_lc_strings, 4,
             "Don't crunch words with long lower case strings");
   INT_VAR_H(crunch_leave_uc_strings, 4,
             "Don't crunch words with long upper case strings");
   INT_VAR_H(crunch_long_repetitions, 3, "Crunch words with long repetitions");
   INT_VAR_H(crunch_debug, 0, "As it says");
   // Space-fixing (fixsp_*) parameters and related switches.
   // NOTE(review): the definition site presumably repeats each default and
   // help text; keep both copies in sync.
   INT_VAR_H(fixsp_non_noise_limit, 1,
             "How many non-noise blbs either side?");
   double_VAR_H(fixsp_small_outlines_size, 0.28, "Small if lt xht x this");
   BOOL_VAR_H(tessedit_prefer_joined_punct, false, "Reward punctuation joins");
   INT_VAR_H(fixsp_done_mode, 1, "What constitutes done for spacing");
   INT_VAR_H(debug_fix_space_level, 0, "Contextual fixspace debug");
   // x-height, super/subscript, output-format, suspect-marking and rejection
   // parameters.
   STRING_VAR_H(numeric_punctuation, ".,",
                "Punct. chs expected WITHIN numbers");
   INT_VAR_H(x_ht_acceptance_tolerance, 8,
             "Max allowed deviation of blob top outside of font data");
   INT_VAR_H(x_ht_min_change, 8, "Min change in xht before actually trying it");
   INT_VAR_H(superscript_debug, 0, "Debug level for sub & superscript fixer");
   double_VAR_H(superscript_worse_certainty, 2.0, "How many times worse "
                "certainty does a superscript position glyph need to be for us "
                "to try classifying it as a char with a different baseline?");
   double_VAR_H(superscript_bettered_certainty, 0.97, "What reduction in "
                "badness do we think sufficient to choose a superscript over "
                "what we'd thought.  For example, a value of 0.6 means we want "
                "to reduce badness of certainty by 40%");
   double_VAR_H(superscript_scaledown_ratio, 0.4,
                "A superscript scaled down more than this is unbelievably "
                "small.  For example, 0.3 means we expect the font size to "
                "be no smaller than 30% of the text line font size.");
   double_VAR_H(subscript_max_y_top, 0.5,
                "Maximum top of a character measured as a multiple of x-height "
                "above the baseline for us to reconsider whether it's a "
                "subscript.");
   double_VAR_H(superscript_min_y_bottom, 0.3,
               "Minimum bottom of a character measured as a multiple of "
               "x-height above the baseline for us to reconsider whether it's "
               "a superscript.");
   BOOL_VAR_H(tessedit_write_block_separators, false,
              "Write block separators in output");
   BOOL_VAR_H(tessedit_write_rep_codes, false,
              "Write repetition char code");
   BOOL_VAR_H(tessedit_write_unlv, false, "Write .unlv output file");
   BOOL_VAR_H(tessedit_create_txt, false, "Write .txt output file");
   BOOL_VAR_H(tessedit_create_hocr, false, "Write .html hOCR output file");
   BOOL_VAR_H(tessedit_create_tsv, false, "Write .tsv output file");
   BOOL_VAR_H(tessedit_create_pdf, false, "Write .pdf output file");
   BOOL_VAR_H(textonly_pdf, false,
              "Create PDF with only one invisible text layer");
   INT_VAR_H(jpg_quality, 85, "Set JPEG quality level");
   INT_VAR_H(user_defined_dpi, 0, "Specify DPI for input image");
   INT_VAR_H(min_characters_to_try, 50,
             "Specify minimum characters to try during OSD");
   STRING_VAR_H(unrecognised_char, "|",
                "Output char for unidentified blobs");
   INT_VAR_H(suspect_level, 99, "Suspect marker level");
   INT_VAR_H(suspect_space_level, 100,
             "Min suspect level for rejecting spaces");
   INT_VAR_H(suspect_short_words, 2, "Don't Suspect dict wds longer than this");
   BOOL_VAR_H(suspect_constrain_1Il, false, "UNLV keep 1Il chars rejected");
   double_VAR_H(suspect_rating_per_ch, 999.9, "Don't touch bad rating limit");
   double_VAR_H(suspect_accept_rating, -999.9, "Accept good rating limit");
   BOOL_VAR_H(tessedit_minimal_rejection, false, "Only reject tess failures");
   BOOL_VAR_H(tessedit_zero_rejection, false, "Don't reject ANYTHING");
   BOOL_VAR_H(tessedit_word_for_word, false,
              "Make output have exactly one word per WERD");
   BOOL_VAR_H(tessedit_zero_kelvin_rejection, false,
              "Don't reject ANYTHING AT ALL");
   BOOL_VAR_H(tessedit_consistent_reps, true, "Force all rep chars the same");
   INT_VAR_H(tessedit_reject_mode, 0, "Rejection algorithm");
   // NOTE(review): the help text says "Adaption debug" although the parameter
   // is named for rejection — possibly copy-pasted; confirm.
   BOOL_VAR_H(tessedit_rejection_debug, false, "Adaption debug");
   BOOL_VAR_H(tessedit_flip_0O, true, "Contextual 0O O0 flips");
   // The lower and upper hyphen-flip limits share one help text; only the
   // defaults (1.5 and 1.8) differ.
   double_VAR_H(tessedit_lower_flip_hyphen, 1.5,
                "Aspect ratio dot/hyphen test");
   double_VAR_H(tessedit_upper_flip_hyphen, 1.8,
                "Aspect ratio dot/hyphen test");
   BOOL_VAR_H(rej_trust_doc_dawg, false, "Use DOC dawg in 11l conf. detector");
   BOOL_VAR_H(rej_1Il_use_dict_word, false, "Use dictword test");
   BOOL_VAR_H(rej_1Il_trust_permuter_type, true, "Don't double check");
   // The three rej_use_* switches below share the generic help text
   // "Individual rejection control".
   BOOL_VAR_H(rej_use_tess_accepted, true, "Individual rejection control");
   BOOL_VAR_H(rej_use_tess_blanks, true, "Individual rejection control");
   BOOL_VAR_H(rej_use_good_perm, true, "Individual rejection control");
   BOOL_VAR_H(rej_use_sensible_wd, false, "Extend permuter check");
   BOOL_VAR_H(rej_alphas_in_number_perm, false, "Extend permuter check");
   double_VAR_H(rej_whole_of_mostly_reject_word_fract, 0.85, "if >this fract");
   INT_VAR_H(tessedit_image_border, 2, "Rej blbs near image edge limit");
   // \075 is the octal escape for '=', so the default set is "-?*=".
   STRING_VAR_H(ok_repeated_ch_non_alphanum_wds, "-?*\075",
                "Allow NN to unrej");
   STRING_VAR_H(conflict_set_I_l_1, "Il1[]", "Il1 conflict set");
   INT_VAR_H(min_sane_x_ht_pixels, 8, "Reject any x-ht lt or eq than this");
   BOOL_VAR_H(tessedit_create_boxfile, false, "Output text with boxes");
   INT_VAR_H(tessedit_page_number, -1,
             "-1 -> All pages, else specific page to process");
   BOOL_VAR_H(tessedit_write_images, false, "Capture the image from the IPE");
   BOOL_VAR_H(interactive_display_mode, false, "Run interactively?");
   STRING_VAR_H(file_type, ".tif", "Filename extension");
   BOOL_VAR_H(tessedit_override_permuter, true, "According to dict_word");
   STRING_VAR_H(tessedit_load_sublangs, "",
                "List of languages to load with this one");
   BOOL_VAR_H(tessedit_use_primary_params_model, false,
              "In multilingual mode use params model of the primary language");
   // Min acceptable orientation margin (difference in scores between top and 2nd
   // choice in OSResults::orientations) to believe the page orientation.
   double_VAR_H(min_orientation_margin, 7.0,
                "Min acceptable orientation margin");
   BOOL_VAR_H(textord_tabfind_show_vlines, false, "Debug line finding");
   // Switch for the CJK fixed pitch model. The default uses the `false`
   // keyword, like every other BOOL_VAR_H in this header, rather than the
   // FALSE macro.
   BOOL_VAR_H(textord_use_cjk_fp_model, false, "Use CJK fixed pitch model");
   // Feature-extraction, initialisation, layout (textord_*), parallelism and
   // output parameters.
   BOOL_VAR_H(poly_allow_detailed_fx, false,
              "Allow feature extractors to see the original outline");
   BOOL_VAR_H(tessedit_init_config_only, false,
              "Only initialize with the config file. Useful if the instance is "
              "not going to be used for OCR but say only for layout analysis.");
   BOOL_VAR_H(textord_equation_detect, false, "Turn on equation detector");
   BOOL_VAR_H(textord_tabfind_vertical_text, true, "Enable vertical detection");
   BOOL_VAR_H(textord_tabfind_force_vertical_text, false,
              "Force using vertical text page mode");
   double_VAR_H(textord_tabfind_vertical_text_ratio, 0.5,
                "Fraction of textlines deemed vertical to use vertical page "
                "mode");
   double_VAR_H(textord_tabfind_aligned_gap_fraction, 0.75,
                "Fraction of height used as a minimum gap for aligned blobs.");
   INT_VAR_H(tessedit_parallelize, 0, "Run in parallel where possible");
   BOOL_VAR_H(preserve_interword_spaces, false,
              "Preserve multiple interword spaces");
   // The default "\f" is the form feed control character, as the help text
   // says.
   STRING_VAR_H(page_separator, "\f",
                "Page separator (default is form feed control character)");
   // Per the help text, only 0, 1 and 2 are valid values for this mode.
   INT_VAR_H(lstm_choice_mode, 0,
             "Allows to include alternative symbols choices in the hOCR output. "
             "Valid input values are 0, 1 and 2. 0 is the default value. "
             "With 1 the alternative symbol choices per timestep are included. "
             "With 2 the alternative symbol choices are accumulated per character.");

   // Recognition-training prototypes; each body is defined elsewhere.
   // init_recog_training hands back a FILE*; recog_training_segmented and
   // ambigs_classify_and_output each take a FILE* named output_file.
   FILE* init_recog_training(const STRING& fname);
   void recog_training_segmented(const STRING& fname, PAGE_RES* page_res,
                                 volatile ETEXT_DESC* monitor,
                                 FILE* output_file);
   void ambigs_classify_and_output(const char* label, PAGE_RES_IT* pr_it,
                                   FILE* output_file);

  private:
   // Private state of the Tesseract instance.
   // The filename of a backup config file. If not null, then we currently
   // have a temporary debug config file loaded, and backup_config_file_
   // will be loaded, and set to null when debug is complete.
   const char* backup_config_file_;
   // The filename of a config file to read when processing a debug word.
   STRING word_config_;
   // Image used for input to layout analysis and tesseract recognition.
   // May be modified by the ShiroRekhaSplitter to eliminate the top-line.
   Pix* pix_binary_;
   // Grey-level input image if the input was not binary, otherwise nullptr.
   Pix* pix_grey_;
   // Original input image. Color if the input was color.
   Pix* pix_original_;
   // Thresholds that were used to generate the thresholded image from grey.
   Pix* pix_thresholds_;
   // Debug images. If non-empty, will be written on destruction.
   DebugPixa pixa_debug_;
   // Input image resolution after any scaling. The resolution is not well
   // transmitted by operations on Pix, so we keep an independent record here.
   int source_resolution_;
   // The shiro-rekha splitter object which is used to split top-lines in
   // Devanagari words to provide a better word and grapheme segmentation.
   ShiroRekhaSplitter splitter_;
   // Page segmentation/layout
   Textord textord_;
   // True if the primary language uses right_to_left reading order.
   bool right_to_left_;
   // NOTE(review): undocumented in this header section. Presumably a scaled
   // colour copy of the input image and the factor it was scaled by — confirm
   // against the code that sets them.
   Pix* scaled_color_;
   int scaled_factor_;
   // NOTE(review): undocumented in this header section. Presumably the skew
   // correction vector and its inverse — TODO confirm.
   FCOORD deskew_;
   FCOORD reskew_;
   // NOTE(review): undocumented; the contents of TesseractStats are not
   // visible from here.
   TesseractStats stats_;
   // Sub-languages to be tried in addition to this.
   GenericVector<Tesseract*> sub_langs_;
   // Most recently used Tesseract out of this and sub_langs_. The default
   // language for the next word.
   Tesseract* most_recently_used_;
   // The size of the font table, ie max possible font id + 1.
   int font_table_size_;
   // Equation detector. Note: this pointer is NOT owned by the class.
   EquationDetect* equ_detect_;
   // LSTM recognizer, if available.
   // NOTE(review): unlike equ_detect_, ownership of this pointer is not
   // stated here — confirm.
   LSTMRecognizer* lstm_recognizer_;
   // Output "page" number (actually line number) using TrainLineRecognizer.
   int train_line_page_num_;
 };

 }  // namespace tesseract

 #endif  // TESSERACT_CCMAIN_TESSERACTCLASS_H_
tesseract::Tesseract::applybox_learn_ngrams_mode
bool applybox_learn_ngrams_mode
Definition: tesseractclass.h:862

tesseract::Tesseract::textord_tabfind_vertical_text
bool textord_tabfind_vertical_text
Definition: tesseractclass.h:1108

tesseract::Tesseract::set_unlv_suspects
void set_unlv_suspects(WERD_RES *word)
Definition: output.cpp:280

tesseract::Tesseract::subscript_max_y_top
double subscript_max_y_top
Definition: tesseractclass.h:1028

tesseract::Tesseract::SetupAllWordsPassN
void SetupAllWordsPassN(int pass_n, const TBOX *target_word_box, const char *word_config, PAGE_RES *page_res, GenericVector< WordData > *words)
Definition: control.cpp:159

tesseract::Tesseract::superscript_bettered_certainty
double superscript_bettered_certainty
Definition: tesseractclass.h:1020

tesseract::Tesseract::outlines_2
char * outlines_2
Definition: tesseractclass.h:929

tesseract::Tesseract::mutable_pix_binary
Pix ** mutable_pix_binary()
Definition: tesseractclass.h:197

tesseract::Tesseract::alpha_count
int16_t alpha_count(const char *word, const char *word_lengths)
Definition: reject.cpp:500

tesseract::Tesseract::crunch_leave_uc_strings
int crunch_leave_uc_strings
Definition: tesseractclass.h:999

tesseract::Tesseract::debug_noise_removal
int debug_noise_removal
Definition: tesseractclass.h:885

tesseract::Tesseract::MaximallyChopWord
void MaximallyChopWord(const GenericVector< TBOX > &boxes, BLOCK *block, ROW *row, WERD_RES *word_res)

tesseract::Tesseract::first_alphanum_index
int16_t first_alphanum_index(const char *word, const char *word_lengths)
Definition: reject.cpp:474

tesseract::Tesseract::failure_count
int16_t failure_count(WERD_RES *word)
Definition: docqual.cpp:966

tesseract::Tesseract::ClassifyBlobAsWord
float ClassifyBlobAsWord(int pass_n, PAGE_RES_IT *pr_it, C_BLOB *blob, STRING *best_str, float *c2)
Definition: control.cpp:1287

tesseract::Tesseract::tessedit_reject_mode
int tessedit_reject_mode
Definition: tesseractclass.h:1064

tesseract::Tesseract::scaled_factor
int scaled_factor() const
Definition: tesseractclass.h:256

tesseract::Tesseract::suspect_space_level
int suspect_space_level
Definition: tesseractclass.h:1052

tesseract::Tesseract::acceptable_number_string
bool acceptable_number_string(const char *s, const char *lengths)
Definition: output.cpp:394

tesseract::Tesseract::tessedit_create_hocr
bool tessedit_create_hocr
Definition: tesseractclass.h:1039

tesseract::Tesseract::ReSegmentByClassification
void ReSegmentByClassification(PAGE_RES *page_res)

tesseract::Tesseract::page_separator
char * page_separator
Definition: tesseractclass.h:1120

tesseract::Tesseract::paragraph_text_based
bool paragraph_text_based
Definition: tesseractclass.h:926

INT_VAR_H
#define INT_VAR_H(name, val, comment)
Definition: params.h:264

tesseract::Tesseract::tessedit_char_whitelist
char * tessedit_char_whitelist
Definition: tesseractclass.h:832

tesseract::Tesseract::font_recognition_pass
void font_recognition_pass(PAGE_RES *page_res)
Definition: control.cpp:2060

tesseract::Tesseract::tessedit_prefer_joined_punct
bool tessedit_prefer_joined_punct
Definition: tesseractclass.h:1005

tesseract::Tesseract::tessedit_write_images
bool tessedit_write_images
Definition: tesseractclass.h:1088

tesseract::Tesseract::min_sane_x_ht_pixels
int min_sane_x_ht_pixels
Definition: tesseractclass.h:1084

UNICHAR_ID
int UNICHAR_ID
Definition: unichar.h:35

tesseract::Tesseract::SegmentPage
int SegmentPage(const STRING *input_file, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr)
Definition: pagesegmain.cpp:100

tesseract::Tesseract::tessedit_parallelize
int tessedit_parallelize
Definition: tesseractclass.h:1116

tesseract::Tesseract::ComputeCompatibleXheight
float ComputeCompatibleXheight(WERD_RES *word_res, float *baseline_shift)
Definition: fixxht.cpp:102

tesseract::Tesseract::pix_grey
Pix * pix_grey() const
Definition: tesseractclass.h:204

tesseract::Tesseract::join_words
void join_words(WERD_RES *word, WERD_RES *word2, BlamerBundle *orig_bb) const
Definition: tfacepp.cpp:234

tesseract::Tesseract::tessedit_minimal_rejection
bool tessedit_minimal_rejection
Definition: tesseractclass.h:1057

tesseract::Tesseract::classify_word_pass2
void classify_word_pass2(const WordData &word_data, WERD_RES **in_word, PointerVector< WERD_RES > *out_words)
Definition: control.cpp:1591

tessdatamanager.h

tesseract::Tesseract::AssignDiacriticsToNewBlobs
void AssignDiacriticsToNewBlobs(const GenericVector< C_OUTLINE *> &outlines, int pass, WERD *real_word, PAGE_RES_IT *pr_it, GenericVector< bool > *word_wanted, GenericVector< C_BLOB *> *target_blobs)
Definition: control.cpp:1074

tesseract::Tesseract::tessedit_zero_kelvin_rejection
bool tessedit_zero_kelvin_rejection
Definition: tesseractclass.h:1062

tesseract::ShiroRekhaSplitter::NO_SPLIT
Definition: devanagari_processing.h:75

tesseract::Tesseract::digit_or_numeric_punct
bool digit_or_numeric_punct(WERD_RES *word, int char_position)
Definition: fixspace.cpp:373

tesseract::Tesseract::tessedit_load_sublangs
char * tessedit_load_sublangs
Definition: tesseractclass.h:1093

tesseract::Tesseract::tessedit_write_params_to_file
char * tessedit_write_params_to_file
Definition: tesseractclass.h:846

tesseract::Tesseract::ResegmentWordBox
bool ResegmentWordBox(BLOCK_LIST *block_list, const TBOX &box, const TBOX *next_box, const char *correct_text)

tesseract::Tesseract::dont_allow_1Il
void dont_allow_1Il(WERD_RES *word)
Definition: reject.cpp:531

TWERD
Definition: blobs.h:402

tesseract::Tesseract::superscript_worse_certainty
double superscript_worse_certainty
Definition: tesseractclass.h:1016

tesseract::Tesseract::lstm_use_matrix
bool lstm_use_matrix
Definition: tesseractclass.h:927

tesseract::Tesseract::SetScaledColor
void SetScaledColor(int factor, Pix *color)
Definition: tesseractclass.h:259

tesseract::Tesseract::TrainFromBoxes
void TrainFromBoxes(const GenericVector< TBOX > &boxes, const GenericVector< STRING > &texts, BLOCK_LIST *block_list, DocumentData *training_data)
Definition: linerec.cpp:74

tesseract::Tesseract::PrepareForTessOCR
void PrepareForTessOCR(BLOCK_LIST *block_list, Tesseract *osd_tess, OSResults *osr)
Definition: tesseractclass.cpp:655

tesseract::PSM_SINGLE_BLOCK
Assume a single uniform block of text. (Default.)
Definition: publictypes.h:172

tesseract::Tesseract::test_pt_x
double test_pt_x
Definition: tesseractclass.h:920

pageres.h

tesseract::Tesseract::set_pix_original
void set_pix_original(Pix *original_pix)
Definition: tesseractclass.h:213

tesseract::Tesseract::rej_1Il_use_dict_word
bool rej_1Il_use_dict_word
Definition: tesseractclass.h:1072

tesseract::Tesseract::getDict
Dict & getDict() override
Definition: tesseractclass.cpp:556

tesseract::Tesseract::ClassifyBlobPlusOutlines
float ClassifyBlobPlusOutlines(const GenericVector< bool > &ok_outlines, const GenericVector< C_OUTLINE *> &outlines, int pass_n, PAGE_RES_IT *pr_it, C_BLOB *blob, STRING *best_str)
Definition: control.cpp:1239

tesseract::Tesseract::tessedit_fix_hyphens
bool tessedit_fix_hyphens
Definition: tesseractclass.h:870

tesseract::Tesseract::min_orientation_margin
double min_orientation_margin
Definition: tesseractclass.h:1099

tesseract::Tesseract::LSTMRecognizeWord
void LSTMRecognizeWord(const BLOCK &block, ROW *row, WERD_RES *word, PointerVector< WERD_RES > *words)
Definition: linerec.cpp:222

tesseract::Tesseract::lstm_choice_mode
int lstm_choice_mode
Definition: tesseractclass.h:1125

tesseract::Tesseract::tessedit_rejection_debug
bool tessedit_rejection_debug
Definition: tesseractclass.h:1065

tesseract::WordData::block
BLOCK * block
Definition: tesseractclass.h:157

tesseract::Tesseract::quality_blob_pc
double quality_blob_pc
Definition: tesseractclass.h:906

tesseract::Tesseract::tessedit_debug_block_rejection
bool tessedit_debug_block_rejection
Definition: tesseractclass.h:875

ratngs.h

tesseract::Tesseract::tessedit_create_tsv
bool tessedit_create_tsv
Definition: tesseractclass.h:1040

tesseract::Tesseract::word_display
bool word_display(PAGE_RES_IT *pr_it)
Definition: pgedit.cpp:759

host.h

tesseract::TesseractStats::write_results_empty_block
bool write_results_empty_block
Definition: tesseractclass.h:143

BOOL_VAR_H
#define BOOL_VAR_H(name, val, comment)
Definition: params.h:267

tesseract::Tesseract::tessedit_create_txt
bool tessedit_create_txt
Definition: tesseractclass.h:1038

wordrec.h

tesseract::TesseractStats::doc_blob_quality
int16_t doc_blob_quality
Definition: tesseractclass.h:131

tesseract::TesseractStats::dict_words
int32_t dict_words
Definition: tesseractclass.h:137

tesseract::Tesseract::tess_add_doc_word
void tess_add_doc_word(WERD_CHOICE *word_choice)
Definition: tessbox.cpp:72

tesseract::Tesseract::tessedit_ocr_engine_mode
int tessedit_ocr_engine_mode
Definition: tesseractclass.h:828

tesseract::Tesseract::ImageWidth
int ImageWidth() const
Definition: tesseractclass.h:247

tesseract::Tesseract::tessedit_char_unblacklist
char * tessedit_char_unblacklist
Definition: tesseractclass.h:834

tesseract::Tesseract::user_defined_dpi
int user_defined_dpi
Definition: tesseractclass.h:1045

tesseract::Tesseract::noise_cert_disjoint
double noise_cert_disjoint
Definition: tesseractclass.h:891

tesseract::Tesseract::classify_word_pass1
void classify_word_pass1(const WordData &word_data, WERD_RES **in_word, PointerVector< WERD_RES > *out_words)
Definition: control.cpp:1420

tesseract::Tesseract::init_tesseract_lm
int init_tesseract_lm(const char *arg0, const char *textbase, const char *language, TessdataManager *mgr)
Definition: tessedit.cpp:462

tesseract::Tesseract::suspect_constrain_1Il
bool suspect_constrain_1Il
Definition: tesseractclass.h:1054

tesseract::TesseractStats::good_char_count
int16_t good_char_count
Definition: tesseractclass.h:134

tesseract::Tesseract::crunch_leave_accept_strings
bool crunch_leave_accept_strings
Definition: tesseractclass.h:994

tesseract::Tesseract::end_tesseract
void end_tesseract()
Definition: tessedit.cpp:475

tesseract::Tesseract::terrible_word_crunch
bool terrible_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_level)
Definition: docqual.cpp:504

TBOX
Definition: rect.h:34

tesseract::Tesseract::pix_original
Pix * pix_original() const
Definition: tesseractclass.h:211

tesseract::Tesseract::fix_sp_fp_word
void fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, BLOCK *block)
Definition: fixspace.cpp:565

double_VAR_H
#define double_VAR_H(name, val, comment)
Definition: params.h:273

tesseract::Tesseract::tessedit_word_for_word
bool tessedit_word_for_word
Definition: tesseractclass.h:1060

tesseract::Tesseract::tessedit_debug_quality_metrics
bool tessedit_debug_quality_metrics
Definition: tesseractclass.h:962

tesseract::Tesseract::RetryWithLanguage
int RetryWithLanguage(const WordData &word_data, WordRecognizer recognizer, bool debug, WERD_RES **in_word, PointerVector< WERD_RES > *best_words)
Definition: control.cpp:910

points.h

tesseract::Tesseract::crunch_include_numerals
bool crunch_include_numerals
Definition: tesseractclass.h:995

tesseract::Tesseract::word_contains_non_1_digit
bool word_contains_non_1_digit(const char *word, const char *word_lengths)
Definition: reject.cpp:514

tesseract::Tesseract::tessedit_timing_debug
bool tessedit_timing_debug
Definition: tesseractclass.h:865

tesseract::Tesseract::do_re_display
void do_re_display(bool(tesseract::Tesseract::*word_painter)(PAGE_RES_IT *pr_it))
Definition: pgedit.cpp:298

tesseract::Tesseract::count_alphas
int16_t count_alphas(const WERD_CHOICE &word)
Definition: output.cpp:373

tesseract::Tesseract::tessedit_dump_choices
bool tessedit_dump_choices
Definition: tesseractclass.h:864

tesseract::ScriptPos
ScriptPos
Definition: ratngs.h:262

tesseract::Tesseract::scaled_color
Pix * scaled_color() const
Definition: tesseractclass.h:253

tesseract::Tesseract::tessedit_zero_rejection
bool tessedit_zero_rejection
Definition: tesseractclass.h:1058

tesseract::Tesseract::tessedit_resegment_from_boxes
bool tessedit_resegment_from_boxes
Definition: tesseractclass.h:811

tesseract::Tesseract::ApplyBoxTraining
void ApplyBoxTraining(const STRING &fontname, PAGE_RES *page_res)

tesseract::Tesseract::tessedit_enable_bigram_correction
bool tessedit_enable_bigram_correction
Definition: tesseractclass.h:877

tesseract::Tesseract::set_pix_grey
void set_pix_grey(Pix *grey_pix)
Definition: tesseractclass.h:207

tesseract::Tesseract::GetRectImage
ImageData * GetRectImage(const TBOX &box, const BLOCK &block, int padding, TBOX *revised_box) const
Definition: linerec.cpp:163

tesseract::Tesseract::tessedit_reject_doc_percent
double tessedit_reject_doc_percent
Definition: tesseractclass.h:936

tesseract::Tesseract::set_word_fonts
void set_word_fonts(WERD_RES *word)
Definition: control.cpp:1981

tesseract::Tesseract::tessedit_reject_block_percent
double tessedit_reject_block_percent
Definition: tesseractclass.h:938

tesseract::Tesseract::blob_feature_display
void blob_feature_display(PAGE_RES *page_res, const TBOX &selection_box)
Definition: pgedit.cpp:959

tesseract::Tesseract::crunch_del_min_ht
double crunch_del_min_ht
Definition: tesseractclass.h:982

tesseract::Tesseract::tessedit_adaption_debug
bool tessedit_adaption_debug
Definition: tesseractclass.h:848

tesseract::Tesseract::suspect_accept_rating
double suspect_accept_rating
Definition: tesseractclass.h:1056

WERD_RES
Definition: pageres.h:169

STRING_VAR_H
#define STRING_VAR_H(name, val, comment)
Definition: params.h:270

tesseract::Tesseract::crunch_early_convert_bad_unlv_chs
bool crunch_early_convert_bad_unlv_chs
Definition: tesseractclass.h:971

tesseract::Tesseract::enable_noise_removal
bool enable_noise_removal
Definition: tesseractclass.h:884

tesseract::Tesseract::noise_outlines
bool noise_outlines(TWERD *word)
Definition: docqual.cpp:978

tesseract::DebugPixa
Definition: debugpixa.h:10

tesseract::Tesseract::init_tesseract_lang_data
bool init_tesseract_lang_data(const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params, TessdataManager *mgr)
Definition: tessedit.cpp:91

tesseract::Tesseract::pageseg_devanagari_split_strategy
int pageseg_devanagari_split_strategy
Definition: tesseractclass.h:840

tesseract::Tesseract::tessedit_override_permuter
bool tessedit_override_permuter
Definition: tesseractclass.h:1091

tesseract::Tesseract::eval_word_spacing
int16_t eval_word_spacing(WERD_RES_LIST &word_res_list)
Definition: fixspace.cpp:269

tesseract::PointerVector< WERD_RES >

strngs.h

PAGE_RES
Definition: pageres.h:77

tesseract::Tesseract::superscript_scaledown_ratio
double superscript_scaledown_ratio
Definition: tesseractclass.h:1024

tesseract::Tesseract::tilde_crunch
void tilde_crunch(PAGE_RES_IT &page_res_it)
Definition: docqual.cpp:418

tesseract::Tesseract::rej_use_good_perm
bool rej_use_good_perm
Definition: tesseractclass.h:1076

tesseract::Tesseract::tessedit_minimal_rej_pass1
bool tessedit_minimal_rej_pass1
Definition: tesseractclass.h:914

tesseract::Tesseract::x_ht_min_change
int x_ht_min_change
Definition: tesseractclass.h:1012

tesseract::Tesseract::repeated_nonalphanum_wd
bool repeated_nonalphanum_wd(WERD_RES *word, ROW *row)
Definition: reject.cpp:587

tesseract::Tesseract::dump_words
void dump_words(WERD_RES_LIST &perm, int16_t score, int16_t mode, bool improved)
Definition: fixspace.cpp:479

tesseract::Tesseract::crunch_pot_indicators
int crunch_pot_indicators
Definition: tesseractclass.h:990

tesseract::Tesseract::process_image_event
void process_image_event(const SVEvent &event)
Definition: pgedit.cpp:559

tesseract::Tesseract::SubAndSuperscriptFix
bool SubAndSuperscriptFix(WERD_RES *word_res)
Definition: superscript.cpp:102

tesseract::Tesseract::tessedit_preserve_min_wd_len
int tessedit_preserve_min_wd_len
Definition: tesseractclass.h:953

tesseract::Tesseract::applybox_page
int applybox_page
Definition: tesseractclass.h:851

tesseract::Tesseract::non_0_digit
bool non_0_digit(const UNICHARSET &ch_set, UNICHAR_ID unichar_id)
Definition: reject.cpp:794

tesseract::Tesseract::tessedit_reject_row_percent
double tessedit_reject_row_percent
Definition: tesseractclass.h:940

tesseract::Tesseract::tessedit_resegment_from_line_boxes
bool tessedit_resegment_from_line_boxes
Definition: tesseractclass.h:813

genericvector.h

tesseract::Tesseract::tessedit_make_boxes_from_boxes
bool tessedit_make_boxes_from_boxes
Definition: tesseractclass.h:817

UNICHARSET
Definition: unicharset.h:146

tesseract::Tesseract::crunch_del_low_word
double crunch_del_low_word
Definition: tesseractclass.h:987

tesseract::Tesseract::FindSegmentation
bool FindSegmentation(const GenericVector< UNICHAR_ID > &target_text, WERD_RES *word_res)

tesseract::Tesseract::convert_bad_unlv_chs
void convert_bad_unlv_chs(WERD_RES *word_res)
Definition: docqual.cpp:660

tesseract::Tesseract::test_pt_y
double test_pt_y
Definition: tesseractclass.h:921

tesseract::Tesseract::chs_trailing_punct2
char * chs_trailing_punct2
Definition: tesseractclass.h:904

tesseract::Tesseract::BestPix
Pix * BestPix() const
Definition: tesseractclass.h:229

tesseract::Tesseract::docqual_excuse_outline_errs
bool docqual_excuse_outline_errs
Definition: tesseractclass.h:931

tesseract::Tesseract::crunch_rating_max
int crunch_rating_max
Definition: tesseractclass.h:989

tesseract::Tesseract::write_results
void write_results(PAGE_RES_IT &page_res_it, char newline_type, bool force_eol)
Definition: output.cpp:105

tesseract::Tesseract::tessedit_dump_pageseg_images
bool tessedit_dump_pageseg_images
Definition: tesseractclass.h:821

tesseract::Tesseract::blob_noise_score
float blob_noise_score(TBLOB *blob)
Definition: fixspace.cpp:790

tesseract::Tesseract::noise_cert_punc
double noise_cert_punc
Definition: tesseractclass.h:894

tesseract::WordData::lang_words
PointerVector< WERD_RES > lang_words
Definition: tesseractclass.h:159

tesseract::Tesseract::numeric_punctuation
char * numeric_punctuation
Definition: tesseractclass.h:1009

tesseract::Tesseract::tessedit_preserve_blk_rej_perfect_wds
bool tessedit_preserve_blk_rej_perfect_wds
Definition: tesseractclass.h:945

tesseract::Tesseract::tessedit_init_config_only
bool tessedit_init_config_only
Definition: tesseractclass.h:1106

tesseract::Tesseract::SearchWords
void SearchWords(PointerVector< WERD_RES > *words)
Definition: linerec.cpp:250

tesseract::Tesseract::applybox_learn_chars_and_char_frags_mode
bool applybox_learn_chars_and_char_frags_mode
Definition: tesseractclass.h:859

tesseract::Tesseract::tessedit_good_doc_still_rowrej_wd
double tessedit_good_doc_still_rowrej_wd
Definition: tesseractclass.h:957

tesseract::Tesseract::hocr_font_info
bool hocr_font_info
Definition: tesseractclass.h:969

tesseract::Tesseract::quality_min_initial_alphas_reqd
int quality_min_initial_alphas_reqd
Definition: tesseractclass.h:910

tesseract::Tesseract::noise_cert_factor
double noise_cert_factor
Definition: tesseractclass.h:897

tesseract::Tesseract::superscript_min_y_bottom
double superscript_min_y_bottom
Definition: tesseractclass.h:1032

tesseract::SetParamConstraint
SetParamConstraint
Definition: params.h:36

tesseract::Tesseract::match_word_pass_n
void match_word_pass_n(int pass_n, WERD_RES *word, ROW *row, BLOCK *block)
Definition: control.cpp:1649

tesseract::Tesseract::first_alphanum_offset
int16_t first_alphanum_offset(const char *word, const char *word_lengths)
Definition: reject.cpp:487

tesseract::Tesseract::init_tesseract
int init_tesseract(const char *datapath, const char *language, OcrEngineMode oem)
Definition: tesseractclass.h:524

tesseract::Tesseract::tessedit_lower_flip_hyphen
double tessedit_lower_flip_hyphen
Definition: tesseractclass.h:1068

tesseract::Tesseract::interactive_display_mode
bool interactive_display_mode
Definition: tesseractclass.h:1089

tesseract::Tesseract::crunch_del_max_ht
double crunch_del_max_ht
Definition: tesseractclass.h:983

tesseract::Tesseract::textord_tabfind_force_vertical_text
bool textord_tabfind_force_vertical_text
Definition: tesseractclass.h:1110

tesseract::Tesseract::word_blob_quality
int16_t word_blob_quality(WERD_RES *word, ROW *row)
Definition: docqual.cpp:61

tesseract::OEM_LSTM_ONLY
Definition: publictypes.h:270

tesseract::Tesseract::fixsp_small_outlines_size
double fixsp_small_outlines_size
Definition: tesseractclass.h:1004

tesseract::Tesseract::Clear
void Clear()
Definition: tesseractclass.cpp:569

tesseract::TesseractStats
Definition: tesseractclass.h:115

tesseract::Tesseract::preserve_interword_spaces
bool preserve_interword_spaces
Definition: tesseractclass.h:1118

tesseract::Tesseract::quality_rej_pc
double quality_rej_pc
Definition: tesseractclass.h:905

tesseract::Tesseract::SetEquationDetect
void SetEquationDetect(EquationDetect *detector)
Definition: tesseractclass.cpp:586

tesseract::Tesseract::init_tesseract
int init_tesseract(const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params, TessdataManager *mgr)
Definition: tessedit.cpp:296

tesseract::Tesseract::tessedit_char_blacklist
char * tessedit_char_blacklist
Definition: tesseractclass.h:830

tesseract::Tesseract::recog_word
void recog_word(WERD_RES *word)
Definition: tfacepp.cpp:40

tesseract::Tesseract::rej_trust_doc_dawg
bool rej_trust_doc_dawg
Definition: tesseractclass.h:1071

tesseract::Tesseract::tessedit_image_border
int tessedit_image_border
Definition: tesseractclass.h:1080

tesseract::Tesseract::unrej_good_quality_words
void unrej_good_quality_words(PAGE_RES_IT &page_res_it)
Definition: docqual.cpp:161

tesseract::Tesseract::TrainLineRecognizer
void TrainLineRecognizer(const STRING &input_imagename, const STRING &output_basename, BLOCK_LIST *block_list)
Definition: linerec.cpp:43

tesseract::Tesseract::tessedit_enable_doc_dict
bool tessedit_enable_doc_dict
Definition: tesseractclass.h:873

SVMenuNode
Definition: svmnode.h:35

tesseract::Tesseract::tessedit_redo_xheight
bool tessedit_redo_xheight
Definition: tesseractclass.h:871

tesseract::Tesseract::nn_match_word
void nn_match_word(WERD_RES *word, ROW *row)

tesseract::Tesseract::SetupApplyBoxes
PAGE_RES * SetupApplyBoxes(const GenericVector< TBOX > &boxes, BLOCK_LIST *block_list)

tesseract::Tesseract::crunch_del_cert
double crunch_del_cert
Definition: tesseractclass.h:981

tesseract::Tesseract::debug_acceptable_wds
bool debug_acceptable_wds
Definition: tesseractclass.h:901

tesseract::Tesseract::chs_leading_punct
char * chs_leading_punct
Definition: tesseractclass.h:902

tesseract::Tesseract::tessedit_test_adaption
bool tessedit_test_adaption
Definition: tesseractclass.h:915

tesseract::TessdataManager
Definition: tessdatamanager.h:126

tesseract::Tesseract::Tesseract
Tesseract()
Definition: tesseractclass.cpp:54

tesseract::Tesseract::textord
const Textord & textord() const
Definition: tesseractclass.h:263

tesseract::WordData::word
WERD_RES * word
Definition: tesseractclass.h:155

tesseract::Tesseract::rej_alphas_in_number_perm
bool rej_alphas_in_number_perm
Definition: tesseractclass.h:1078

tesseract::Tesseract::reskew
const FCOORD & reskew() const
Definition: tesseractclass.h:193

SVEvent
Definition: scrollview.h:61

tesseract::Tesseract::ProcessTargetWord
bool ProcessTargetWord(const TBOX &word_box, const TBOX &target_word_box, const char *word_config, int pass)
Definition: control.cpp:125

tesseract::WordData::WordData
WordData()
Definition: tesseractclass.h:148

tesseract::Tesseract::RunOldFixXht
bool RunOldFixXht(WERD_RES *word, BLOCK *block, ROW *row)

tesseract::Tesseract::unlv_tilde_crunching
bool unlv_tilde_crunching
Definition: tesseractclass.h:967

tesseract::Tesseract::tessedit_create_pdf
bool tessedit_create_pdf
Definition: tesseractclass.h:1041

tesseract::TesseractStats::dump_words_str
STRING dump_words_str
Definition: tesseractclass.h:138

tesseract::WordData::WordData
WordData(BLOCK *block_in, ROW *row_in, WERD_RES *word_res)
Definition: tesseractclass.h:152

tesseract::Tesseract::SetupWordScripts
void SetupWordScripts(BLOCK_LIST *blocks)

tesseract::Tesseract::non_O_upper
bool non_O_upper(const UNICHARSET &ch_set, UNICHAR_ID unichar_id)
Definition: reject.cpp:790

tesseract::Tesseract::TestNewNormalization
bool TestNewNormalization(int original_misfits, float baseline_shift, float new_x_ht, WERD_RES *word, BLOCK *block, ROW *row)
Definition: control.cpp:1538

tesseract::Tesseract::textord_tabfind_vertical_text_ratio
double textord_tabfind_vertical_text_ratio
Definition: tesseractclass.h:1113

tesseract::Tesseract::ReportXhtFixResult
void ReportXhtFixResult(bool accept_new_word, float new_x_ht, WERD_RES *word, WERD_RES *new_word)
Definition: control.cpp:1481

tesseract::DocumentData
Definition: imagedata.h:205

CRUNCH_MODE
CRUNCH_MODE
Definition: pageres.h:159

tesseract::Tesseract::crunch_debug
int crunch_debug
Definition: tesseractclass.h:1001

tesseract::Tesseract::tessedit_write_block_separators
bool tessedit_write_block_separators
Definition: tesseractclass.h:1034

tesseract::TesseractStats::last_char_was_tilde
bool last_char_was_tilde
Definition: tesseractclass.h:142

GenericVector
Definition: baseapi.h:37

tesseract::Tesseract::tessedit_ambigs_training
bool tessedit_ambigs_training
Definition: tesseractclass.h:836

tesseract::Tesseract::safe_dict_word
int16_t safe_dict_word(const WERD_RES *werd_res)
Definition: reject.cpp:612

tesseract::Tesseract::tessedit_whole_wd_rej_row_percent
double tessedit_whole_wd_rej_row_percent
Definition: tesseractclass.h:943

tesseract::Tesseract::debug_fix_space_level
int debug_fix_space_level
Definition: tesseractclass.h:1007

tesseract::WordRecognizer
void(Tesseract::* WordRecognizer)(const WordData &word_data, WERD_RES **in_word, PointerVector< WERD_RES > *out_words)
Definition: tesseractclass.h:169

tesseract::Tesseract::SetupUniversalFontIds
void SetupUniversalFontIds()
Definition: tessedit.cpp:441

tesseract::TesseractStats::doc_char_quality
int16_t doc_char_quality
Definition: tesseractclass.h:133

BlamerBundle
Definition: blamer.h:100

tesseract::Tesseract::word_adaptable
bool word_adaptable(WERD_RES *word, uint16_t mode)
Definition: adaptions.cpp:35

tesseract::Tesseract::script_pos_pass
void script_pos_pass(PAGE_RES *page_res)
Definition: control.cpp:740

tesseract::Tesseract::ResetAdaptiveClassifier
void ResetAdaptiveClassifier()
Definition: tesseractclass.cpp:592

tesseract::Tesseract::tessedit_train_from_boxes
bool tessedit_train_from_boxes
Definition: tesseractclass.h:815

tesseract::Tesseract::rej_1Il_trust_permuter_type
bool rej_1Il_trust_permuter_type
Definition: tesseractclass.h:1073

tesseract::Tesseract::word_blank_and_set_display
bool word_blank_and_set_display(PAGE_RES_IT *pr_its)
Definition: pgedit.cpp:715

WERD_CHOICE
Definition: ratngs.h:273

tesseract::Tesseract::source_resolution
int source_resolution() const
Definition: tesseractclass.h:241

tesseract::Tesseract::tessedit_matcher_log
bool tessedit_matcher_log
Definition: tesseractclass.h:916

tesseract::Tesseract::textord_use_cjk_fp_model
bool textord_use_cjk_fp_model
Definition: tesseractclass.h:1101

FALSE
#define FALSE
Definition: capi.h:52

tesseract::TesseractStats::doc_good_char_quality
int16_t doc_good_char_quality
Definition: tesseractclass.h:135

tesseract::Tesseract::pgeditor_main
void pgeditor_main(int width, int height, PAGE_RES *page_res)
Definition: pgedit.cpp:327

tesseract::Tesseract::split_and_recog_word
void split_and_recog_word(WERD_RES *word)
Definition: tfacepp.cpp:138

tesseract::Tesseract::applybox_debug
int applybox_debug
Definition: tesseractclass.h:850

tesseract::Tesseract::ApplyBoxes
PAGE_RES * ApplyBoxes(const STRING &fname, bool find_segmentation, BLOCK_LIST *block_list)

tesseract::Tesseract::recog_all_words
bool recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, const char *word_config, int dopasses)
Definition: control.cpp:308

tesseract::Tesseract::build_menu_new
SVMenuNode * build_menu_new()
Definition: pgedit.cpp:247

tesseract::Tesseract::outlines_odd
char * outlines_odd
Definition: tesseractclass.h:928

tesseract::Tesseract::fixspace_thinks_word_done
bool fixspace_thinks_word_done(WERD_RES *word)
Definition: fixspace.cpp:533

tesseract::Tesseract::ambigs_classify_and_output
void ambigs_classify_and_output(const char *label, PAGE_RES_IT *pr_it, FILE *output_file)
Definition: recogtraining.cpp:209

tesseract::Tesseract::process_cmd_win_event
bool process_cmd_win_event(int32_t cmd_event, char *new_value)
Definition: pgedit.cpp:387

tesseract::Dict
Definition: dict.h:88

tesseract::Tesseract::textord_equation_detect
bool textord_equation_detect
Definition: tesseractclass.h:1107

tesseract::Tesseract::ParseLanguageString
void ParseLanguageString(const char *lang_str, GenericVector< STRING > *to_load, GenericVector< STRING > *not_to_load)
Definition: tessedit.cpp:262

tesseract::Tesseract::paragraph_debug_level
int paragraph_debug_level
Definition: tesseractclass.h:923

tesseract::Tesseract::noise_maxperword
int noise_maxperword
Definition: tesseractclass.h:899

tesseract::Tesseract::word_bln_display
bool word_bln_display(PAGE_RES_IT *pr_it)
Definition: pgedit.cpp:727

tesseract::Tesseract::potential_word_crunch
bool potential_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_level, bool ok_dict_word)
Definition: docqual.cpp:542

tesseract::Tesseract::right_to_left
bool right_to_left() const
Definition: tesseractclass.h:270

tesseract::Tesseract::ConvertStringToUnichars
bool ConvertStringToUnichars(const char *utf8, GenericVector< UNICHAR_ID > *class_ids)

tesseract::Tesseract::tessedit_use_primary_params_model
bool tessedit_use_primary_params_model
Definition: tesseractclass.h:1095

tesseract::Tesseract::test_pt
bool test_pt
Definition: tesseractclass.h:919

tesseract::Tesseract::debug_x_ht_level
int debug_x_ht_level
Definition: tesseractclass.h:900

tesseract::Tesseract::AutoPageSeg
int AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks, BLOBNBOX_LIST *diacritic_blobs, Tesseract *osd_tess, OSResults *osr)
Definition: pagesegmain.cpp:201

tesseract::Tesseract::tessedit_fix_fuzzy_spaces
bool tessedit_fix_fuzzy_spaces
Definition: tesseractclass.h:867

tesseract::Tesseract::fixsp_done_mode
int fixsp_done_mode
Definition: tesseractclass.h:1006

tesseract::Tesseract::tessedit_preserve_row_rej_perfect_wds
bool tessedit_preserve_row_rej_perfect_wds
Definition: tesseractclass.h:947

tesseract::Tesseract::word_set_display
bool word_set_display(PAGE_RES_IT *pr_it)
Definition: pgedit.cpp:944

tesseract::WordData::WordData
WordData(const PAGE_RES_IT &page_res_it)
Definition: tesseractclass.h:149

tesseract::Tesseract::set_pix_thresholds
void set_pix_thresholds(Pix *thresholds)
Definition: tesseractclass.h:237

tesseract::Tesseract::SearchForText
void SearchForText(const GenericVector< BLOB_CHOICE_LIST *> *choices, int choices_pos, int choices_length, const GenericVector< UNICHAR_ID > &target_text, int text_index, float rating, GenericVector< int > *segmentation, float *best_rating, GenericVector< int > *best_segmentation)

tesseract::TesseractStats::last_char_was_newline
bool last_char_was_newline
Definition: tesseractclass.h:141

tesseract::Tesseract::make_reject_map
void make_reject_map(WERD_RES *word, ROW *row, int16_t pass)

unichar.h

tesseract::Tesseract::count_outline_errs
int16_t count_outline_errs(char c, int16_t outline_count)
Definition: docqual.cpp:127

tesseract::Tesseract::ok_repeated_ch_non_alphanum_wds
char * ok_repeated_ch_non_alphanum_wds
Definition: tesseractclass.h:1082

tesseract::Tesseract::quality_char_pc
double quality_char_pc
Definition: tesseractclass.h:909

tesseract::Tesseract::set_done
void set_done(WERD_RES *word, int16_t pass)

tesseract::Tesseract::tessedit_bigram_debug
int tessedit_bigram_debug
Definition: tesseractclass.h:881

WERD
Definition: werd.h:59

tesseract::Tesseract::BelievableSuperscript
bool BelievableSuperscript(bool debug, const WERD_RES &word, float certainty_threshold, int *left_ok, int *right_ok) const
Definition: superscript.cpp:522

tesseract::Tesseract::min_characters_to_try
int min_characters_to_try
Definition: tesseractclass.h:1047

tesseract::Tesseract::pix_binary
Pix * pix_binary() const
Definition: tesseractclass.h:201

tesseract::Tesseract::suspect_level
int suspect_level
Definition: tesseractclass.h:1050

tesseract::Tesseract::tessedit_reject_bad_qual_wds
bool tessedit_reject_bad_qual_wds
Definition: tesseractclass.h:959

tesseract::Tesseract::suspect_rating_per_ch
double suspect_rating_per_ch
Definition: tesseractclass.h:1055

tesseract::Tesseract::tessedit_upper_flip_hyphen
double tessedit_upper_flip_hyphen
Definition: tesseractclass.h:1070

tesseract::Tesseract::GetLineData
ImageData * GetLineData(const TBOX &line_box, const GenericVector< TBOX > &boxes, const GenericVector< STRING > &texts, int start_box, int end_box, const BLOCK &block)
Definition: linerec.cpp:129

BOOL8
unsigned char BOOL8
Definition: host.h:34

tesseract::Tesseract::quality_rowrej_pc
double quality_rowrej_pc
Definition: tesseractclass.h:965

ROW
Definition: ocrrow.h:36

tesseract::Tesseract::init_tesseract_internal
int init_tesseract_internal(const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params, TessdataManager *mgr)
Definition: tessedit.cpp:396

tesseract::Tesseract::ImageHeight
int ImageHeight() const
Definition: tesseractclass.h:250

tesseract::Tesseract::fix_fuzzy_space_list
void fix_fuzzy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK *block)
Definition: fixspace.cpp:175

tesseract::Tesseract::word_outline_errs
int16_t word_outline_errs(WERD_RES *word)
Definition: docqual.cpp:73

tesseract::Tesseract::get_rep_char
UNICHAR_ID get_rep_char(WERD_RES *word)
Definition: output.cpp:258

tesseract::Tesseract::acceptable_word_string
ACCEPTABLE_WERD_TYPE acceptable_word_string(const UNICHARSET &char_set, const char *s, const char *lengths)
Definition: control.cpp:1764

tesseract::Tesseract::crunch_leave_ok_strings
bool crunch_leave_ok_strings
Definition: tesseractclass.h:991

BLOCK
Definition: ocrblock.h:30

tesseract::Tesseract::ResetDocumentDictionary
void ResetDocumentDictionary()
Definition: tesseractclass.cpp:602

tesseract::Tesseract::mutable_textord
Textord * mutable_textord()
Definition: tesseractclass.h:266

tesseract::Tesseract::flip_0O
void flip_0O(WERD_RES *word)
Definition: reject.cpp:678

debugpixa.h

tesseract::Tesseract::init_recog_training
FILE * init_recog_training(const STRING &fname)
Definition: recogtraining.cpp:35

tesseract::OEM_DEFAULT
Definition: publictypes.h:274

tesseract::Tesseract::crunch_accept_ok
bool crunch_accept_ok
Definition: tesseractclass.h:992

tesseract::Tesseract::crunch_del_high_word
double crunch_del_high_word
Definition: tesseractclass.h:986

tesseract::Tesseract::reject_mostly_rejects
void reject_mostly_rejects(WERD_RES *word)
Definition: reject.cpp:578

tesseract
Definition: baseapi.cpp:94

tesseract::Tesseract::set_source_resolution
void set_source_resolution(int ppi)
Definition: tesseractclass.h:244

tesseract::Tesseract::TidyUp
void TidyUp(PAGE_RES *page_res)

tesseract::Tesseract::jpg_quality
int jpg_quality
Definition: tesseractclass.h:1044

tesseract::Tesseract::textonly_pdf
bool textonly_pdf
Definition: tesseractclass.h:1043

tesseract::TesseractStats::adaption_word_number
int32_t adaption_word_number
Definition: tesseractclass.h:130

tesseract::Tesseract::crunch_long_repetitions
int crunch_long_repetitions
Definition: tesseractclass.h:1000

tesseract::Tesseract
Definition: tesseractclass.h:173

GARBAGE_LEVEL
GARBAGE_LEVEL
Definition: docqual.h:29

tesseract::Tesseract::rejection_passes
void rejection_passes(PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, const char *word_config)
Definition: control.cpp:618

tesseract::Tesseract::tessedit_debug_fonts
bool tessedit_debug_fonts
Definition: tesseractclass.h:874

tesseract::OcrEngineMode
OcrEngineMode
Definition: publictypes.h:268

tesseract::Tesseract::fix_rep_char
void fix_rep_char(PAGE_RES_IT *page_res_it)
Definition: control.cpp:1725

tesseract::Tesseract::crunch_terrible_rating
double crunch_terrible_rating
Definition: tesseractclass.h:972

tesseract::Tesseract::crunch_early_merge_tess_fails
bool crunch_early_merge_tess_fails
Definition: tesseractclass.h:970

tesseract::Tesseract::recog_pseudo_word
void recog_pseudo_word(PAGE_RES *page_res, TBOX &selection_box)
Definition: control.cpp:67

tesseract::EquationDetect
Definition: equationdetect.h:39

tesseract::Tesseract::SelectGoodDiacriticOutlines
bool SelectGoodDiacriticOutlines(int pass, float certainty_threshold, PAGE_RES_IT *pr_it, C_BLOB *blob, const GenericVector< C_OUTLINE *> &outlines, int num_outlines, GenericVector< bool > *ok_outlines)
Definition: control.cpp:1152

tesseract::Tesseract::tessedit_flip_0O
bool tessedit_flip_0O
Definition: tesseractclass.h:1066

PAGE_RES_IT
Definition: pageres.h:675

tesseract::WordData::row
ROW * row
Definition: tesseractclass.h:156

tesseract::Tesseract::tess_acceptable_word
bool tess_acceptable_word(WERD_RES *word)
Definition: tessbox.cpp:62

tesseract::Tesseract::tessedit_unrej_any_wd
bool tessedit_unrej_any_wd
Definition: tesseractclass.h:869

tesseract::Tesseract::recognize_page
void recognize_page(STRING &image_name)

tesseract::Tesseract::word_dumper
bool word_dumper(PAGE_RES_IT *pr_it)
Definition: pgedit.cpp:920

tesseract::Tesseract::bigram_correction_pass
void bigram_correction_pass(PAGE_RES *page_res)
Definition: control.cpp:473

tesseract::Tesseract::file_type
char * file_type
Definition: tesseractclass.h:1090

STRING
Definition: strngs.h:45

tesseract::Tesseract::crunch_del_min_width
double crunch_del_min_width
Definition: tesseractclass.h:984

C_BLOB
Definition: stepblob.h:37

tesseract::Tesseract::chs_trailing_punct1
char * chs_trailing_punct1
Definition: tesseractclass.h:903

tesseract::Tesseract::doc_and_block_rejection
void doc_and_block_rejection(PAGE_RES_IT &page_res_it, bool good_quality_doc)
Definition: docqual.cpp:233

tesseract::Tesseract::SetupWordPassN
void SetupWordPassN(int pass_n, WordData *word)
Definition: control.cpp:182

tesseract::Tesseract::tessedit_enable_dict_correction
bool tessedit_enable_dict_correction
Definition: tesseractclass.h:879

tesseract::Tesseract::fix_fuzzy_spaces
void fix_fuzzy_spaces(ETEXT_DESC *monitor, int32_t word_count, PAGE_RES *page_res)
Definition: fixspace.cpp:78

tesseract::Tesseract::suspect_short_words
int suspect_short_words
Definition: tesseractclass.h:1053

tesseract::Tesseract::worst_noise_blob
int16_t worst_noise_blob(WERD_RES *word_res, float *worst_noise_score)
Definition: fixspace.cpp:710

tesseract::Tesseract::RecogAllWordsPassN
bool RecogAllWordsPassN(int pass_n, ETEXT_DESC *monitor, PAGE_RES_IT *pr_it, GenericVector< WordData > *words)
Definition: control.cpp:218

tesseract::Wordrec
Definition: wordrec.h:192

tesseract::Tesseract::get_sub_lang
Tesseract * get_sub_lang(int index) const
Definition: tesseractclass.h:276

control.h

params.h

tesseract::ColumnFinder
Definition: colfind.h:53

tesseract::Tesseract::CountMisfitTops
int CountMisfitTops(WERD_RES *word_res)
Definition: fixxht.cpp:70

tesseract::Tesseract::reject_edge_blobs
void reject_edge_blobs(WERD_RES *word)
Definition: reject.cpp:268

tesseract::Tesseract::tessedit_page_number
int tessedit_page_number
Definition: tesseractclass.h:1087

tesseract::Tesseract::crunch_poor_garbage_cert
double crunch_poor_garbage_cert
Definition: tesseractclass.h:975

tesseract::Tesseract::tessedit_good_quality_unrej
bool tessedit_good_quality_unrej
Definition: tesseractclass.h:933

tesseract::Tesseract::~Tesseract
~Tesseract()
Definition: tesseractclass.cpp:545

tesseract::TesseractStats::word_count
int32_t word_count
Definition: tesseractclass.h:136

tesseract::Tesseract::tessedit_pageseg_mode
int tessedit_pageseg_mode
Definition: tesseractclass.h:825

tesseract::Tesseract::tessedit_dont_blkrej_good_wds
bool tessedit_dont_blkrej_good_wds
Definition: tesseractclass.h:949

tesseract::ImageData
Definition: imagedata.h:105

tesseract::Tesseract::ocr_devanagari_split_strategy
int ocr_devanagari_split_strategy
Definition: tesseractclass.h:844

tesseract::Tesseract::match_current_words
void match_current_words(WERD_RES_LIST &words, ROW *row, BLOCK *block)
Definition: fixspace.cpp:226

tesseract::Tesseract::recog_word_recursive
void recog_word_recursive(WERD_RES *word)
Definition: tfacepp.cpp:104

tesseract::Tesseract::superscript_debug
int superscript_debug
Definition: tesseractclass.h:1013

FCOORD
Definition: points.h:189

tesseract::Tesseract::tilde_delete
void tilde_delete(PAGE_RES_IT &page_res_it)
Definition: docqual.cpp:590

OSResults
Definition: osdetect.h:49

tesseract::Tesseract::conflict_set_I_l_1
char * conflict_set_I_l_1
Definition: tesseractclass.h:1083

tesseract::Tesseract::fix_noisy_space_list
void fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK *block)
Definition: fixspace.cpp:599

tesseract::Tesseract::crunch_leave_lc_strings
int crunch_leave_lc_strings
Definition: tesseractclass.h:997

tesseract::Tesseract::noise_cert_basechar
double noise_cert_basechar
Definition: tesseractclass.h:888

tesseract::Tesseract::GetSubAndSuperscriptCandidates
void GetSubAndSuperscriptCandidates(const WERD_RES *word, int *num_rebuilt_leading, ScriptPos *leading_pos, float *leading_certainty, int *num_rebuilt_trailing, ScriptPos *trailing_pos, float *trailing_certainty, float *avg_certainty, float *unlikely_threshold)
Definition: superscript.cpp:254

tesseract::Tesseract::tessedit_train_line_recognizer
bool tessedit_train_line_recognizer
Definition: tesseractclass.h:819

tesseract::Tesseract::check_debug_pt
bool check_debug_pt(WERD_RES *word, int location)
Definition: control.cpp:1868

tesseract::Tesseract::process_selected_words
void process_selected_words(PAGE_RES *page_res, TBOX &selection_box, bool(tesseract::Tesseract::*word_processor)(PAGE_RES_IT *pr_it))
Definition: pagewalk.cpp:30

tesseract::WordData
Definition: tesseractclass.h:147

tesseract::Tesseract::AnyTessLang
bool AnyTessLang() const
Definition: tesseractclass.h:280

tesseract::Tesseract::tessedit_dont_rowrej_good_wds
bool tessedit_dont_rowrej_good_wds
Definition: tesseractclass.h:951

tesseract::Tesseract::SetBlackAndWhitelist
void SetBlackAndWhitelist()
Definition: tesseractclass.cpp:609

tesseract::Tesseract::rej_use_tess_accepted
bool rej_use_tess_accepted
Definition: tesseractclass.h:1074

tesseract::Tesseract::applybox_exposure_pattern
char * applybox_exposure_pattern
Definition: tesseractclass.h:855

tesseract::Tesseract::crunch_pot_garbage
bool crunch_pot_garbage
Definition: tesseractclass.h:979

tesseract::LSTMRecognizer
Definition: lstmrecognizer.h:53

tesseract::Tesseract::tessedit_use_reject_spaces
bool tessedit_use_reject_spaces
Definition: tesseractclass.h:934

tesseract::Tesseract::crunch_poor_garbage_rate
double crunch_poor_garbage_rate
Definition: tesseractclass.h:976

tesseract::Tesseract::noise_maxperblob
int noise_maxperblob
Definition: tesseractclass.h:898

tesseract::Tesseract::tessedit_debug_doc_rejection
bool tessedit_debug_doc_rejection
Definition: tesseractclass.h:960

tesseract::Tesseract::unrecognised_char
char * unrecognised_char
Definition: tesseractclass.h:1049

ACCEPTABLE_WERD_TYPE
ACCEPTABLE_WERD_TYPE
Definition: control.h:28

tesseract::ShiroRekhaSplitter
Definition: devanagari_processing.h:72

tesseract::Tesseract::quality_based_rejection
void quality_based_rejection(PAGE_RES_IT &page_res_it, bool good_quality_doc)
Definition: docqual.cpp:139

tesseract::Tesseract::TrySuperscriptSplits
WERD_RES * TrySuperscriptSplits(int num_chopped_leading, float leading_certainty, ScriptPos leading_pos, int num_chopped_trailing, float trailing_certainty, ScriptPos trailing_pos, WERD_RES *word, bool *is_good, int *retry_leading, int *retry_trailing)
Definition: superscript.cpp:383

tesseract::TesseractStats::TesseractStats
TesseractStats()
Definition: tesseractclass.h:116

tesseract::Tesseract::read_config_file
void read_config_file(const char *filename, SetParamConstraint constraint)
Definition: tessedit.cpp:60

tesseract::Tesseract::quality_outline_pc
double quality_outline_pc
Definition: tesseractclass.h:908

tesseract::Tesseract::split_word
void split_word(WERD_RES *word, int split_pt, WERD_RES **right_piece, BlamerBundle **orig_blamer_bundle) const
Definition: tfacepp.cpp:176

tesseract::Tesseract::one_ell_conflict
bool one_ell_conflict(WERD_RES *word_res, bool update_map)
Definition: reject.cpp:297

tesseract::Tesseract::bland_unrej
bool bland_unrej
Definition: tesseractclass.h:963

tesseract::Tesseract::garbage_word
GARBAGE_LEVEL garbage_word(WERD_RES *word, BOOL8 ok_dict_word)
Definition: docqual.cpp:680

tesseract::OEM_TESSERACT_ONLY
Definition: publictypes.h:269

tesseract::Textord
Definition: textord.h:68

tesseract::Tesseract::nn_recover_rejects
void nn_recover_rejects(WERD_RES *word, ROW *row)

TBLOB
Definition: blobs.h:268

tesseract::Tesseract::tessedit_row_rej_good_docs
bool tessedit_row_rej_good_docs
Definition: tesseractclass.h:955

tesseract::Tesseract::textord_tabfind_show_vlines
bool textord_tabfind_show_vlines
Definition: tesseractclass.h:1100

tesseract::Tesseract::tessedit_write_unlv
bool tessedit_write_unlv
Definition: tesseractclass.h:1037

tesseract::Tesseract::CorrectClassifyWords
void CorrectClassifyWords(PAGE_RES *page_res)

tesseract::Tesseract::word_deletable
CRUNCH_MODE word_deletable(WERD_RES *word, int16_t &delete_mode)
Definition: docqual.cpp:895

tesseract::Tesseract::word_char_quality
void word_char_quality(WERD_RES *word, ROW *row, int16_t *match_count, int16_t *accepted_match_count)
Definition: docqual.cpp:93

tesseract::Tesseract::ReassignDiacritics
bool ReassignDiacritics(int pass, PAGE_RES_IT *pr_it, bool *make_next_word_fuzzy)
Definition: control.cpp:949

tesseract::WordData::prev_word
WordData * prev_word
Definition: tesseractclass.h:158

tesseract::Tesseract::crunch_small_outlines_size
double crunch_small_outlines_size
Definition: tesseractclass.h:988

docqual.h

tesseract::Tesseract::tessedit_consistent_reps
bool tessedit_consistent_reps
Definition: tesseractclass.h:1063

tesseract::Tesseract::tessedit_tess_adaption_mode
int tessedit_tess_adaption_mode
Definition: tesseractclass.h:912

tesseract::Tesseract::blamer_pass
void blamer_pass(PAGE_RES *page_res)
Definition: control.cpp:716

devanagari_processing.h

tesseract::Tesseract::recog_interactive
bool recog_interactive(PAGE_RES_IT *pr_it)
Definition: control.cpp:82

tesseract::Tesseract::recog_training_segmented
void recog_training_segmented(const STRING &fname, PAGE_RES *page_res, volatile ETEXT_DESC *monitor, FILE *output_file)
Definition: recogtraining.cpp:82

tesseract::Tesseract::rej_use_tess_blanks
bool rej_use_tess_blanks
Definition: tesseractclass.h:1075

publictypes.h

tesseract::Tesseract::rej_use_sensible_wd
bool rej_use_sensible_wd
Definition: tesseractclass.h:1077

ETEXT_DESC
Definition: ocrclass.h:119

tesseract::Tesseract::rej_whole_of_mostly_reject_word_fract
double rej_whole_of_mostly_reject_word_fract
Definition: tesseractclass.h:1079

tesseract::Tesseract::tessedit_create_boxfile
bool tessedit_create_boxfile
Definition: tesseractclass.h:1085

tesseract::Tesseract::textord_tabfind_aligned_gap_fraction
double textord_tabfind_aligned_gap_fraction
Definition: tesseractclass.h:1115

tesseract::Tesseract::tessedit_display_outwords
bool tessedit_display_outwords
Definition: tesseractclass.h:863

tesseract::Tesseract::multilang_debug_level
int multilang_debug_level
Definition: tesseractclass.h:922

tesseract::Tesseract::dictionary_correction_pass
void dictionary_correction_pass(PAGE_RES *page_res)
Definition: control.cpp:2117

tesseract::Tesseract::tess_segment_pass_n
void tess_segment_pass_n(int pass_n, WERD_RES *word)
Definition: tessbox.cpp:32

tesseract::Tesseract::AnyLSTMLang
bool AnyLSTMLang() const
Definition: tesseractclass.h:288

tesseract::Tesseract::flip_hyphens
void flip_hyphens(WERD_RES *word)
Definition: reject.cpp:621

tesseract::Tesseract::ReportFailedBox
void ReportFailedBox(int boxfile_lineno, TBOX box, const char *box_ch, const char *err_msg)

tesseract::Tesseract::count_alphanums
int16_t count_alphanums(const WERD_CHOICE &word)
Definition: output.cpp:383

tesseract::Tesseract::tessedit_write_rep_codes
bool tessedit_write_rep_codes
Definition: tesseractclass.h:1036

tesseract::Tesseract::bidi_debug
int bidi_debug
Definition: tesseractclass.h:849

tesseract::Tesseract::crunch_del_rating
double crunch_del_rating
Definition: tesseractclass.h:980

tesseract::Tesseract::PrerecAllWordsPar
void PrerecAllWordsPar(const GenericVector< WordData > &words)
Definition: par_control.cpp:39

tesseract::PageSegMode
PageSegMode
Definition: publictypes.h:163

tesseract::Tesseract::PreenXHeights
void PreenXHeights(BLOCK_LIST *block_list)

tesseract::TesseractStats::doc_outline_errs
int16_t doc_outline_errs
Definition: tesseractclass.h:132

tesseract::Tesseract::fixsp_non_noise_limit
int fixsp_non_noise_limit
Definition: tesseractclass.h:1003

tesseract::Tesseract::crunch_terrible_garbage
bool crunch_terrible_garbage
Definition: tesseractclass.h:973

tesseract::Tesseract::num_sub_langs
int num_sub_langs() const
Definition: tesseractclass.h:273

tesseract::Tesseract::break_noisiest_blob_word
void break_noisiest_blob_word(WERD_RES_LIST &words)
Definition: fixspace.cpp:645

tesseract::Tesseract::poly_allow_detailed_fx
bool poly_allow_detailed_fx
Definition: tesseractclass.h:1103

tesseract::Tesseract::AssignDiacriticsToOverlappingBlobs
void AssignDiacriticsToOverlappingBlobs(const GenericVector< C_OUTLINE *> &outlines, int pass, WERD *real_word, PAGE_RES_IT *pr_it, GenericVector< bool > *word_wanted, GenericVector< bool > *overlapped_any_blob, GenericVector< C_BLOB *> *target_blobs)
Definition: control.cpp:1019

tesseract::TesseractStats::tilde_crunch_written
bool tilde_crunch_written
Definition: tesseractclass.h:140

textord.h

tesseract::Tesseract::unrej_good_chs
void unrej_good_chs(WERD_RES *word, ROW *row)
Definition: docqual.cpp:116

tesseract::Tesseract::SetupPageSegAndDetectOrientation
ColumnFinder * SetupPageSegAndDetectOrientation(PageSegMode pageseg_mode, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr, TO_BLOCK_LIST *to_blocks, Pix **photo_mask_pix, Pix **music_mask_pix)
Definition: pagesegmain.cpp:271

tesseract::Tesseract::tessedit_test_adaption_mode
int tessedit_test_adaption_mode
Definition: tesseractclass.h:918

tesseract::Tesseract::x_ht_acceptance_tolerance
int x_ht_acceptance_tolerance
Definition: tesseractclass.h:1011

tesseract::Tesseract::reject_I_1_L
void reject_I_1_L(WERD_RES *word)
Definition: reject.cpp:198

tesseract::Tesseract::fp_eval_word_spacing
int16_t fp_eval_word_spacing(WERD_RES_LIST &word_res_list)
Definition: fixspace.cpp:860

tesseract::Tesseract::PrepareForPageseg
void PrepareForPageseg()
Definition: tesseractclass.cpp:624

tesseract::Tesseract::output_pass
void output_pass(PAGE_RES_IT &page_res_it, const TBOX *target_word_box)
Definition: output.cpp:43

tesseract::Tesseract::TrainedXheightFix
bool TrainedXheightFix(WERD_RES *word, BLOCK *block, ROW *row)
Definition: control.cpp:1504

tesseract::Tesseract::crunch_pot_poor_rate
double crunch_pot_poor_rate
Definition: tesseractclass.h:977

tesseract::Tesseract::ResegmentCharBox
bool ResegmentCharBox(PAGE_RES *page_res, const TBOX *prev_box, const TBOX &box, const TBOX *next_box, const char *correct_text)

tesseract::Tesseract::crunch_pot_poor_cert
double crunch_pot_poor_cert
Definition: tesseractclass.h:978

tesseract::Tesseract::debug_word
void debug_word(PAGE_RES *page_res, const TBOX &selection_box)
Definition: pgedit.cpp:637

tesseract::Tesseract::classify_word_and_language
void classify_word_and_language(int pass_n, PAGE_RES_IT *pr_it, WordData *word_data)
Definition: control.cpp:1338