Typedefs | |
typedef int(Dict::* | DictFunc )(void *void_dawg_args, UNICHAR_ID unichar_id, bool word_end) const |
typedef double(Dict::* | ProbabilityInContextFunc )(const char *lang, const char *context, int context_bytes, const char *character, int character_bytes) |
typedef float(Dict::* | ParamsModelClassifyFunc )(const char *lang, void *path) |
typedef void(Wordrec::* | FillLatticeFunc )(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle) |
typedef TessCallback4< const UNICHARSET &, int, PageIterator *, Pix * > | TruthCallback |
typedef GenericVectorEqEq < const ParagraphModel * > | SetOfModels |
typedef void(Tesseract::* | WordRecognizer )(const WordData &word_data, WERD_RES **in_word, PointerVector< WERD_RES > *out_words) |
typedef GenericVector < ParamsTrainingHypothesis > | ParamsTrainingHypothesisList |
typedef GenericVector< UNICHAR_ID > | UnicharIdVector |
typedef GenericVector < AmbigSpec_LIST * > | UnicharAmbigsVector |
typedef bool(* | FileReader )(const STRING &filename, GenericVector< char > *data) |
typedef bool(* | FileWriter )(const GenericVector< char > &data, const STRING &filename) |
typedef KDPairInc< int, int > | IntKDPair |
typedef GenericHeap < ShapeQueueEntry > | ShapeQueue |
typedef signed int | char_32 |
typedef basic_string< char_32 > | string_32 |
typedef GenericVector< NodeChild > | NodeChildVector |
typedef GenericVector< int > | SuccessorList |
typedef GenericVector < SuccessorList * > | SuccessorListsVector |
typedef GenericVector< Dawg * > | DawgVector |
typedef GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > | BlobGridSearch |
typedef GridSearch < ColPartition, ColPartition_CLIST, ColPartition_C_IT > | ColPartitionGridSearch |
typedef GenericVector < ColPartitionSet * > | PartSetVector |
typedef TessResultCallback1 < bool, int > | WidthCallback |
typedef BBGrid< ColSegment, ColSegment_CLIST, ColSegment_C_IT > | ColSegmentGrid |
typedef GridSearch< ColSegment, ColSegment_CLIST, ColSegment_C_IT > | ColSegmentGridSearch |
typedef BBGrid< WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT > | WordGrid |
typedef GridSearch < WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT > | WordSearch |
typedef hash_map< string, string, StringHash > | LigHash |
typedef GenericHeap < MatrixCoordPair > | PainPointHeap |
typedef unsigned char | LanguageModelFlagsType |
Used for expressing various language model flags. More... | |
Functions | |
int | CubeAPITest (Boxa *boxa_blocks, Pixa *pixa_blocks, Boxa *boxa_words, Pixa *pixa_words, const FCOORD &reskew, Pix *page_pix, PAGE_RES *page_res) |
TBLOB * | make_tesseract_blob (float baseline, float xheight, float descender, float ascender, bool numeric_mode, Pix *pix) |
STRING | HOcrEscape (const char *text) |
double | prec (double x) |
long | dist2 (int x1, int y1, int x2, int y2) |
void | GetWordBaseline (int writing_direction, int ppi, int height, int word_x1, int word_y1, int word_x2, int word_y2, int line_x1, int line_y1, int line_x2, int line_y2, double *x0, double *y0, double *length) |
void | AffineMatrix (int writing_direction, int line_x1, int line_y1, int line_x2, int line_y2, double *a, double *b, double *c, double *d) |
void | ClipBaseline (int ppi, int x1, int y1, int x2, int y2, int *line_x1, int *line_y1, int *line_x2, int *line_y2) |
bool | IsTextOrEquationType (PolyBlockType type) |
bool | IsLeftIndented (const EquationDetect::IndentType type) |
bool | IsRightIndented (const EquationDetect::IndentType type) |
STRING | RtlEmbed (const STRING &word, bool rtlify) |
bool | IsLatinLetter (int ch) |
bool | IsDigitLike (int ch) |
bool | IsOpeningPunct (int ch) |
bool | IsTerminalPunct (int ch) |
const char * | SkipChars (const char *str, const char *toskip) |
const char * | SkipChars (const char *str, bool(*skip)(int)) |
const char * | SkipOne (const char *str, const char *toskip) |
bool | LikelyListNumeral (const STRING &word) |
bool | LikelyListMark (const STRING &word) |
bool | AsciiLikelyListItem (const STRING &word) |
int | UnicodeFor (const UNICHARSET *u, const WERD_CHOICE *werd, int pos) |
bool | LikelyListMarkUnicode (int ch) |
bool | UniLikelyListItem (const UNICHARSET *u, const WERD_CHOICE *werd) |
void | LeftWordAttributes (const UNICHARSET *unicharset, const WERD_CHOICE *werd, const STRING &utf8, bool *is_list, bool *starts_idea, bool *ends_idea) |
void | RightWordAttributes (const UNICHARSET *unicharset, const WERD_CHOICE *werd, const STRING &utf8, bool *is_list, bool *starts_idea, bool *ends_idea) |
int | ClosestCluster (const GenericVector< Cluster > &clusters, int value) |
void | CalculateTabStops (GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, int tolerance, GenericVector< Cluster > *left_tabs, GenericVector< Cluster > *right_tabs) |
void | MarkRowsWithModel (GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, const ParagraphModel *model, bool ltr, int eop_threshold) |
void | GeometricClassifyThreeTabStopTextBlock (int debug_level, GeometricClassifierState &s, ParagraphTheory *theory) |
void | GeometricClassify (int debug_level, GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, ParagraphTheory *theory) |
bool | ValidFirstLine (const GenericVector< RowScratchRegisters > *rows, int row, const ParagraphModel *model) |
bool | ValidBodyLine (const GenericVector< RowScratchRegisters > *rows, int row, const ParagraphModel *model) |
bool | CrownCompatible (const GenericVector< RowScratchRegisters > *rows, int a, int b, const ParagraphModel *model) |
void | DiscardUnusedModels (const GenericVector< RowScratchRegisters > &rows, ParagraphTheory *theory) |
void | DowngradeWeakestToCrowns (int debug_level, ParagraphTheory *theory, GenericVector< RowScratchRegisters > *rows) |
void | RecomputeMarginsAndClearHypotheses (GenericVector< RowScratchRegisters > *rows, int start, int end, int percentile) |
int | InterwordSpace (const GenericVector< RowScratchRegisters > &rows, int row_start, int row_end) |
bool | FirstWordWouldHaveFit (const RowScratchRegisters &before, const RowScratchRegisters &after, tesseract::ParagraphJustification justification) |
bool | FirstWordWouldHaveFit (const RowScratchRegisters &before, const RowScratchRegisters &after) |
bool | TextSupportsBreak (const RowScratchRegisters &before, const RowScratchRegisters &after) |
bool | LikelyParagraphStart (const RowScratchRegisters &before, const RowScratchRegisters &after) |
bool | LikelyParagraphStart (const RowScratchRegisters &before, const RowScratchRegisters &after, tesseract::ParagraphJustification j) |
ParagraphModel | InternalParagraphModelByOutline (const GenericVector< RowScratchRegisters > *rows, int start, int end, int tolerance, bool *consistent) |
ParagraphModel | ParagraphModelByOutline (int debug_level, const GenericVector< RowScratchRegisters > *rows, int start, int end, int tolerance) |
bool | RowsFitModel (const GenericVector< RowScratchRegisters > *rows, int start, int end, const ParagraphModel *model) |
void | MarkStrongEvidence (GenericVector< RowScratchRegisters > *rows, int row_start, int row_end) |
void | ModelStrongEvidence (int debug_level, GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, bool allow_flush_models, ParagraphTheory *theory) |
void | StrongEvidenceClassify (int debug_level, GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, ParagraphTheory *theory) |
void | SeparateSimpleLeaderLines (GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, ParagraphTheory *theory) |
void | ConvertHypothesizedModelRunsToParagraphs (int debug_level, const GenericVector< RowScratchRegisters > &rows, GenericVector< PARA * > *row_owners, ParagraphTheory *theory) |
bool | RowIsStranded (const GenericVector< RowScratchRegisters > &rows, int row) |
void | LeftoverSegments (const GenericVector< RowScratchRegisters > &rows, GenericVector< Interval > *to_fix, int row_start, int row_end) |
void | CanonicalizeDetectionResults (GenericVector< PARA * > *row_owners, PARA_LIST *paragraphs) |
void | DetectParagraphs (int debug_level, GenericVector< RowInfo > *row_infos, GenericVector< PARA * > *row_owners, PARA_LIST *paragraphs, GenericVector< ParagraphModel * > *models) |
void | InitializeTextAndBoxesPreRecognition (const MutableIterator &it, RowInfo *info) |
void | InitializeRowInfo (bool after_recognition, const MutableIterator &it, RowInfo *info) |
void | DetectParagraphs (int debug_level, bool after_text_recognition, const MutableIterator *block_start, GenericVector< ParagraphModel * > *models) |
bool | StrongModel (const ParagraphModel *model) |
bool | read_t (PAGE_RES_IT *page_res_it, TBOX *tbox) |
void | YOutlierPieces (WERD_RES *word, int rebuilt_blob_index, int super_y_bottom, int sub_y_top, ScriptPos *leading_pos, int *num_leading_outliers, ScriptPos *trailing_pos, int *num_trailing_outliers) |
bool | CompareFontInfo (const FontInfo &fi1, const FontInfo &fi2) |
bool | CompareFontSet (const FontSet &fs1, const FontSet &fs2) |
void | FontInfoDeleteCallback (FontInfo f) |
void | FontSetDeleteCallback (FontSet fs) |
bool | read_info (FILE *f, FontInfo *fi, bool swap) |
bool | write_info (FILE *f, const FontInfo &fi) |
bool | read_spacing_info (FILE *f, FontInfo *fi, bool swap) |
bool | write_spacing_info (FILE *f, const FontInfo &fi) |
bool | read_set (FILE *f, FontSet *fs, bool swap) |
bool | write_set (FILE *f, const FontSet &fs) |
int | OtsuThreshold (Pix *src_pix, int left, int top, int width, int height, int **thresholds, int **hi_values) |
void | HistogramRect (Pix *src_pix, int channel, int left, int top, int width, int height, int *histogram) |
int | OtsuStats (const int *histogram, int *H_out, int *omega0_out) |
int | ParamsTrainingFeatureByName (const char *name) |
bool | PSM_OSD_ENABLED (int pageseg_mode) |
bool | PSM_ORIENTATION_ENABLED (int pageseg_mode) |
bool | PSM_COL_FIND_ENABLED (int pageseg_mode) |
bool | PSM_SPARSE (int pageseg_mode) |
bool | PSM_BLOCK_FIND_ENABLED (int pageseg_mode) |
bool | PSM_LINE_FIND_ENABLED (int pageseg_mode) |
bool | PSM_WORD_FIND_ENABLED (int pageseg_mode) |
const char * | ScriptPosToString (enum ScriptPos script_pos) |
ELISTIZE (AmbigSpec) | |
ELISTIZEH (AmbigSpec) | |
bool | LoadDataFromFile (const STRING &filename, GenericVector< char > *data) |
bool | SaveDataToFile (const GenericVector< char > &data, const STRING &filename) |
template<typename T > | |
bool | cmp_eq (T const &t1, T const &t2) |
template<typename T > | |
int | sort_cmp (const void *t1, const void *t2) |
template<typename T > | |
int | sort_ptr_cmp (const void *t1, const void *t2) |
void | ExtractFontName (const STRING &filename, STRING *fontname) |
TrainingSample * | BlobToTrainingSample (const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features) |
uinT8 | NormalizeDirection (uinT8 dir, const FCOORD &unnormed_pos, const DENORM &denorm, const DENORM *root_denorm) |
void | ClearFeatureSpaceWindow (NORM_METHOD norm_method, ScrollView *window) |
void | CallWithUTF8 (TessCallback1< const char * > *cb, const WERD_CHOICE *wc) |
Pix * | GridReducedPix (const TBOX &box, int gridsize, ICOORD bleft, int *left, int *bottom) |
Pix * | TraceOutlineOnReducedPix (C_OUTLINE *outline, int gridsize, ICOORD bleft, int *left, int *bottom) |
Pix * | TraceBlockOnReducedPix (BLOCK *block, int gridsize, ICOORD bleft, int *left, int *bottom) |
template<class BBC > | |
int | SortByBoxLeft (const void *void1, const void *void2) |
template<class BBC > | |
int | SortRightToLeft (const void *void1, const void *void2) |
template<class BBC > | |
int | SortByBoxBottom (const void *void1, const void *void2) |
template<typename T > | |
void | DeleteObject (T *object) |
void | SetBlobStrokeWidth (Pix *pix, BLOBNBOX *blob) |
void | assign_blobs_to_blocks2 (Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks) |
void | ParseCommandLineFlags (const char *usage, int *argc, char ***argv, const bool remove_flags) |
ShapeTable * | LoadShapeTable (const STRING &file_prefix) |
void | WriteShapeTable (const STRING &file_prefix, const ShapeTable &shape_table) |
MasterTrainer * | LoadTrainingData (int argc, const char *const *argv, bool replication, ShapeTable **shape_table, STRING *file_prefix) |
Pix * | DegradeImage (Pix *input, int exposure, TRand *randomizer, float *rotation) |
void | UTF8ToUTF32 (const char *utf8_str, GenericVector< char32 > *str32) |
void | UTF32ToUTF8 (const GenericVector< char32 > &str32, STRING *utf8_str) |
bool | is_hyphen_punc (const char32 ch) |
bool | is_single_quote (const char32 ch) |
bool | is_double_quote (const char32 ch) |
STRING | NormalizeUTF8String (const char *str8) |
void | NormalizeChar32 (char32 ch, GenericVector< char32 > *str) |
char32 | OCRNormalize (char32 ch) |
bool | IsOCREquivalent (char32 ch1, char32 ch2) |
bool | IsValidCodepoint (const char32 ch) |
bool | IsWhitespace (const char32 ch) |
bool | IsUTF8Whitespace (const char *text) |
int | SpanUTF8Whitespace (const char *text) |
int | SpanUTF8NotWhitespace (const char *text) |
bool | IsInterchangeValid (const char32 ch) |
bool | IsInterchangeValid7BitAscii (const char32 ch) |
char32 | FullwidthToHalfwidth (const char32 ch) |
Pix * | CairoARGB32ToPixFormat (cairo_surface_t *surface) |
void | ExtractFontProperties (const string &utf8_text, StringRenderer *render, const string &output_base) |
bool | MakeIndividualGlyphs (Pix *pix, const vector< BoxChar * > &vbox, const int input_tiff_page) |
void | SetupBasicProperties (bool report_errors, UNICHARSET *unicharset) |
void | SetPropertiesForInputFile (const string &script_dir, const string &input_unicharset_file, const string &output_unicharset_file, const string &output_xheights_file) |
ELISTIZE (ViterbiStateEntry) | |
ELISTIZEH (ViterbiStateEntry) | |
template<class BLOB_CHOICE > | |
int | SortByUnicharID (const void *void1, const void *void2) |
template<class BLOB_CHOICE > | |
int | SortByRating (const void *void1, const void *void2) |
convert_prob_to_tess_certainty | |
Normalize a probability in the range [0.0, 1.0] to a tesseract certainty in the range [-20.0, 0.0]. | |
char_box_to_tbox | |
Create a TBOX from a character bounding box. If nonzero, the x_offset accounts for any additional padding of the word box that should be taken into account. | |
TBOX | char_box_to_tbox (Box *char_box, TBOX word_box, int x_offset) |
The box file is assumed to contain box definitions, one per line, of the following format for blob-level boxes:
* <UTF8 str> <left> <bottom> <right> <top> <page id> *
and for word/line-level boxes:
* WordStr <left> <bottom> <right> <top> <page id> #<space-delimited word str> *
NOTES: The boxes use tesseract coordinates, i.e. 0,0 is at BOTTOM-LEFT.
<page id> is 0-based, and the page number is used for multipage input (tiff).
In the blob-level form, each line represents a recognizable unit, which may be several UTF-8 bytes, but there is a bounding box around each recognizable unit, and no classifier is needed to train in this mode (bootstrapping.)
In the word/line-level form, the line begins with the literal "WordStr", and the bounding box bounds either a whole line or a whole word. The recognizable units in the word/line are listed after the # at the end of the line and are space delimited, ignoring any original spaces on the line. For example:
* word -> #w o r d
* multi word line -> #m u l t i w o r d l i n e
The recognizable units must be space-delimited in order to allow multiple unicodes to be used for a single recognizable unit, e.g. Hindi.
In this mode, the classifier must have been pre-trained with the desired character set, or it will not be able to find the character segmentations.
Make a word from the selected blobs and run Tess on them.
page_res | recognise blobs |
selection_box | within this box |
fp_eval_word_spacing() Evaluation function for fixed pitch word lists.
Basically, count the number of "nice" characters - those which are in tess acceptable words or in dict words and are not rejected. Penalise any potential noise chars.
build_menu()
Construct the menu tree used by the command window
process_cmd_win_event()
Process a command returned from the command window (just call the appropriate command handler).
word_blank_and_set_display() Word processor
Blank display of word then redisplay word according to current display mode settings
typedef GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> tesseract::BlobGridSearch |
Definition at line 31 of file blobgrid.h.
typedef signed int tesseract::char_32 |
Definition at line 40 of file string_32.h.
typedef GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> tesseract::ColPartitionGridSearch |
Definition at line 913 of file colpartition.h.
typedef BBGrid<ColSegment, ColSegment_CLIST, ColSegment_C_IT> tesseract::ColSegmentGrid |
Definition at line 118 of file tablefind.h.
typedef GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> tesseract::ColSegmentGridSearch |
Definition at line 121 of file tablefind.h.
typedef GenericVector<Dawg *> tesseract::DawgVector |
typedef int(Dict::* tesseract::DictFunc)(void *void_dawg_args, UNICHAR_ID unichar_id, bool word_end) const |
typedef bool(* tesseract::FileReader)(const STRING &filename, GenericVector< char > *data) |
Definition at line 349 of file genericvector.h.
typedef bool(* tesseract::FileWriter)(const GenericVector< char > &data, const STRING &filename) |
Definition at line 352 of file genericvector.h.
typedef void(Wordrec::* tesseract::FillLatticeFunc)(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle) |
typedef KDPairInc<int, int> tesseract::IntKDPair |
typedef unsigned char tesseract::LanguageModelFlagsType |
Used for expressing various language model flags.
Definition at line 37 of file lm_state.h.
typedef hash_map<string, string, StringHash> tesseract::LigHash |
Definition at line 32 of file ligature_table.h.
Definition at line 34 of file lm_pain_points.h.
typedef float(Dict::* tesseract::ParamsModelClassifyFunc)(const char *lang, void *path) |
Definition at line 122 of file params_training_featdef.h.
Definition at line 33 of file colpartitionset.h.
typedef double(Dict::* tesseract::ProbabilityInContextFunc)(const char *lang, const char *context, int context_bytes, const char *character, int character_bytes) |
typedef GenericVectorEqEq<const ParagraphModel *> tesseract::SetOfModels |
Definition at line 94 of file paragraphs_internal.h.
Definition at line 156 of file shapetable.h.
typedef basic_string<char_32> tesseract::string_32 |
Definition at line 41 of file string_32.h.
typedef GenericVector<int> tesseract::SuccessorList |
typedef TessCallback4<const UNICHARSET &, int, PageIterator *, Pix *> tesseract::TruthCallback |
typedef GenericVector<AmbigSpec_LIST *> tesseract::UnicharAmbigsVector |
typedef TessResultCallback1<bool, int> tesseract::WidthCallback |
typedef BBGrid<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT> tesseract::WordGrid |
typedef void(Tesseract::* tesseract::WordRecognizer)(const WordData &word_data, WERD_RES **in_word, PointerVector< WERD_RES > *out_words) |
Definition at line 166 of file tesseractclass.h.
typedef GridSearch<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT> tesseract::WordSearch |
enum tesseract::AmbigType |
Enumerator | |
---|---|
NOT_AMBIG | |
REPLACE_AMBIG | |
DEFINITE_AMBIG | |
SIMILAR_AMBIG | |
CASE_AMBIG | |
AMBIG_TYPE_COUNT |
Definition at line 44 of file ambigs.h.
Enumerator | |
---|---|
CST_FRAGMENT | |
CST_WHOLE | |
CST_IMPROPER | |
CST_NGRAM |
Definition at line 54 of file classify.h.
Enumerator | |
---|---|
ACTION_1_CMD_EVENT | |
RECOG_WERDS | |
RECOG_PSEUDO | |
ACTION_2_CMD_EVENT |
Definition at line 477 of file tessedit.cpp.
Enumerator | |
---|---|
COL_UNKNOWN | |
COL_TEXT | |
COL_TABLE | |
COL_MIXED | |
COL_COUNT |
Definition at line 30 of file tablefind.h.
Enumerator | |
---|---|
CST_NOISE | |
CST_FLOWING | |
CST_HEADING | |
CST_PULLOUT | |
CST_COUNT |
Definition at line 47 of file colpartition.h.
Definition at line 69 of file errorcounter.h.
enum tesseract::DawgType |
Definition at line 39 of file params_training_featdef.h.
Enumerator | |
---|---|
LR_LEFT | |
LR_RIGHT |
Definition at line 39 of file strokewidth.h.
enum tesseract::LineType |
Enumerator | |
---|---|
LT_START | |
LT_BODY | |
LT_UNKNOWN | |
LT_MULTIPLE |
Definition at line 54 of file paragraphs_internal.h.
Enumerator | |
---|---|
LM_PPTYPE_BLAMER | |
LM_PPTYPE_AMBIG | |
LM_PPTYPE_PATH | |
LM_PPTYPE_SHAPE | |
LM_PPTYPE_NUM |
Definition at line 37 of file lm_pain_points.h.
Enumerator | |
---|---|
NPT_HTEXT | |
NPT_VTEXT | |
NPT_WEAK_HTEXT | |
NPT_WEAK_VTEXT | |
NPT_IMAGE | |
NPT_COUNT |
Definition at line 1558 of file colpartitiongrid.cpp.
Enumerator | |
---|---|
NM_BASELINE | |
NM_CHAR_ISOTROPIC | |
NM_CHAR_ANISOTROPIC |
Definition at line 44 of file normalis.h.
When Tesseract/Cube is initialized we can choose to instantiate/load/run only the Tesseract part, only the Cube part or both along with the combiner. The preference of which engine to use is stored in tessedit_ocr_engine_mode.
ATTENTION: When modifying this enum, please make sure to make the appropriate changes to all the enums mirroring it (e.g. OCREngine in cityblock/workflow/detection/detection_storage.proto). Such enums will mention the connection to OcrEngineMode in the comments.
Enumerator | |
---|---|
OEM_TESSERACT_ONLY | |
OEM_CUBE_ONLY | |
OEM_TESSERACT_CUBE_COMBINED | |
OEM_DEFAULT |
Definition at line 256 of file publictypes.h.
+------------------+  Orientation Example:
| 1 Aaaa Aaaa Aaaa |  ====================
| Aaa aa aaa aa    |  To left is a diagram of some (1) English and
| aaaaaa A aa aaa. |  (2) Chinese text and a (3) photo credit.
|                2 |
|   #######  c c C |  Upright Latin characters are represented as A and a.
|   #######  c c c |  '<' represents a latin character rotated
| < #######  c c c |      anti-clockwise 90 degrees.
| < #######  c   c |
| < #######  .   c |  Upright Chinese characters are represented C and c.
| 3 #######      c |
+------------------+  NOTA BENE: enum values here should match goodoc.proto
If you orient your head so that "up" aligns with Orientation, then the characters will appear "right side up" and readable.
In the example above, both the English and Chinese paragraphs are oriented so their "up" is the top of the page (page up). The photo credit is read with one's head turned leftward ("up" is to page left).
The values of this enum match the convention of Tesseract's osdetect.h
Enumerator | |
---|---|
ORIENTATION_PAGE_UP | |
ORIENTATION_PAGE_RIGHT | |
ORIENTATION_PAGE_DOWN | |
ORIENTATION_PAGE_LEFT |
Definition at line 108 of file publictypes.h.
enum of the elements of the page hierarchy, used in ResultIterator to provide functions that operate on each level without having to have 5x as many functions.
Enumerator | |
---|---|
RIL_BLOCK | |
RIL_PARA | |
RIL_TEXTLINE | |
RIL_WORD | |
RIL_SYMBOL |
Definition at line 207 of file publictypes.h.
Possible modes for page layout analysis. These must be kept in order of decreasing amount of layout analysis to be done, except for OSD_ONLY, so that the inequality test macros below work.
Definition at line 151 of file publictypes.h.
JUSTIFICATION_UNKNOWN The alignment is not clearly one of the other options. This could happen for example if there are only one or two lines of text or the text looks like source code or poetry.
NOTA BENE: Fully justified paragraphs (text aligned to both left and right margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text is written with a left-to-right script and with JUSTIFICATION_RIGHT if their text is written in a right-to-left script.
Interpretation for text read in vertical lines: "Left" is wherever the starting reading position is.
JUSTIFICATION_LEFT Each line, except possibly the first, is flush to the same left tab stop.
JUSTIFICATION_CENTER The text lines of the paragraph are centered about a line going down through the middle of the text lines.
JUSTIFICATION_RIGHT Each line, except possibly the first, is flush to the same right tab stop.
Enumerator | |
---|---|
JUSTIFICATION_UNKNOWN | |
JUSTIFICATION_LEFT | |
JUSTIFICATION_CENTER | |
JUSTIFICATION_RIGHT |
Definition at line 239 of file publictypes.h.
Enumerator | |
---|---|
PFR_OK | |
PFR_SKEW | |
PFR_NOISE |
Definition at line 46 of file strokewidth.h.
enum tesseract::ScriptPos |
Enumerator | |
---|---|
SP_NORMAL | |
SP_SUBSCRIPT | |
SP_SUPERSCRIPT | |
SP_DROPCAP |
Enumerator | |
---|---|
SET_PARAM_CONSTRAINT_NONE | |
SET_PARAM_CONSTRAINT_DEBUG_ONLY | |
SET_PARAM_CONSTRAINT_NON_DEBUG_ONLY | |
SET_PARAM_CONSTRAINT_NON_INIT_ONLY |
Definition at line 36 of file params.h.
Enumerator | |
---|---|
TA_LEFT_ALIGNED | |
TA_LEFT_RAGGED | |
TA_CENTER_JUSTIFIED | |
TA_RIGHT_ALIGNED | |
TA_RIGHT_RAGGED | |
TA_SEPARATOR | |
TA_COUNT |
Definition at line 43 of file tabvector.h.
Definition at line 53 of file tessdatamanager.h.
The text lines are read in the given sequence.
In English, the order is top-to-bottom. In Chinese, vertical text lines are read right-to-left. Mongolian is written in vertical columns top to bottom like Chinese, but the lines are ordered left-to-right.
Note that only some combinations make sense. For example, WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM
Enumerator | |
---|---|
TEXTLINE_ORDER_LEFT_TO_RIGHT | |
TEXTLINE_ORDER_RIGHT_TO_LEFT | |
TEXTLINE_ORDER_TOP_TO_BOTTOM |
Definition at line 140 of file publictypes.h.
The grapheme clusters within a line of text are laid out logically in this direction, judged when looking at the text line rotated so that its Orientation is "page up".
For English text, the writing direction is left-to-right. For the Chinese text in the above example, the writing direction is top-to-bottom.
Enumerator | |
---|---|
WRITING_DIRECTION_LEFT_TO_RIGHT | |
WRITING_DIRECTION_RIGHT_TO_LEFT | |
WRITING_DIRECTION_TOP_TO_BOTTOM |
Definition at line 123 of file publictypes.h.
void tesseract::AffineMatrix | ( | int | writing_direction, |
int | line_x1, | ||
int | line_y1, | ||
int | line_x2, | ||
int | line_y2, | ||
double * | a, | ||
double * | b, | ||
double * | c, | ||
double * | d | ||
) |
Definition at line 246 of file pdfrenderer.cpp.
bool tesseract::AsciiLikelyListItem | ( | const STRING & | word | ) |
Definition at line 267 of file paragraphs.cpp.
void tesseract::assign_blobs_to_blocks2 | ( | Pix * | pix, |
BLOCK_LIST * | blocks, | ||
TO_BLOCK_LIST * | port_blocks | ||
) |
Definition at line 157 of file tordmain.cpp.
TrainingSample * tesseract::BlobToTrainingSample | ( | const TBLOB & | blob, |
bool | nonlinear_norm, | ||
INT_FX_RESULT_STRUCT * | fx_info, | ||
GenericVector< INT_FEATURE_STRUCT > * | bl_features | ||
) |
Definition at line 81 of file intfx.cpp.
Pix* tesseract::CairoARGB32ToPixFormat | ( | cairo_surface_t * | surface | ) |
Definition at line 78 of file stringrenderer.cpp.
void tesseract::CalculateTabStops | ( | GenericVector< RowScratchRegisters > * | rows, |
int | row_start, | ||
int | row_end, | ||
int | tolerance, | ||
GenericVector< Cluster > * | left_tabs, | ||
GenericVector< Cluster > * | right_tabs | ||
) |
Definition at line 691 of file paragraphs.cpp.
void tesseract::CallWithUTF8 | ( | TessCallback1< const char * > * | cb, |
const WERD_CHOICE * | wc | ||
) |
Definition at line 112 of file dawg.cpp.
void tesseract::CanonicalizeDetectionResults | ( | GenericVector< PARA * > * | row_owners, |
PARA_LIST * | paragraphs | ||
) |
Definition at line 2232 of file paragraphs.cpp.
Definition at line 42 of file cube_control.cpp.
void tesseract::ClearFeatureSpaceWindow | ( | NORM_METHOD | norm_method, |
ScrollView * | window | ||
) |
Clears the given window and draws the featurespace guides for the appropriate normalization method.
Definition at line 1104 of file intproto.cpp.
void tesseract::ClipBaseline | ( | int | ppi, |
int | x1, | ||
int | y1, | ||
int | x2, | ||
int | y2, | ||
int * | line_x1, | ||
int * | line_y1, | ||
int * | line_x2, | ||
int * | line_y2 | ||
) |
Definition at line 275 of file pdfrenderer.cpp.
int tesseract::ClosestCluster | ( | const GenericVector< Cluster > & | clusters, |
int | value | ||
) |
Definition at line 665 of file paragraphs.cpp.
bool tesseract::cmp_eq | ( | T const & | t1, |
T const & | t2 | ||
) |
Definition at line 382 of file genericvector.h.
bool tesseract::CompareFontInfo | ( | const FontInfo & | fi1, |
const FontInfo & | fi2 | ||
) |
Definition at line 120 of file fontinfo.cpp.
bool tesseract::CompareFontSet | ( | const FontSet & | fs1, |
const FontSet & | fs2 | ||
) |
Definition at line 128 of file fontinfo.cpp.
void tesseract::ConvertHypothesizedModelRunsToParagraphs | ( | int | debug_level, |
const GenericVector< RowScratchRegisters > & | rows, | ||
GenericVector< PARA * > * | row_owners, | ||
ParagraphTheory * | theory | ||
) |
Definition at line 2041 of file paragraphs.cpp.
bool tesseract::CrownCompatible | ( | const GenericVector< RowScratchRegisters > * | rows, |
int | a, | ||
int | b, | ||
const ParagraphModel * | model | ||
) |
Definition at line 1288 of file paragraphs.cpp.
int tesseract::CubeAPITest | ( | Boxa * | boxa_blocks, |
Pixa * | pixa_blocks, | ||
Boxa * | boxa_words, | ||
Pixa * | pixa_words, | ||
const FCOORD & | reskew, | ||
Pix * | page_pix, | ||
PAGE_RES * | page_res | ||
) |
Placeholder for call to Cube and test that the input data is correct. reskew is the direction of baselines in the skewed image in normalized (cos theta, sin theta) form, so (0.866, 0.5) would represent a 30 degree anticlockwise skew.
Definition at line 757 of file baseapi.cpp.
struct Pix * tesseract::DegradeImage | ( | Pix * | input, |
int | exposure, | ||
TRand * | randomizer, | ||
float * | rotation | ||
) |
Definition at line 65 of file degradeimage.cpp.
void tesseract::DeleteObject | ( | T * | object | ) |
Definition at line 165 of file tablefind.cpp.
void tesseract::DetectParagraphs | ( | int | debug_level, |
GenericVector< RowInfo > * | row_infos, | ||
GenericVector< PARA * > * | row_owners, | ||
PARA_LIST * | paragraphs, | ||
GenericVector< ParagraphModel * > * | models | ||
) |
Definition at line 2264 of file paragraphs.cpp.
void tesseract::DetectParagraphs | ( | int | debug_level, |
bool | after_text_recognition, | ||
const MutableIterator * | block_start, | ||
GenericVector< ParagraphModel * > * | models | ||
) |
Definition at line 2509 of file paragraphs.cpp.
void tesseract::DiscardUnusedModels | ( | const GenericVector< RowScratchRegisters > & | rows, |
ParagraphTheory * | theory | ||
) |
Definition at line 1455 of file paragraphs.cpp.
long tesseract::dist2 | ( | int | x1, |
int | y1, | ||
int | x2, | ||
int | y2 | ||
) |
Definition at line 192 of file pdfrenderer.cpp.
void tesseract::DowngradeWeakestToCrowns | ( | int | debug_level, |
ParagraphTheory * | theory, | ||
GenericVector< RowScratchRegisters > * | rows | ||
) |
Definition at line 1488 of file paragraphs.cpp.
tesseract::ELISTIZE | ( | ViterbiStateEntry | ) |
tesseract::ELISTIZE | ( | AmbigSpec | ) |
tesseract::ELISTIZEH | ( | AmbigSpec | ) |
tesseract::ELISTIZEH | ( | ViterbiStateEntry | ) |
void tesseract::ExtractFontProperties | ( | const string & | utf8_text, |
StringRenderer * | render, | ||
const string & | output_base | ||
) |
Definition at line 212 of file text2image.cpp.
bool tesseract::FirstWordWouldHaveFit | ( | const RowScratchRegisters & | before, |
const RowScratchRegisters & | after, | ||
tesseract::ParagraphJustification | justification | ||
) |
Definition at line 1621 of file paragraphs.cpp.
bool tesseract::FirstWordWouldHaveFit | ( | const RowScratchRegisters & | before, |
const RowScratchRegisters & | after | ||
) |
Definition at line 1646 of file paragraphs.cpp.
void tesseract::FontInfoDeleteCallback | ( | FontInfo | f | ) |
Definition at line 139 of file fontinfo.cpp.
void tesseract::FontSetDeleteCallback | ( | FontSet | fs | ) |
Definition at line 146 of file fontinfo.cpp.
Definition at line 239 of file normstrngs.cpp.
void tesseract::GeometricClassify | ( | int | debug_level, |
GenericVector< RowScratchRegisters > * | rows, | ||
int | row_start, | ||
int | row_end, | ||
ParagraphTheory * | theory | ||
) |
Definition at line 1077 of file paragraphs.cpp.
void tesseract::GeometricClassifyThreeTabStopTextBlock | ( | int | debug_level, |
GeometricClassifierState & | s, | ||
ParagraphTheory * | theory | ||
) |
Definition at line 985 of file paragraphs.cpp.
void tesseract::GetWordBaseline | ( | int | writing_direction, |
int | ppi, | ||
int | height, | ||
int | word_x1, | ||
int | word_y1, | ||
int | word_x2, | ||
int | word_y2, | ||
int | line_x1, | ||
int | line_y1, | ||
int | line_x2, | ||
int | line_y2, | ||
double * | x0, | ||
double * | y0, | ||
double * | length | ||
) |
Definition at line 204 of file pdfrenderer.cpp.
Pix* tesseract::GridReducedPix | ( | const TBOX & | box, |
int | gridsize, | ||
ICOORD | bleft, | ||
int * | left, | ||
int * | bottom | ||
) |
Definition at line 212 of file bbgrid.cpp.
void tesseract::HistogramRect | ( | Pix * | src_pix, |
int | channel, | ||
int | left, | ||
int | top, | ||
int | width, | ||
int | height, | ||
int * | histogram | ||
) |
Definition at line 157 of file otsuthr.cpp.
STRING tesseract::HOcrEscape | ( | const char * | text | ) |
Escape a char string - replace <>&"' with HTML codes.
Escape a char string - replace &<>"' with HTML codes.
Definition at line 2644 of file baseapi.cpp.
void tesseract::InitializeRowInfo | ( | bool | after_recognition, |
const MutableIterator & | it, | ||
RowInfo * | info | ||
) |
Definition at line 2411 of file paragraphs.cpp.
void tesseract::InitializeTextAndBoxesPreRecognition | ( | const MutableIterator & | it, |
RowInfo * | info | ||
) |
Definition at line 2359 of file paragraphs.cpp.
ParagraphModel tesseract::InternalParagraphModelByOutline | ( | const GenericVector< RowScratchRegisters > * | rows, |
int | start, | ||
int | end, | ||
int | tolerance, | ||
bool * | consistent | ||
) |
Definition at line 1692 of file paragraphs.cpp.
int tesseract::InterwordSpace | ( | const GenericVector< RowScratchRegisters > & | rows, |
int | row_start, | ||
int | row_end | ||
) |
Definition at line 1598 of file paragraphs.cpp.
bool tesseract::is_double_quote | ( | const char32 | ch | ) |
Definition at line 97 of file normstrngs.cpp.
bool tesseract::is_hyphen_punc | ( | const char32 | ch | ) |
Definition at line 58 of file normstrngs.cpp.
bool tesseract::is_single_quote | ( | const char32 | ch | ) |
Definition at line 77 of file normstrngs.cpp.
bool tesseract::IsDigitLike | ( | int | ch | ) |
Definition at line 197 of file paragraphs.cpp.
bool tesseract::IsInterchangeValid | ( | const char32 | ch | ) |
Definition at line 208 of file normstrngs.cpp.
bool tesseract::IsInterchangeValid7BitAscii | ( | const char32 | ch | ) |
Definition at line 232 of file normstrngs.cpp.
bool tesseract::IsLatinLetter | ( | int | ch | ) |
Definition at line 193 of file paragraphs.cpp.
|
inline |
Definition at line 95 of file equationdetect.cpp.
Definition at line 166 of file normstrngs.cpp.
bool tesseract::IsOpeningPunct | ( | int | ch | ) |
Definition at line 201 of file paragraphs.cpp.
|
inline |
Definition at line 100 of file equationdetect.cpp.
bool tesseract::IsTerminalPunct | ( | int | ch | ) |
Definition at line 205 of file paragraphs.cpp.
|
inline |
Definition at line 91 of file equationdetect.cpp.
bool tesseract::IsUTF8Whitespace | ( | const char * | text | ) |
Definition at line 182 of file normstrngs.cpp.
bool tesseract::IsValidCodepoint | ( | const char32 | ch | ) |
Definition at line 170 of file normstrngs.cpp.
bool tesseract::IsWhitespace | ( | const char32 | ch | ) |
Definition at line 176 of file normstrngs.cpp.
void tesseract::LeftoverSegments | ( | const GenericVector< RowScratchRegisters > & | rows, |
GenericVector< Interval > * | to_fix, | ||
int | row_start, | ||
int | row_end | ||
) |
Definition at line 2181 of file paragraphs.cpp.
void tesseract::LeftWordAttributes | ( | const UNICHARSET * | unicharset, |
const WERD_CHOICE * | werd, | ||
const STRING & | utf8, | ||
bool * | is_list, | ||
bool * | starts_idea, | ||
bool * | ends_idea | ||
) |
Definition at line 394 of file paragraphs.cpp.
bool tesseract::LikelyListMark | ( | const STRING & | word | ) |
Definition at line 262 of file paragraphs.cpp.
bool tesseract::LikelyListMarkUnicode | ( | int | ch | ) |
Definition at line 328 of file paragraphs.cpp.
bool tesseract::LikelyListNumeral | ( | const STRING & | word | ) |
Definition at line 228 of file paragraphs.cpp.
bool tesseract::LikelyParagraphStart | ( | const RowScratchRegisters & | before, |
const RowScratchRegisters & | after | ||
) |
Definition at line 1672 of file paragraphs.cpp.
bool tesseract::LikelyParagraphStart | ( | const RowScratchRegisters & | before, |
const RowScratchRegisters & | after, | ||
tesseract::ParagraphJustification | j | ||
) |
Definition at line 1679 of file paragraphs.cpp.
|
inline |
Definition at line 356 of file genericvector.h.
ShapeTable * tesseract::LoadShapeTable | ( | const STRING & | file_prefix | ) |
Definition at line 118 of file commontraining.cpp.
MasterTrainer * tesseract::LoadTrainingData | ( | int | argc, |
const char *const * | argv, | ||
bool | replication, | ||
ShapeTable ** | shape_table, | ||
STRING * | file_prefix | ||
) |
Creates a MasterTrainer and loads the training data into it: Initializes feature_defs and IntegerFX. Loads the shape_table if shape_table != NULL. Loads initial unicharset from -U command-line option. If FLAGS_T is set, loads the majority of data from there, else:
Definition at line 175 of file commontraining.cpp.
TBLOB* tesseract::make_tesseract_blob | ( | float | baseline, |
float | xheight, | ||
float | descender, | ||
float | ascender, | ||
bool | numeric_mode, | ||
Pix * | pix | ||
) |
Return a TBLOB * from the whole pix. To be freed later with delete.
Definition at line 2338 of file baseapi.cpp.
bool tesseract::MakeIndividualGlyphs | ( | Pix * | pix, |
const vector< BoxChar * > & | vbox, | ||
const int | input_tiff_page | ||
) |
Definition at line 309 of file text2image.cpp.
void tesseract::MarkRowsWithModel | ( | GenericVector< RowScratchRegisters > * | rows, |
int | row_start, | ||
int | row_end, | ||
const ParagraphModel * | model, | ||
bool | ltr, | ||
int | eop_threshold | ||
) |
Definition at line 807 of file paragraphs.cpp.
void tesseract::MarkStrongEvidence | ( | GenericVector< RowScratchRegisters > * | rows, |
int | row_start, | ||
int | row_end | ||
) |
Definition at line 1830 of file paragraphs.cpp.
void tesseract::ModelStrongEvidence | ( | int | debug_level, |
GenericVector< RowScratchRegisters > * | rows, | ||
int | row_start, | ||
int | row_end, | ||
bool | allow_flush_models, | ||
ParagraphTheory * | theory | ||
) |
Definition at line 1900 of file paragraphs.cpp.
void tesseract::NormalizeChar32 | ( | char32 | ch, |
GenericVector< char32 > * | str | ||
) |
Definition at line 131 of file normstrngs.cpp.
uinT8 tesseract::NormalizeDirection | ( | uinT8 | dir, |
const FCOORD & | unnormed_pos, | ||
const DENORM & | denorm, | ||
const DENORM * | root_denorm | ||
) |
Definition at line 171 of file intfx.cpp.
STRING tesseract::NormalizeUTF8String | ( | const char * | str8 | ) |
Definition at line 116 of file normstrngs.cpp.
Definition at line 156 of file normstrngs.cpp.
int tesseract::OtsuStats | ( | const int * | histogram, |
int * | H_out, | ||
int * | omega0_out | ||
) |
Definition at line 182 of file otsuthr.cpp.
int tesseract::OtsuThreshold | ( | Pix * | src_pix, |
int | left, | ||
int | top, | ||
int | width, | ||
int | height, | ||
int ** | thresholds, | ||
int ** | hi_values | ||
) |
Definition at line 39 of file otsuthr.cpp.
ParagraphModel tesseract::ParagraphModelByOutline | ( | int | debug_level, |
const GenericVector< RowScratchRegisters > * | rows, | ||
int | start, | ||
int | end, | ||
int | tolerance | ||
) |
Definition at line 1793 of file paragraphs.cpp.
int tesseract::ParamsTrainingFeatureByName | ( | const char * | name | ) |
Definition at line 26 of file params_training_featdef.cpp.
void tesseract::ParseCommandLineFlags | ( | const char * | usage, |
int * | argc, | ||
char *** | argv, | ||
const bool | remove_flags | ||
) |
Definition at line 312 of file commandlineflags.cpp.
double tesseract::prec | ( | double | x | ) |
Definition at line 184 of file pdfrenderer.cpp.
|
inline |
Definition at line 191 of file publictypes.h.
|
inline |
Definition at line 185 of file publictypes.h.
|
inline |
Definition at line 194 of file publictypes.h.
|
inline |
Definition at line 182 of file publictypes.h.
|
inline |
Inline functions that act on a PageSegMode to determine whether components of layout analysis are enabled. Depend critically on the order of elements of PageSegMode. NOTE that arg is an int for compatibility with INT_PARAM.
Definition at line 179 of file publictypes.h.
|
inline |
Definition at line 188 of file publictypes.h.
|
inline |
bool tesseract::read_info | ( | FILE * | f, |
FontInfo * | fi, | ||
bool | swap | ||
) |
Definition at line 152 of file fontinfo.cpp.
bool tesseract::read_set | ( | FILE * | f, |
FontSet * | fs, | ||
bool | swap | ||
) |
Definition at line 240 of file fontinfo.cpp.
bool tesseract::read_spacing_info | ( | FILE * | f, |
FontInfo * | fi, | ||
bool | swap | ||
) |
Definition at line 177 of file fontinfo.cpp.
bool tesseract::read_t | ( | PAGE_RES_IT * | page_res_it, |
TBOX * | tbox | ||
) |
Definition at line 53 of file recogtraining.cpp.
void tesseract::RecomputeMarginsAndClearHypotheses | ( | GenericVector< RowScratchRegisters > * | rows, |
int | start, | ||
int | end, | ||
int | percentile | ||
) |
Definition at line 1558 of file paragraphs.cpp.
void tesseract::RightWordAttributes | ( | const UNICHARSET * | unicharset, |
const WERD_CHOICE * | werd, | ||
const STRING & | utf8, | ||
bool * | is_list, | ||
bool * | starts_idea, | ||
bool * | ends_idea | ||
) |
Definition at line 441 of file paragraphs.cpp.
bool tesseract::RowIsStranded | ( | const GenericVector< RowScratchRegisters > & | rows, |
int | row | ||
) |
Definition at line 2139 of file paragraphs.cpp.
bool tesseract::RowsFitModel | ( | const GenericVector< RowScratchRegisters > * | rows, |
int | start, | ||
int | end, | ||
const ParagraphModel * | model | ||
) |
Definition at line 1808 of file paragraphs.cpp.
Definition at line 121 of file paragraphs.cpp.
|
inline |
Definition at line 371 of file genericvector.h.
const char * tesseract::ScriptPosToString | ( | enum ScriptPos | script_pos | ) |
Definition at line 180 of file ratngs.cpp.
void tesseract::SeparateSimpleLeaderLines | ( | GenericVector< RowScratchRegisters > * | rows, |
int | row_start, | ||
int | row_end, | ||
ParagraphTheory * | theory | ||
) |
Definition at line 2025 of file paragraphs.cpp.
void tesseract::SetBlobStrokeWidth | ( | Pix * | pix, |
BLOBNBOX * | blob | ||
) |
Definition at line 58 of file tordmain.cpp.
void tesseract::SetPropertiesForInputFile | ( | const string & | script_dir, |
const string & | input_unicharset_file, | ||
const string & | output_unicharset_file, | ||
const string & | output_xheights_file | ||
) |
Definition at line 148 of file unicharset_training_utils.cpp.
void tesseract::SetupBasicProperties | ( | bool | report_errors, |
UNICHARSET * | unicharset | ||
) |
Definition at line 40 of file unicharset_training_utils.cpp.
const char* tesseract::SkipChars | ( | const char * | str, |
const char * | toskip | ||
) |
Definition at line 210 of file paragraphs.cpp.
const char* tesseract::SkipChars | ( | const char * | str, |
bool(*)(int) | skip | ||
) |
Definition at line 215 of file paragraphs.cpp.
const char* tesseract::SkipOne | ( | const char * | str, |
const char * | toskip | ||
) |
Definition at line 220 of file paragraphs.cpp.
int tesseract::sort_cmp | ( | const void * | t1, |
const void * | t2 | ||
) |
Definition at line 391 of file genericvector.h.
int tesseract::sort_ptr_cmp | ( | const void * | t1, |
const void * | t2 | ||
) |
Definition at line 408 of file genericvector.h.
int tesseract::SortByBoxBottom | ( | const void * | void1, |
const void * | void2 | ||
) |
int tesseract::SortByBoxLeft | ( | const void * | void1, |
const void * | void2 | ||
) |
int tesseract::SortByRating | ( | const void * | void1, |
const void * | void2 | ||
) |
Definition at line 86 of file pieces.cpp.
int tesseract::SortByUnicharID | ( | const void * | void1, |
const void * | void2 | ||
) |
Definition at line 78 of file pieces.cpp.
int tesseract::SortRightToLeft | ( | const void * | void1, |
const void * | void2 | ||
) |
int tesseract::SpanUTF8NotWhitespace | ( | const char * | text | ) |
Definition at line 197 of file normstrngs.cpp.
int tesseract::SpanUTF8Whitespace | ( | const char * | text | ) |
Definition at line 186 of file normstrngs.cpp.
void tesseract::StrongEvidenceClassify | ( | int | debug_level, |
GenericVector< RowScratchRegisters > * | rows, | ||
int | row_start, | ||
int | row_end, | ||
ParagraphTheory * | theory | ||
) |
Definition at line 1995 of file paragraphs.cpp.
|
inline |
Definition at line 75 of file paragraphs_internal.h.
bool tesseract::TextSupportsBreak | ( | const RowScratchRegisters & | before, |
const RowScratchRegisters & | after | ||
) |
Definition at line 1661 of file paragraphs.cpp.
Pix * tesseract::TraceBlockOnReducedPix | ( | BLOCK * | block, |
int | gridsize, | ||
ICOORD | bleft, | ||
int * | left, | ||
int * | bottom | ||
) |
Definition at line 258 of file bbgrid.cpp.
Pix * tesseract::TraceOutlineOnReducedPix | ( | C_OUTLINE * | outline, |
int | gridsize, | ||
ICOORD | bleft, | ||
int * | left, | ||
int * | bottom | ||
) |
Definition at line 232 of file bbgrid.cpp.
int tesseract::UnicodeFor | ( | const UNICHARSET * | u, |
const WERD_CHOICE * | werd, | ||
int | pos | ||
) |
Definition at line 274 of file paragraphs.cpp.
bool tesseract::UniLikelyListItem | ( | const UNICHARSET * | u, |
const WERD_CHOICE * | werd | ||
) |
Definition at line 357 of file paragraphs.cpp.
void tesseract::UTF32ToUTF8 | ( | const GenericVector< char32 > & | str32, |
STRING * | utf8_str | ||
) |
Definition at line 45 of file normstrngs.cpp.
void tesseract::UTF8ToUTF32 | ( | const char * | utf8_str, |
GenericVector< char32 > * | str32 | ||
) |
Definition at line 31 of file normstrngs.cpp.
bool tesseract::ValidBodyLine | ( | const GenericVector< RowScratchRegisters > * | rows, |
int | row, | ||
const ParagraphModel * | model | ||
) |
Definition at line 1277 of file paragraphs.cpp.
bool tesseract::ValidFirstLine | ( | const GenericVector< RowScratchRegisters > * | rows, |
int | row, | ||
const ParagraphModel * | model | ||
) |
Definition at line 1266 of file paragraphs.cpp.
bool tesseract::write_info | ( | FILE * | f, |
const FontInfo & | fi | ||
) |
Definition at line 168 of file fontinfo.cpp.
bool tesseract::write_set | ( | FILE * | f, |
const FontSet & | fs | ||
) |
Definition at line 253 of file fontinfo.cpp.
bool tesseract::write_spacing_info | ( | FILE * | f, |
const FontInfo & | fi | ||
) |
Definition at line 211 of file fontinfo.cpp.
void tesseract::WriteShapeTable | ( | const STRING & | file_prefix, |
const ShapeTable & | shape_table | ||
) |
Definition at line 144 of file commontraining.cpp.
void tesseract::YOutlierPieces | ( | WERD_RES * | word, |
int | rebuilt_blob_index, | ||
int | super_y_bottom, | ||
int | sub_y_top, | ||
ScriptPos * | leading_pos, | ||
int * | num_leading_outliers, | ||
ScriptPos * | trailing_pos, | ||
int * | num_trailing_outliers | ||
) |
Given a recognized blob, see if a contiguous collection of sub-pieces (chopped blobs) starting at its left might qualify as being a subscript or superscript letter based only on y position. Also do this for the right side.
Definition at line 46 of file superscript.cpp.
const int tesseract::case_state_table[6][4] |
Definition at line 35 of file context.cpp.
const int tesseract::kAdjacentLeaderSearchPadding = 2 |
Definition at line 125 of file tablefind.cpp.
const double tesseract::kAlignedFraction = 0.03125 |
Definition at line 39 of file alignedblob.cpp.
const double tesseract::kAlignedGapFraction = 0.75 |
Definition at line 43 of file alignedblob.cpp.
const char* tesseract::kAlignmentNames[] |
Definition at line 515 of file tabvector.cpp.
const double tesseract::kAllowBlobArea = 0.05 |
Definition at line 61 of file tablefind.cpp.
const double tesseract::kAllowBlobHeight = 0.3 |
Definition at line 59 of file tablefind.cpp.
const double tesseract::kAllowBlobWidth = 0.4 |
Definition at line 60 of file tablefind.cpp.
const double tesseract::kAllowTextArea = 0.8 |
Definition at line 54 of file tablefind.cpp.
const double tesseract::kAllowTextHeight = 0.5 |
Definition at line 52 of file tablefind.cpp.
const double tesseract::kAllowTextWidth = 0.6 |
Definition at line 53 of file tablefind.cpp.
const char * tesseract::kApostropheLikeUTF8 |
Definition at line 48 of file unicodes.cpp.
const int tesseract::kBasicBufSize = 2048 |
Definition at line 155 of file pdfrenderer.cpp.
const double tesseract::kBigPartSizeRatio = 1.75 |
Definition at line 51 of file colpartitiongrid.cpp.
const int tesseract::kBoxClipTolerance = 2 |
Definition at line 31 of file boxword.cpp.
const double tesseract::kBrokenCJKIterationFraction = 0.125 |
Definition at line 71 of file strokewidth.cpp.
const int tesseract::kBytesPer64BitNumber = 20 |
Max bytes in the decimal representation of inT64.
Definition at line 1566 of file baseapi.cpp.
const int tesseract::kBytesPerBlob = kNumbersPerBlob * (kBytesPerNumber + 1) + 1 |
Multiplier for max expected textlength assumes (kBytesPerNumber + space)
Definition at line 1563 of file baseapi.cpp.
const int tesseract::kBytesPerBoxFileLine = (kBytesPerNumber + 1) * kNumbersPerBlob + 1 |
Definition at line 1564 of file baseapi.cpp.
const int tesseract::kBytesPerNumber = 5 |
The number of bytes taken by each number. Since we use inT16 for ICOORD, assume only 5 digits max.
Definition at line 1557 of file baseapi.cpp.
const int tesseract::kCellSplitColumnThreshold = 0 |
Definition at line 40 of file tablerecog.cpp.
const int tesseract::kCellSplitRowThreshold = 0 |
Definition at line 39 of file tablerecog.cpp.
const double tesseract::kCharVerticalOverlapFraction = 0.375 |
Definition at line 62 of file tabfind.cpp.
const int tesseract::kCharWidth = 2 |
Definition at line 158 of file pdfrenderer.cpp.
const double tesseract::kCJKAspectRatio = 1.25 |
Definition at line 65 of file strokewidth.cpp.
const double tesseract::kCJKAspectRatioIncrease = 1.0625 |
Definition at line 67 of file strokewidth.cpp.
const double tesseract::kCJKBrokenDistanceFraction = 0.25 |
Definition at line 61 of file strokewidth.cpp.
const int tesseract::kCJKMaxComponents = 8 |
Definition at line 63 of file strokewidth.cpp.
const int tesseract::kCJKRadius = 2 |
Definition at line 59 of file strokewidth.cpp.
const int tesseract::kColumnWidthFactor = 20 |
const double tesseract::kCosMaxSkewAngle = 0.866025 |
Definition at line 81 of file tabfind.cpp.
const int tesseract::kCrackSpacing = 100 |
Spacing of cracks across the page to break up tall vertical lines.
Definition at line 45 of file linefind.cpp.
const ParagraphModel * tesseract::kCrownLeft = reinterpret_cast<ParagraphModel *>(0xDEAD111F) |
Definition at line 45 of file paragraphs.cpp.
const ParagraphModel * tesseract::kCrownRight = reinterpret_cast<ParagraphModel *>(0xDEAD888F) |
Definition at line 47 of file paragraphs.cpp.
const int tesseract::kDefaultResolution = 300 |
Default resolution used if input is not believable.
Definition at line 60 of file pagesegmain.cpp.
const double tesseract::kDiacriticXPadRatio = 7.0 |
Definition at line 74 of file strokewidth.cpp.
const double tesseract::kDiacriticYPadRatio = 1.75 |
Definition at line 77 of file strokewidth.cpp.
const int tesseract::kExposureFactor = 16 |
Definition at line 32 of file degradeimage.cpp.
const int tesseract::kFeaturePadding = 2 |
Definition at line 34 of file imagedata.h.
const float tesseract::kFontMergeDistance = 0.025 |
Definition at line 52 of file mastertrainer.cpp.
const double tesseract::kGoodRowNumberOfColumnsLarge = 0.7 |
Definition at line 58 of file tablerecog.cpp.
const double tesseract::kGoodRowNumberOfColumnsSmall[] = { 2, 2, 2, 2, 2, 3, 3 } |
Definition at line 54 of file tablerecog.cpp.
const int tesseract::kGoodRowNumberOfColumnsSmallSize |
Definition at line 55 of file tablerecog.cpp.
const int tesseract::kGutterMultiple = 4 |
Definition at line 38 of file tabvector.cpp.
const int tesseract::kGutterToNeighbourRatio = 3 |
Definition at line 40 of file tabvector.cpp.
const double tesseract::kHorizontalGapMergeFraction = 0.5 |
Definition at line 57 of file colfind.cpp.
const double tesseract::kHorizontalSpacing = 0.30 |
Definition at line 33 of file tablerecog.cpp.
const int tesseract::kHorzStrongTextlineAspect = 5 |
Definition at line 74 of file colpartition.cpp.
const int tesseract::kHorzStrongTextlineCount = 8 |
Definition at line 70 of file colpartition.cpp.
const int tesseract::kHorzStrongTextlineHeight = 10 |
Definition at line 72 of file colpartition.cpp.
const char * tesseract::kHyphenLikeUTF8 |
The following are confusable internal word punctuation symbols which we normalize to the first variant when matching in dawgs.
Definition at line 32 of file unicodes.cpp.
const int tesseract::kImagePadding = 4 |
Definition at line 36 of file imagedata.h.
const float tesseract::kInfiniteDist = 999.0f |
Definition at line 911 of file mastertrainer.cpp.
const char* tesseract::kInputFile = "noname.tif" |
Filename used for input image file, from which to derive a name to search for a possible UNLV zone file, if none is specified by SetInputName.
Definition at line 97 of file baseapi.cpp.
const double tesseract::kLargeTableProjectionThreshold = 0.45 |
Definition at line 110 of file tablefind.cpp.
const int tesseract::kLargeTableRowCount = 6 |
Definition at line 112 of file tablefind.cpp.
const int tesseract::kLatinChs[] |
Latin chars corresponding to the unicode chars above.
Definition at line 1627 of file baseapi.cpp.
const int tesseract::kLeaderCutCost = 8 |
Definition at line 64 of file colpartition.cpp.
const int tesseract::kLeftIndentAlignmentCountTh = 1 |
Definition at line 88 of file equationdetect.cpp.
const double tesseract::kLineCountReciprocal = 4.0 |
Definition at line 51 of file tabvector.cpp.
const int tesseract::kLinedTableMinHorizontalLines = 3 |
Definition at line 43 of file tablerecog.cpp.
const int tesseract::kLinedTableMinVerticalLines = 3 |
Definition at line 42 of file tablerecog.cpp.
const int tesseract::kLineFindGridSize = 50 |
Grid size used by line finder. Not very critical.
Definition at line 47 of file linefind.cpp.
const double tesseract::kLineFragmentAspectRatio = 10.0 |
Definition at line 56 of file tabfind.cpp.
const double tesseract::kLineResidueAspectRatio = 8.0 |
Definition at line 100 of file strokewidth.cpp.
const int tesseract::kLineResiduePadRatio = 3 |
Definition at line 102 of file strokewidth.cpp.
const double tesseract::kLineResidueSizeRatio = 1.75 |
Definition at line 104 of file strokewidth.cpp.
const int tesseract::kLineTrapLongest = 4 |
Definition at line 93 of file strokewidth.cpp.
const int tesseract::kLineTrapShortest = 2 |
Definition at line 95 of file strokewidth.cpp.
const char * tesseract::kLRM = "\u200E" |
Definition at line 27 of file unicodes.cpp.
const double tesseract::kMarginFactor = 1.1 |
Definition at line 48 of file tablerecog.cpp.
const double tesseract::kMarginOverlapFraction = 0.25 |
Definition at line 54 of file colfind.cpp.
const float tesseract::kMathDigitDensityTh1 = 0.25 |
Definition at line 83 of file equationdetect.cpp.
const float tesseract::kMathDigitDensityTh2 = 0.1 |
Definition at line 84 of file equationdetect.cpp.
const float tesseract::kMathItalicDensityTh = 0.5 |
Definition at line 85 of file equationdetect.cpp.
const int tesseract::kMaxAmbigStringSize = UNICHAR_LEN * (MAX_AMBIG_SIZE + 1) |
Definition at line 40 of file ambigs.cpp.
const double tesseract::kMaxBaselineError = 0.4375 |
Definition at line 77 of file colpartition.cpp.
const double tesseract::kMaxBlobOverlapFactor = 4.0 |
Definition at line 80 of file tablefind.cpp.
const int tesseract::kMaxBlobWidth = 500 |
Definition at line 43 of file tablefind.cpp.
const inT16 tesseract::kMaxBoxEdgeDiff = 2 |
Definition at line 32 of file recogtraining.cpp.
const int tesseract::kMaxBoxesInDataPartition = 20 |
Definition at line 69 of file tablefind.cpp.
const int tesseract::kMaxBytesPerLine |
A maximal single box could occupy kNumbersPerBlob numbers at kBytesPer64BitNumber digits (if someone sneaks in a 64 bit value) and a space plus the newline and the maximum length of a UNICHAR. Test against this on each iteration for safety.
Definition at line 1573 of file baseapi.cpp.
const int tesseract::kMaxCaptionLines = 7 |
Definition at line 43 of file colpartitiongrid.cpp.
const int tesseract::kMaxCharTopRange = 48 |
Definition at line 66 of file fixxht.cpp.
const int tesseract::kMaxCircleErosions = 8 |
Definition at line 62 of file pagesegmain.cpp.
const int tesseract::kMaxCJKSizeRatio = 5 |
Definition at line 69 of file strokewidth.cpp.
const int tesseract::kMaxColorDistance = 900 |
Definition at line 84 of file colpartition.cpp.
const int tesseract::kMaxColumnHeaderDistance = 4 |
Definition at line 88 of file tablefind.cpp.
const int tesseract::kMaxCredibleResolution = 2400 |
Maximum believable resolution.
Definition at line 110 of file baseapi.cpp.
const double tesseract::kMaxDiacriticDistanceRatio = 1.25 |
Definition at line 83 of file strokewidth.cpp.
const double tesseract::kMaxDiacriticGapToBaseCharHeight = 1.0 |
Definition at line 86 of file strokewidth.cpp.
const double tesseract::kMaxDistToPartSizeRatio = 1.5 |
Definition at line 64 of file colfind.cpp.
const int tesseract::kMaxFillinMultiple = 11 |
Definition at line 47 of file tabvector.cpp.
const double tesseract::kMaxGapInTextPartition = 4.0 |
Definition at line 72 of file tablefind.cpp.
const double tesseract::kMaxGutterWidthAbsolute = 2.00 |
Definition at line 51 of file tabfind.cpp.
const double tesseract::kMaxHorizontalGap = 3.0 |
Definition at line 64 of file tabfind.cpp.
const int tesseract::kMaxIncompatibleColumnCount = 2 |
Definition at line 52 of file colfind.cpp.
const int tesseract::kMaxIntSize = 22 |
Max string length of an int.
Definition at line 103 of file baseapi.cpp.
const int tesseract::kMaxLargeOverlaps = 3 |
Definition at line 109 of file strokewidth.cpp.
const int tesseract::kMaxLargeOverlapsWithMedium = 12 |
Definition at line 44 of file ccnontextdetect.cpp.
const int tesseract::kMaxLargeOverlapsWithSmall = 3 |
Definition at line 35 of file ccnontextdetect.cpp.
const double tesseract::kMaxLeaderGapFractionOfMax = 0.25 |
Definition at line 58 of file colpartition.cpp.
const double tesseract::kMaxLeaderGapFractionOfMin = 0.5 |
Definition at line 60 of file colpartition.cpp.
const int tesseract::kMaxLigature = 0xfb17 |
Definition at line 46 of file ligature_table.cpp.
const int tesseract::kMaxLineLength = 1024 |
Definition at line 290 of file boxchar.cpp.
const int tesseract::kMaxLineResidue = 6 |
Definition at line 53 of file linefind.cpp.
const int tesseract::kMaxMediumOverlapsWithSmall = 12 |
Definition at line 40 of file ccnontextdetect.cpp.
const int tesseract::kMaxNeighbourDistFactor = 4 |
Definition at line 37 of file colpartitiongrid.cpp.
const double tesseract::kMaxNonLineDensity = 0.25 |
Definition at line 58 of file linefind.cpp.
const int tesseract::kMaxOffsetDist = 32 |
Definition at line 32 of file intfeaturemap.cpp.
const int tesseract::kMaxPadFactor = 6 |
Definition at line 34 of file colpartitiongrid.cpp.
const double tesseract::kMaxParagraphEndingLeftSpaceMultiple = 3.0 |
Definition at line 134 of file tablefind.cpp.
const double tesseract::kMaxPartitionSpacing = 1.75 |
Definition at line 70 of file colpartitiongrid.cpp.
const int tesseract::kMaxPartnerDepth = 4 |
Definition at line 46 of file colpartition.cpp.
const int tesseract::kMaxRaggedSearch = 25 |
Definition at line 39 of file tabfind.cpp.
const int tesseract::kMaxRealDistance = 2.0 |
Definition at line 37 of file detlinefit.cpp.
const double tesseract::kMaxRectangularFraction = 0.75 |
Definition at line 46 of file imagefind.cpp.
const double tesseract::kMaxRectangularGradient = 0.1 |
Definition at line 49 of file imagefind.cpp.
const int tesseract::kMaxRMSColorNoise = 128 |
Definition at line 81 of file colpartition.cpp.
const double tesseract::kMaxRowSize = 2.5 |
Definition at line 51 of file tablerecog.cpp.
const double tesseract::kMaxSameBlockLineSpacing = 3 |
Definition at line 54 of file colpartition.cpp.
const double tesseract::kMaxSizeRatio = 1.5 |
Definition at line 56 of file colpartition.cpp.
const int tesseract::kMaxSkewFactor = 15 |
Definition at line 65 of file alignedblob.cpp.
const double tesseract::kMaxSmallNeighboursPerPix = 1.0 / 32 |
Definition at line 32 of file ccnontextdetect.cpp.
const double tesseract::kMaxSpacingDrift = 1.0 / 72 |
Definition at line 48 of file colpartition.cpp.
const double tesseract::kMaxStaveHeight = 1.0 |
Definition at line 60 of file linefind.cpp.
const double tesseract::kMaxTableCellXheight = 2.0 |
Definition at line 84 of file tablefind.cpp.
const int tesseract::kMaxTextLineBlobRatio = 5 |
Definition at line 72 of file tabfind.cpp.
const double tesseract::kMaxTopSpacingFraction = 0.25 |
Definition at line 51 of file colpartition.cpp.
const int tesseract::kMaxUnicharsPerCluster = 2000 |
Definition at line 50 of file mastertrainer.cpp.
const int tesseract::kMaxVerticalSearch = 12 |
Definition at line 38 of file tabfind.cpp.
const int tesseract::kMaxVerticalSpacing = 500 |
Definition at line 41 of file tablefind.cpp.
const double tesseract::kMaxXProjectionGapFactor = 2.0 |
Definition at line 144 of file tablefind.cpp.
const double tesseract::kMinAlignedGutter = 0.25 |
Definition at line 53 of file tabvector.cpp.
const int tesseract::kMinAlignedTabs = 4 |
Definition at line 55 of file alignedblob.cpp.
const double tesseract::kMinBaselineCoverage = 0.5 |
Definition at line 79 of file colpartition.cpp.
const int tesseract::kMinBoxesInTextPartition = 10 |
Definition at line 66 of file tablefind.cpp.
const double tesseract::kMinCaptionGapHeightRatio = 0.5 |
Definition at line 47 of file colpartitiongrid.cpp.
const double tesseract::kMinCaptionGapRatio = 2.0 |
Definition at line 45 of file colpartitiongrid.cpp.
const int tesseract::kMinChainTextValue = 3 |
Definition at line 68 of file colpartition.cpp.
const int tesseract::kMinClusteredShapes = 1 |
Definition at line 48 of file mastertrainer.cpp.
const int tesseract::kMinColorDifference = 16 |
Definition at line 55 of file imagefind.cpp.
const int tesseract::kMinColumnWidth = 100 |
Definition at line 49 of file colfind.cpp.
const int tesseract::kMinCredibleResolution = 70 |
Minimum believable resolution.
Minimum believable resolution. Used as a default if there is no other information, as it is safer to under-estimate than over-estimate.
Definition at line 108 of file baseapi.cpp.
const double tesseract::kMinDiacriticSizeRatio = 1.0625 |
Definition at line 80 of file strokewidth.cpp.
const int tesseract::kMinEvaluatedTabs = 3 |
Definition at line 69 of file tabfind.cpp.
const double tesseract::kMinFilledArea = 0.35 |
Definition at line 61 of file tablerecog.cpp.
const double tesseract::kMinFractionalLinesInColumn = 0.125 |
Definition at line 45 of file tabfind.cpp.
const double tesseract::kMinGoodTextPARatio = 1.5 |
Definition at line 60 of file ccnontextdetect.cpp.
const double tesseract::kMinGutterFraction = 0.5 |
Definition at line 49 of file tabvector.cpp.
const double tesseract::kMinGutterWidthAbsolute = 0.02 |
Definition at line 49 of file tabfind.cpp.
const double tesseract::kMinGutterWidthGrid = 0.5 |
Definition at line 61 of file colfind.cpp.
const double tesseract::kMinImageArea = 0.5 |
Definition at line 77 of file tabfind.cpp.
const int tesseract::kMinImageFindSize = 100 |
Definition at line 51 of file imagefind.cpp.
const int tesseract::kMinLeaderCount = 5 |
Definition at line 62 of file colpartition.cpp.
const int tesseract::kMinLigature = 0xfb00 |
Definition at line 45 of file ligature_table.cpp.
const int tesseract::kMinLineLengthFraction = 4 |
Divisor applied to the resolution to give the minimum length, in pixels, that a line is required to have.
Definition at line 43 of file linefind.cpp.
const int tesseract::kMinLinesInColumn = 10 |
Definition at line 41 of file tabfind.cpp.
const double tesseract::kMinMaxGapInTextPartition = 0.5 |
Definition at line 76 of file tablefind.cpp.
const double tesseract::kMinMusicPixelFraction = 0.75 |
Definition at line 62 of file linefind.cpp.
const double tesseract::kMinNonNoiseFraction = 0.5 |
Definition at line 59 of file colfind.cpp.
const int tesseract::kMinOutlierSamples = 5 |
Definition at line 37 of file trainingsampleset.cpp.
const double tesseract::kMinOverlapWithTable = 0.6 |
Definition at line 100 of file tablefind.cpp.
const double tesseract::kMinParagraphEndingTextToWhitespaceRatio = 3.0 |
Definition at line 140 of file tablefind.cpp.
const double tesseract::kMinPCLengthIncrease = 1.0 / 1024 |
Definition at line 33 of file intfeaturemap.cpp.
const int tesseract::kMinPointsForErrorCount = 16 |
Definition at line 34 of file detlinefit.cpp.
const double tesseract::kMinRaggedGutter = 1.5 |
Definition at line 55 of file tabvector.cpp.
const int tesseract::kMinRaggedTabs = 5 |
Definition at line 53 of file alignedblob.cpp.
const int tesseract::kMinRampSize = 1000 |
Definition at line 36 of file degradeimage.cpp.
const double tesseract::kMinRectangularFraction = 0.125 |
Definition at line 44 of file imagefind.cpp.
const int tesseract::kMinRectSize = 10 |
Minimum sensible image size to be worth running tesseract.
Definition at line 86 of file baseapi.cpp.
const int tesseract::kMinRowsInTable = 3 |
Definition at line 115 of file tablefind.cpp.
const int tesseract::kMinStrongTextValue = 6 |
Definition at line 66 of file colpartition.cpp.
const double tesseract::kMinTabGradient = 4.0 |
Definition at line 61 of file alignedblob.cpp.
const int tesseract::kMinTextLineBlobRatio = 3 |
Definition at line 75 of file tabfind.cpp.
const int tesseract::kMinThickLineWidth = 12 |
Definition at line 49 of file linefind.cpp.
const int tesseract::kMinVerticalSearch = 3 |
Definition at line 37 of file tabfind.cpp.
const int tesseract::kMostlyOneDirRatio = 3 |
Definition at line 98 of file strokewidth.cpp.
const double tesseract::kNeighbourSearchFactor = 2.5 |
Definition at line 111 of file strokewidth.cpp.
const double tesseract::kNoiseOverlapAreaFactor = 1.0 / 512 |
Definition at line 116 of file strokewidth.cpp.
const double tesseract::kNoiseOverlapGrowthFactor = 4.0 |
Definition at line 113 of file strokewidth.cpp.
const int tesseract::kNoisePadding = 4 |
Definition at line 51 of file ccnontextdetect.cpp.
const int tesseract::kNumbersPerBlob = 5 |
The 5 numbers output for each box (the usual 4 and a page number).
Definition at line 1552 of file baseapi.cpp.
const int tesseract::kNumEndPoints = 3 |
Definition at line 28 of file detlinefit.cpp.
const int tesseract::kNumLiteralCnt = 5 |
Definition at line 36 of file tess_lang_model.h.
const int tesseract::kNumPagesPerMiniBatch = 100 |
Definition at line 38 of file imagedata.h.
const char* tesseract::kOldVarsFile = "failed_vars.txt" |
Temp file used for storing current parameters before applying retry values.
Definition at line 101 of file baseapi.cpp.
const int tesseract::kOriginalNoiseMultiple = 8 |
Definition at line 47 of file ccnontextdetect.cpp.
const double tesseract::kParagraphEndingPreviousLineRatio = 1.3 |
Definition at line 130 of file tablefind.cpp.
const char * tesseract::kPDF = "\u202C" |
Definition at line 30 of file unicodes.cpp.
const double tesseract::kPhotoOffsetFraction = 0.375 |
Definition at line 54 of file ccnontextdetect.cpp.
const int tesseract::kPrime1 = 17 |
Definition at line 34 of file trainingsampleset.cpp.
const int tesseract::kPrime2 = 13 |
Definition at line 35 of file trainingsampleset.cpp.
const double tesseract::kRaggedFraction = 2.5 |
Definition at line 41 of file alignedblob.cpp.
const double tesseract::kRaggedGapFraction = 1.0 |
Definition at line 45 of file alignedblob.cpp.
const int tesseract::kRaggedGutterMultiple = 5 |
Definition at line 53 of file tabfind.cpp.
const int tesseract::kRandomizingCenter = 128 |
Definition at line 35 of file trainingsample.cpp.
const double tesseract::kRatingEpsilon = 1.0 / 32 |
Definition at line 31 of file errorcounter.cpp.
const double tesseract::kRequiredColumns = 0.7 |
Definition at line 46 of file tablerecog.cpp.
const double tesseract::kRequiredFullJustifiedSpacing = 4.0 |
Definition at line 120 of file tablefind.cpp.
const char tesseract::kReverseIfHasRTL[] = "RRP_REVERSE_IF_HAS_RTL" |
const int tesseract::kRGBRMSColors = 4 |
Definition at line 36 of file colpartition.h.
const char * tesseract::kRLE = "\u202A" |
Definition at line 29 of file unicodes.cpp.
const char * tesseract::kRLM = "\u200F" |
Definition at line 28 of file unicodes.cpp.
const double tesseract::kRMSFitScaling = 8.0 |
Definition at line 53 of file imagefind.cpp.
const float tesseract::kRotationRange = 0.02f |
Definition at line 30 of file degradeimage.cpp.
const int tesseract::kRulingVerticalMargin = 3 |
Definition at line 96 of file tablefind.cpp.
const int tesseract::kSaltnPepper = 5 |
Definition at line 34 of file degradeimage.cpp.
const int tesseract::kSearchRadius = 2 |
Definition at line 88 of file strokewidth.cpp.
const int tesseract::kSeedBlobsCountTh = 10 |
Definition at line 87 of file equationdetect.cpp.
const double tesseract::kShapePerimeterRatio = 3.0 |
Definition at line 118 of file strokewidth.cpp.
const int tesseract::kSideSpaceMargin = 10 |
Definition at line 105 of file tablefind.cpp.
const int tesseract::kSimilarRaggedDist = 50 |
Definition at line 45 of file tabvector.cpp.
const int tesseract::kSimilarVectorDist = 10 |
Definition at line 42 of file tabvector.cpp.
const int tesseract::ksizeofUniversalAmbigsFile = sizeof(kUniversalAmbigsFile) |
Definition at line 24 of file universalambigs.h.
const float tesseract::kSizeRatioToReject = 2.0 |
Definition at line 106 of file strokewidth.cpp.
const double tesseract::kSmallTableProjectionThreshold = 0.35 |
Definition at line 109 of file tablefind.cpp.
const int tesseract::kSmoothDecisionMargin = 4 |
Definition at line 73 of file colpartitiongrid.cpp.
const double tesseract::kSmoothFactor = 0.25 |
Definition at line 58 of file tabfind.cpp.
const double tesseract::kSplitPartitionSize = 2.0 |
Definition at line 47 of file tablefind.cpp.
const int tesseract::kSquareLimit = 25 |
Definition at line 32 of file trainingsampleset.cpp.
const int tesseract::kStateCnt = 4 |
Definition at line 35 of file tess_lang_model.h.
const double tesseract::kStrokeWidthCJK = 2.0 |
Definition at line 56 of file strokewidth.cpp.
const double tesseract::kStrokeWidthConstantTolerance = 2.0 |
Definition at line 55 of file colpartitiongrid.cpp.
const double tesseract::kStrokeWidthFractionalTolerance = 0.25 |
Definition at line 148 of file tablefind.cpp.
const double tesseract::kStrokeWidthFractionCJK = 0.25 |
Definition at line 55 of file strokewidth.cpp.
const double tesseract::kStrokeWidthFractionTolerance = 0.25 |
Allowed proportional change in stroke width to be the same font.
Definition at line 53 of file colpartitiongrid.cpp.
const double tesseract::kStrokeWidthTolerance = 1.5 |
Allowed constant change in stroke width to be the same font. Really 1.5 pixels.
Definition at line 53 of file strokewidth.cpp.
const double tesseract::kTableColumnThreshold = 3.0 |
Definition at line 92 of file tablefind.cpp.
const int tesseract::kTabRadiusFactor = 5 |
Definition at line 35 of file tabfind.cpp.
const char tesseract::kTesseractReject = '~' |
Character returned when Tesseract could not recognize the input as anything.
Definition at line 88 of file baseapi.cpp.
const int tesseract::kTestChar = -1 |
Definition at line 30 of file trainingsampleset.cpp.
const char* tesseract::kTextordDebugPix = "psdebug_pix" |
Definition at line 68 of file alignedblob.cpp.
const double tesseract::kThickLengthMultiple = 0.75 |
Definition at line 56 of file linefind.cpp.
const int tesseract::kThinLineFraction = 20 |
Divisor applied to the resolution to give the maximum width, in pixels, allowed for a thin line.
Definition at line 41 of file linefind.cpp.
const double tesseract::kTinyEnoughTextlineOverlapFraction = 0.25 |
Definition at line 57 of file colpartitiongrid.cpp.
const float tesseract::kUnclearDensityTh = 0.25 |
Definition at line 86 of file equationdetect.cpp.
const int tesseract::kUniChs[] |
Conversion table for non-Latin characters. Maps characters outside the Latin set into the Latin set. TODO(rays): incorporate this translation into unicharset.
Definition at line 1623 of file baseapi.cpp.
const char tesseract::kUniversalAmbigsFile |
Definition at line 23 of file universalambigs.h.
const char tesseract::kUNLVReject = '~' |
Character used by UNLV error counter as a reject.
Definition at line 90 of file baseapi.cpp.
const char tesseract::kUNLVSuspect = '^' |
Character used by UNLV as a suspect marker.
Definition at line 92 of file baseapi.cpp.
const char * tesseract::kUTF8LineSeparator = "\u2028" |
Definition at line 25 of file unicodes.cpp.
const char * tesseract::kUTF8ParagraphSeparator = "\u2029" |
Definition at line 26 of file unicodes.cpp.
const double tesseract::kVerticalSpacing = -0.2 |
Definition at line 36 of file tablerecog.cpp.
const int tesseract::kVLineAlignment = 3 |
Definition at line 47 of file alignedblob.cpp.
const int tesseract::kVLineGutter = 1 |
Definition at line 49 of file alignedblob.cpp.
const int tesseract::kVLineMinLength = 500 |
Definition at line 57 of file alignedblob.cpp.
const int tesseract::kVLineSearchSize = 150 |
Definition at line 51 of file alignedblob.cpp.
const char* const tesseract::RTLReversePolicyNames[] |
bool tesseract::textord_dump_table_images = false |
"Paint table detection output"
Definition at line 151 of file tablefind.cpp.
bool tesseract::textord_show_tables = false |
"Show table regions"
Definition at line 152 of file tablefind.cpp.
bool tesseract::textord_tabfind_find_tables = true |
"run table detection"
Definition at line 74 of file colfind.cpp.
bool tesseract::textord_tabfind_only_strokewidths = false |
"Only run stroke widths"
Definition at line 45 of file strokewidth.cpp.
bool tesseract::textord_tabfind_show_blocks = false |
"Show final block bounds"
Definition at line 73 of file colfind.cpp.
bool tesseract::textord_tabfind_show_color_fit = false |
"Show stroke widths"
Definition at line 30 of file colpartitiongrid.cpp.
bool tesseract::textord_tabfind_show_columns = false |
"Show column bounds"
Definition at line 72 of file colfind.cpp.
bool tesseract::textord_tabfind_show_finaltabs = false |
"Show tab vectors"
Definition at line 84 of file tabfind.cpp.
bool tesseract::textord_tabfind_show_initial_partitions = false |
"Show partition bounds"
Definition at line 67 of file colfind.cpp.
bool tesseract::textord_tabfind_show_initialtabs = false |
"Show tab candidates"
Definition at line 83 of file tabfind.cpp.
int tesseract::textord_tabfind_show_partitions = 0 |
"Show partition bounds, waiting if >1"
Definition at line 71 of file colfind.cpp.
bool tesseract::textord_tabfind_show_reject_blobs = false |
"Show blobs rejected as noise"
Definition at line 69 of file colfind.cpp.
int tesseract::textord_tabfind_show_strokewidths = 0 |
"Show stroke widths"
Definition at line 44 of file strokewidth.cpp.
bool tesseract::textord_tablefind_recognize_tables = false |
"Enables the table recognizer for table layout and filtering."
Definition at line 158 of file tablefind.cpp.
bool tesseract::textord_tablefind_show_mark = false |
"Debug table marking steps in detail"
Definition at line 154 of file tablefind.cpp.
bool tesseract::textord_tablefind_show_stats = false |
"Show page stats used in table finding"
Definition at line 156 of file tablefind.cpp.
double tesseract::textord_tabvector_vertical_box_ratio = 0.5 |
"Fraction of box matches required to declare a line vertical"
Definition at line 61 of file tabvector.cpp.
double tesseract::textord_tabvector_vertical_gap_fraction = 0.5 |
"max fraction of mean blob width allowed for vertical gaps in vertical text"
"Max fraction of mean blob width allowed for vertical gaps in vertical text"
Definition at line 58 of file tabvector.cpp.
CCUtilMutex tesseract::tprintfMutex |
Definition at line 51 of file ccutil.cpp.