tesseract  5.0.0-alpha-619-ge9db
tesseract Namespace Reference

Classes

class  AlignedBlob
 
struct  AlignedBlobParams
 
class  AmbigSpec
 
struct  AssociateStats
 
class  AssociateUtils
 
class  BaselineBlock
 
class  BaselineDetect
 
class  BaselineRow
 
class  BBGrid
 
struct  BestChoiceBundle
 Bundle together all the things pertaining to the best choice/state. More...
 
class  BitVector
 
struct  BlobData
 
class  BlobGrid
 
struct  BlockGroup
 
class  BoolParam
 
class  BoxChar
 
struct  BoxCharPtrSort
 
class  BoxWord
 
class  CCNonTextDetect
 
class  CCStruct
 
class  CCUtil
 
class  ChoiceIterator
 
class  Classify
 
class  ClassPruner
 
struct  ClipFFunc
 
struct  ClipFPrime
 
struct  ClipGFunc
 
struct  ClipGPrime
 
struct  Cluster
 
class  ColPartition
 
class  ColPartitionGrid
 
class  ColPartitionSet
 
class  ColSegment
 
class  ColumnFinder
 
class  Convolve
 
class  CTC
 
class  Dawg
 
struct  DawgArgs
 
class  DawgCache
 
struct  DawgLoader
 
struct  DawgPosition
 
class  DawgPositionVector
 
class  DebugPixa
 
class  DetLineFit
 
class  Dict
 
class  DocumentCache
 
class  DocumentData
 
class  DoubleParam
 
class  DoublePtr
 
class  DPPoint
 
class  EquationDetect
 
class  EquationDetectBase
 
class  EquationFinderTest
 
class  ErrorCounter
 
struct  FFunc
 
class  File
 
struct  FloatWordFeature
 
struct  FontInfo
 
class  FontInfoTable
 
struct  FontSet
 
struct  FontSpacingInfo
 
class  FontUtils
 
struct  FPrime
 
class  FRAGMENT
 
class  FullyConnected
 
class  GenericHeap
 
struct  GeometricClassifierState
 
struct  GFunc
 
struct  GPrime
 
struct  greater_than
 
class  GridBase
 
class  GridSearch
 
class  HeapTest
 
struct  HFunc
 
struct  HPrime
 
class  IcuErrorCode
 
struct  IdentityFunc
 
class  ImageData
 
class  ImageFind
 
class  ImageThresholder
 
class  IndexMap
 
class  IndexMapBiDi
 
class  Input
 
class  InputBuffer
 
struct  Interval
 
class  IntFeatureDist
 
class  IntFeatureMap
 
class  IntFeatureSpace
 
class  IntGrid
 
class  IntParam
 
struct  IntSimdMatrix
 
struct  KDPair
 
struct  KDPairDec
 
struct  KDPairInc
 
class  KDPtrPair
 
struct  KDPtrPairDec
 
struct  KDPtrPairInc
 
class  KDVector
 
class  LanguageModel
 
struct  LanguageModelDawgInfo
 
struct  LanguageModelNgramInfo
 
struct  LanguageModelState
 Struct to store information maintained by various language model components. More...
 
class  LigatureTable
 
class  LineFinder
 
struct  LineHypothesis
 
struct  LMConsistencyInfo
 
class  LMPainPoints
 
class  LSTM
 
class  LSTMRecognizer
 
class  LSTMTester
 
class  LSTMTrainer
 
class  LSTMTrainerTest
 
class  LTRResultIterator
 
class  MasterTrainer
 
class  Maxpool
 
class  MockClassifier
 
class  MutableIterator
 
class  Network
 
class  NetworkBuilder
 
class  NetworkIO
 
class  NetworkScratch
 
struct  NodeChild
 
class  NthItemTest
 
class  ObjectCache
 
class  OutputBuffer
 
class  PageIterator
 
class  PangoFontInfo
 
class  ParagraphModelSmearer
 
class  ParagraphTheory
 
class  Parallel
 
class  Param
 
class  ParamsModel
 
class  ParamsTrainingBundle
 
struct  ParamsTrainingHypothesis
 
struct  ParamsVectors
 
class  ParamUtils
 
class  PixelHistogram
 
class  Plumbing
 
class  PointerVector
 
struct  PtrHash
 
class  RecodeBeamSearch
 
class  RecodedCharID
 
struct  RecodeNode
 
class  Reconfig
 
struct  Relu
 
struct  ReluPrime
 
class  ResultIterator
 
class  Reversed
 
class  RowInfo
 
class  RowScratchRegisters
 
class  SampleIterator
 
struct  ScoredFont
 
class  SegSearchPending
 
class  Series
 
class  Shape
 
class  ShapeClassifier
 
struct  ShapeDist
 
struct  ShapeQueueEntry
 
struct  ShapeRating
 
class  ShapeTable
 
class  ShiroRekhaSplitter
 
class  SIMDDetect
 
class  SimpleClusterer
 
struct  SpacingProperties
 
class  SquishedDawg
 
class  StaticShape
 
class  StrideMap
 
class  StringParam
 
class  StringRenderer
 
class  StrokeWidth
 
class  StructuredTable
 
class  TabConstraint
 
class  TabEventHandler
 
class  TabFind
 
class  TableFinder
 
class  TableRecognizer
 
class  TabVector
 
struct  TESS_CHAR
 
class  TessAltoRenderer
 
class  TessBaseAPI
 
class  TessBoxTextRenderer
 
class  TessClassifier
 
class  TessdataManager
 
class  Tesseract
 
struct  TesseractStats
 
class  TessHOcrRenderer
 
class  TessLSTMBoxRenderer
 
class  TessOsdRenderer
 
class  TessPDFRenderer
 
class  TessResultRenderer
 
class  TessTextRenderer
 
class  TessTsvRenderer
 
class  TessUnlvRenderer
 
class  TessWordStrBoxRenderer
 
class  TestableEquationDetect
 
class  TextlineProjection
 
class  Textord
 
class  TFile
 
class  TFNetworkModel
 
class  TFNetworkModelDefaultTypeInternal
 
class  TrainingSample
 
class  TrainingSampleSet
 
class  TRand
 
class  TransposedArray
 
class  Trie
 
class  UNICHAR
 
class  UnicharAmbigs
 
struct  UnicharAndFonts
 
class  UnicharCompress
 
class  UnicharIdArrayUtils
 
struct  UnicharRating
 
class  UnicodeSpanSkipper
 
struct  UnityFunc
 
class  ValidateGrapheme
 
class  ValidateIndic
 
class  ValidateJavanese
 
class  ValidateKhmer
 
class  ValidateMyanmar
 
class  Validator
 
struct  ViterbiStateEntry
 
class  WeightMatrix
 
struct  WordData
 
class  WordFeature
 
class  Wordrec
 
class  WordWithBox
 
class  WorkingPartSet
 

Typedefs

using DictFunc = int(Dict::*)(void *, const UNICHARSET &, UNICHAR_ID, bool) const
 
using ProbabilityInContextFunc = double(Dict::*)(const char *, const char *, int, const char *, int)
 
using ParamsModelClassifyFunc = float(Dict::*)(const char *, void *)
 
using FillLatticeFunc = void(Wordrec::*)(const MATRIX &, const WERD_CHOICE_LIST &, const UNICHARSET &, BlamerBundle *)
 
using TruthCallback = std::function< void(const UNICHARSET &, int, PageIterator *, Pix *)>
 
using FileReader = bool(*)(const char *filename, GenericVector< char > *data)
 
using FileWriter = bool(*)(const GenericVector< char > &data, const char *filename)
 
using char32 = signed int
 
using DotProductFunction = double(*)(const double *, const double *, int)
 
using SetOfModels = GenericVectorEqEq< const ParagraphModel * >
 
using WordRecognizer = void(Tesseract::*)(const WordData &, WERD_RES **, PointerVector< WERD_RES > *)
 
using ParamsTrainingHypothesisList = GenericVector< ParamsTrainingHypothesis >
 
using UnicharIdVector = GenericVector< UNICHAR_ID >
 
using UnicharAmbigsVector = GenericVector< AmbigSpec_LIST * >
 
using IntKDPair = KDPairInc< int, int >
 
using RSMap = std::unordered_map< int, std::unique_ptr< std::vector< int > >>
 
using RSCounts = std::unordered_map< int, int >
 
using ShapeQueue = GenericHeap< ShapeQueueEntry >
 
using NodeChildVector = GenericVector< NodeChild >
 
using SuccessorList = GenericVector< int >
 
using SuccessorListsVector = GenericVector< SuccessorList * >
 
using DawgVector = GenericVector< Dawg * >
 
using RecodePair = KDPairInc< double, RecodeNode >
 
using RecodeHeap = GenericHeap< RecodePair >
 
using BlobGridSearch = GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >
 
using ColPartitionGridSearch = GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT >
 
using PartSetVector = GenericVector< ColPartitionSet * >
 
using WidthCallback = std::function< bool(int)>
 
using ColSegmentGrid = BBGrid< ColSegment, ColSegment_CLIST, ColSegment_C_IT >
 
using ColSegmentGridSearch = GridSearch< ColSegment, ColSegment_CLIST, ColSegment_C_IT >
 
using WordGrid = BBGrid< WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT >
 
using WordSearch = GridSearch< WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT >
 
using LigHash = std::unordered_map< std::string, std::string, StringHash >
 
using TestCallback = std::function< STRING(int, const double *, const TessdataManager &, int)>
 
using PainPointHeap = GenericHeap< MatrixCoordPair >
 
using LanguageModelFlagsType = unsigned char
 Used for expressing various language model flags. More...
 

Enumerations

enum  Orientation { ORIENTATION_PAGE_UP = 0, ORIENTATION_PAGE_RIGHT = 1, ORIENTATION_PAGE_DOWN = 2, ORIENTATION_PAGE_LEFT = 3 }
 
enum  WritingDirection { WRITING_DIRECTION_LEFT_TO_RIGHT = 0, WRITING_DIRECTION_RIGHT_TO_LEFT = 1, WRITING_DIRECTION_TOP_TO_BOTTOM = 2 }
 
enum  TextlineOrder { TEXTLINE_ORDER_LEFT_TO_RIGHT = 0, TEXTLINE_ORDER_RIGHT_TO_LEFT = 1, TEXTLINE_ORDER_TOP_TO_BOTTOM = 2 }
 
enum  PageSegMode {
  PSM_OSD_ONLY = 0, PSM_AUTO_OSD = 1, PSM_AUTO_ONLY = 2, PSM_AUTO = 3,
  PSM_SINGLE_COLUMN = 4, PSM_SINGLE_BLOCK_VERT_TEXT = 5, PSM_SINGLE_BLOCK = 6, PSM_SINGLE_LINE = 7,
  PSM_SINGLE_WORD = 8, PSM_CIRCLE_WORD = 9, PSM_SINGLE_CHAR = 10, PSM_SPARSE_TEXT,
  PSM_SPARSE_TEXT_OSD = 12, PSM_RAW_LINE = 13, PSM_COUNT
}
 
enum  PageIteratorLevel {
  RIL_BLOCK, RIL_PARA, RIL_TEXTLINE, RIL_WORD,
  RIL_SYMBOL
}
 
enum  ParagraphJustification { JUSTIFICATION_UNKNOWN, JUSTIFICATION_LEFT, JUSTIFICATION_CENTER, JUSTIFICATION_RIGHT }
 
enum  OcrEngineMode {
  OEM_TESSERACT_ONLY, OEM_LSTM_ONLY, OEM_TESSERACT_LSTM_COMBINED, OEM_DEFAULT,
  OEM_COUNT
}
 
enum  LineType { LT_START = 'S', LT_BODY = 'C', LT_UNKNOWN = 'U', LT_MULTIPLE = 'M' }
 
enum  CMD_EVENTS { ACTION_1_CMD_EVENT, RECOG_WERDS, RECOG_PSEUDO, ACTION_2_CMD_EVENT }
 
enum  CachingStrategy { CS_SEQUENTIAL, CS_ROUND_ROBIN }
 
enum  NormalizationMode { NM_BASELINE = -3, NM_CHAR_ISOTROPIC = -2, NM_CHAR_ANISOTROPIC = -1 }
 
enum  kParamsTrainingFeatureType {
  PTRAIN_DIGITS_SHORT, PTRAIN_DIGITS_MED, PTRAIN_DIGITS_LONG, PTRAIN_NUM_SHORT,
  PTRAIN_NUM_MED, PTRAIN_NUM_LONG, PTRAIN_DOC_SHORT, PTRAIN_DOC_MED,
  PTRAIN_DOC_LONG, PTRAIN_DICT_SHORT, PTRAIN_DICT_MED, PTRAIN_DICT_LONG,
  PTRAIN_FREQ_SHORT, PTRAIN_FREQ_MED, PTRAIN_FREQ_LONG, PTRAIN_SHAPE_COST_PER_CHAR,
  PTRAIN_NGRAM_COST_PER_CHAR, PTRAIN_NUM_BAD_PUNC, PTRAIN_NUM_BAD_CASE, PTRAIN_XHEIGHT_CONSISTENCY,
  PTRAIN_NUM_BAD_CHAR_TYPE, PTRAIN_NUM_BAD_SPACING, PTRAIN_NUM_BAD_FONT, PTRAIN_RATING_PER_CHAR,
  PTRAIN_NUM_FEATURE_TYPES
}
 
enum  ScriptPos { SP_NORMAL, SP_SUBSCRIPT, SP_SUPERSCRIPT, SP_DROPCAP }
 
enum  AmbigType {
  NOT_AMBIG, REPLACE_AMBIG, DEFINITE_AMBIG, SIMILAR_AMBIG,
  CASE_AMBIG, AMBIG_TYPE_COUNT
}
 
enum  SetParamConstraint { SET_PARAM_CONSTRAINT_NONE, SET_PARAM_CONSTRAINT_DEBUG_ONLY, SET_PARAM_CONSTRAINT_NON_DEBUG_ONLY, SET_PARAM_CONSTRAINT_NON_INIT_ONLY }
 
enum  TessdataType {
  TESSDATA_LANG_CONFIG, TESSDATA_UNICHARSET, TESSDATA_AMBIGS, TESSDATA_INTTEMP,
  TESSDATA_PFFMTABLE, TESSDATA_NORMPROTO, TESSDATA_PUNC_DAWG, TESSDATA_SYSTEM_DAWG,
  TESSDATA_NUMBER_DAWG, TESSDATA_FREQ_DAWG, TESSDATA_FIXED_LENGTH_DAWGS, TESSDATA_CUBE_UNICHARSET,
  TESSDATA_CUBE_SYSTEM_DAWG, TESSDATA_SHAPE_TABLE, TESSDATA_BIGRAM_DAWG, TESSDATA_UNAMBIG_DAWG,
  TESSDATA_PARAMS_MODEL, TESSDATA_LSTM, TESSDATA_LSTM_PUNC_DAWG, TESSDATA_LSTM_SYSTEM_DAWG,
  TESSDATA_LSTM_NUMBER_DAWG, TESSDATA_LSTM_UNICHARSET, TESSDATA_LSTM_RECODER, TESSDATA_VERSION,
  TESSDATA_NUM_ENTRIES
}
 
enum  CharSegmentationType { CST_FRAGMENT, CST_WHOLE, CST_IMPROPER, CST_NGRAM }
 
enum  DawgType {
  DAWG_TYPE_PUNCTUATION, DAWG_TYPE_WORD, DAWG_TYPE_NUMBER, DAWG_TYPE_PATTERN,
  DAWG_TYPE_COUNT
}
 
enum  XHeightConsistencyEnum { XH_GOOD, XH_SUBNORMAL, XH_INCONSISTENT }
 
enum  TrainingFlags { TF_INT_MODE = 1, TF_COMPRESS_UNICHARSET = 64 }
 
enum  NetworkType {
  NT_NONE, NT_INPUT, NT_CONVOLVE, NT_MAXPOOL,
  NT_PARALLEL, NT_REPLICATED, NT_PAR_RL_LSTM, NT_PAR_UD_LSTM,
  NT_PAR_2D_LSTM, NT_SERIES, NT_RECONFIG, NT_XREVERSED,
  NT_YREVERSED, NT_XYTRANSPOSE, NT_LSTM, NT_LSTM_SUMMARY,
  NT_LOGISTIC, NT_POSCLIP, NT_SYMCLIP, NT_TANH,
  NT_RELU, NT_LINEAR, NT_SOFTMAX, NT_SOFTMAX_NO_CTC,
  NT_LSTM_SOFTMAX, NT_LSTM_SOFTMAX_ENCODED, NT_TENSORFLOW, NT_COUNT
}
 
enum  NetworkFlags { NF_LAYER_SPECIFIC_LR = 64, NF_ADAM = 128 }
 
enum  TrainingState { TS_DISABLED, TS_ENABLED, TS_TEMP_DISABLE, TS_RE_ENABLE }
 
enum  NodeContinuation { NC_ANYTHING, NC_ONLY_DUP, NC_NO_DUP, NC_COUNT }
 
enum  TopNState { TN_TOP2, TN_TOPN, TN_ALSO_RAN, TN_COUNT }
 
enum  LossType { LT_NONE, LT_CTC, LT_SOFTMAX, LT_LOGISTIC }
 
enum  FlexDimensions { FD_BATCH, FD_HEIGHT, FD_WIDTH, FD_DIMSIZE }
 
enum  ColumnSpanningType {
  CST_NOISE, CST_FLOWING, CST_HEADING, CST_PULLOUT,
  CST_COUNT
}
 
enum  NeighbourPartitionType {
  NPT_HTEXT, NPT_VTEXT, NPT_WEAK_HTEXT, NPT_WEAK_VTEXT,
  NPT_IMAGE, NPT_COUNT
}
 
enum  LeftOrRight { LR_LEFT, LR_RIGHT }
 
enum  PartitionFindResult { PFR_OK, PFR_SKEW, PFR_NOISE }
 
enum  ColSegType {
  COL_UNKNOWN, COL_TEXT, COL_TABLE, COL_MIXED,
  COL_COUNT
}
 
enum  TabAlignment {
  TA_LEFT_ALIGNED, TA_LEFT_RAGGED, TA_CENTER_JUSTIFIED, TA_RIGHT_ALIGNED,
  TA_RIGHT_RAGGED, TA_SEPARATOR, TA_COUNT
}
 
enum  FactorNames {
  FN_INCOLOR, FN_Y0, FN_Y1, FN_Y2,
  FN_Y3, FN_X0, FN_X1, FN_SHEAR,
  FN_NUM_FACTORS
}
 
enum  CountTypes {
  CT_UNICHAR_TOP_OK, CT_UNICHAR_TOP1_ERR, CT_UNICHAR_TOP2_ERR, CT_UNICHAR_TOPN_ERR,
  CT_UNICHAR_TOPTOP_ERR, CT_OK_MULTI_UNICHAR, CT_OK_JOINED, CT_OK_BROKEN,
  CT_REJECT, CT_FONT_ATTR_ERR, CT_OK_MULTI_FONT, CT_NUM_RESULTS,
  CT_RANK, CT_REJECTED_JUNK, CT_ACCEPTED_JUNK, CT_SIZE
}
 
enum  ErrorTypes {
  ET_RMS, ET_DELTA, ET_WORD_RECERR, ET_CHAR_ERROR,
  ET_SKIP_RATIO, ET_COUNT
}
 
enum  Trainability {
  TRAINABLE, PERFECT, UNENCODABLE, HI_PRECISION_ERR,
  NOT_BOXED
}
 
enum  SerializeAmount { LIGHT, NO_BEST_TRAINER, FULL }
 
enum  SubTrainerResult { STR_NONE, STR_UPDATED, STR_REPLACED }
 
enum  UnicodeNormMode { UnicodeNormMode::kNFD, UnicodeNormMode::kNFC, UnicodeNormMode::kNFKD, UnicodeNormMode::kNFKC }
 
enum  OCRNorm { OCRNorm::kNone, OCRNorm::kNormalize }
 
enum  GraphemeNorm { GraphemeNorm::kNone, GraphemeNorm::kNormalize }
 
enum  GraphemeNormMode { GraphemeNormMode::kSingleString, GraphemeNormMode::kCombined, GraphemeNormMode::kGlyphSplit, GraphemeNormMode::kIndividualUnicodes }
 
enum  ViramaScript : char32 {
  ViramaScript::kNonVirama = 0, ViramaScript::kDevanagari = 0x900, ViramaScript::kBengali = 0x980, ViramaScript::kGurmukhi = 0xa00,
  ViramaScript::kGujarati = 0xa80, ViramaScript::kOriya = 0xb00, ViramaScript::kTamil = 0xb80, ViramaScript::kTelugu = 0xc00,
  ViramaScript::kKannada = 0xc80, ViramaScript::kMalayalam = 0xd00, ViramaScript::kSinhala = 0xd80, ViramaScript::kMyanmar = 0x1000,
  ViramaScript::kKhmer = 0x1780, ViramaScript::kJavanese = 0xa980
}
 
enum  LMPainPointsType {
  LM_PPTYPE_BLAMER, LM_PPTYPE_AMBIG, LM_PPTYPE_PATH, LM_PPTYPE_SHAPE,
  LM_PPTYPE_NUM
}
 

Functions

STRING HOcrEscape (const char *text)
 
bool LoadDataFromFile (const char *filename, GenericVector< char > *data)
 
bool SaveDataToFile (const GenericVector< char > &data, const char *filename)
 
template<typename T >
bool cmp_eq (T const &t1, T const &t2)
 
template<typename T >
int sort_cmp (const void *t1, const void *t2)
 
template<typename T >
int sort_ptr_cmp (const void *t1, const void *t2)
 
bool PSM_OSD_ENABLED (int pageseg_mode)
 
bool PSM_ORIENTATION_ENABLED (int pageseg_mode)
 
bool PSM_COL_FIND_ENABLED (int pageseg_mode)
 
bool PSM_SPARSE (int pageseg_mode)
 
bool PSM_BLOCK_FIND_ENABLED (int pageseg_mode)
 
bool PSM_LINE_FIND_ENABLED (int pageseg_mode)
 
bool PSM_WORD_FIND_ENABLED (int pageseg_mode)
 
template<typename T , size_t N>
constexpr size_t countof (T const (&)[N]) noexcept
 
bool DeSerialize (FILE *fp, char *data, size_t n=1)
 
bool DeSerialize (FILE *fp, float *data, size_t n=1)
 
bool DeSerialize (FILE *fp, int8_t *data, size_t n=1)
 
bool DeSerialize (FILE *fp, int16_t *data, size_t n=1)
 
bool DeSerialize (FILE *fp, int32_t *data, size_t n=1)
 
bool DeSerialize (FILE *fp, uint8_t *data, size_t n=1)
 
bool DeSerialize (FILE *fp, uint16_t *data, size_t n=1)
 
bool DeSerialize (FILE *fp, uint32_t *data, size_t n=1)
 
bool Serialize (FILE *fp, const char *data, size_t n=1)
 
bool Serialize (FILE *fp, const float *data, size_t n=1)
 
bool Serialize (FILE *fp, const int8_t *data, size_t n=1)
 
bool Serialize (FILE *fp, const int16_t *data, size_t n=1)
 
bool Serialize (FILE *fp, const int32_t *data, size_t n=1)
 
bool Serialize (FILE *fp, const uint8_t *data, size_t n=1)
 
bool Serialize (FILE *fp, const uint16_t *data, size_t n=1)
 
bool Serialize (FILE *fp, const uint32_t *data, size_t n=1)
 
double DotProductNative (const double *u, const double *v, int n)
 
double DotProductAVX (const double *u, const double *v, int n)
 
double DotProductFMA (const double *u, const double *v, int n)
 
double DotProductSSE (const double *u, const double *v, int n)
 
bool IsTextOrEquationType (PolyBlockType type)
 
bool IsLeftIndented (const EquationDetect::IndentType type)
 
bool IsRightIndented (const EquationDetect::IndentType type)
 
bool AsciiLikelyListItem (const STRING &word)
 
int UnicodeFor (const UNICHARSET *u, const WERD_CHOICE *werd, int pos)
 
void LeftWordAttributes (const UNICHARSET *unicharset, const WERD_CHOICE *werd, const STRING &utf8, bool *is_list, bool *starts_idea, bool *ends_idea)
 
void RightWordAttributes (const UNICHARSET *unicharset, const WERD_CHOICE *werd, const STRING &utf8, bool *is_list, bool *starts_idea, bool *ends_idea)
 
bool ValidFirstLine (const GenericVector< RowScratchRegisters > *rows, int row, const ParagraphModel *model)
 
bool ValidBodyLine (const GenericVector< RowScratchRegisters > *rows, int row, const ParagraphModel *model)
 
bool CrownCompatible (const GenericVector< RowScratchRegisters > *rows, int a, int b, const ParagraphModel *model)
 
void RecomputeMarginsAndClearHypotheses (GenericVector< RowScratchRegisters > *rows, int start, int end, int percentile)
 
int InterwordSpace (const GenericVector< RowScratchRegisters > &rows, int row_start, int row_end)
 
bool FirstWordWouldHaveFit (const RowScratchRegisters &before, const RowScratchRegisters &after, tesseract::ParagraphJustification justification)
 
bool FirstWordWouldHaveFit (const RowScratchRegisters &before, const RowScratchRegisters &after)
 
bool RowsFitModel (const GenericVector< RowScratchRegisters > *rows, int start, int end, const ParagraphModel *model)
 
void CanonicalizeDetectionResults (GenericVector< PARA * > *row_owners, PARA_LIST *paragraphs)
 
void DetectParagraphs (int debug_level, GenericVector< RowInfo > *row_infos, GenericVector< PARA * > *row_owners, PARA_LIST *paragraphs, GenericVector< ParagraphModel * > *models)
 
void DetectParagraphs (int debug_level, bool after_text_recognition, const MutableIterator *block_start, GenericVector< ParagraphModel * > *models)
 
bool StrongModel (const ParagraphModel *model)
 
bool CompareFontInfo (const FontInfo &fi1, const FontInfo &fi2)
 
bool CompareFontSet (const FontSet &fs1, const FontSet &fs2)
 
void FontInfoDeleteCallback (FontInfo f)
 
void FontSetDeleteCallback (FontSet fs)
 
bool read_info (TFile *f, FontInfo *fi)
 
bool write_info (FILE *f, const FontInfo &fi)
 
bool read_spacing_info (TFile *f, FontInfo *fi)
 
bool write_spacing_info (FILE *f, const FontInfo &fi)
 
bool read_set (TFile *f, FontSet *fs)
 
bool write_set (FILE *f, const FontSet &fs)
 
int OtsuThreshold (Pix *src_pix, int left, int top, int width, int height, int **thresholds, int **hi_values)
 
void HistogramRect (Pix *src_pix, int channel, int left, int top, int width, int height, int *histogram)
 
int OtsuStats (const int *histogram, int *H_out, int *omega0_out)
 
int ParamsTrainingFeatureByName (const char *name)
 
const char * ScriptPosToString (enum ScriptPos script_pos)
 
void ExtractFontName (const STRING &filename, STRING *fontname)
 
TrainingSample * BlobToTrainingSample (const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
 
void ClearFeatureSpaceWindow (NORM_METHOD norm_method, ScrollView *window)
 
double Tanh (double x)
 
double Logistic (double x)
 
template<class Func >
void FuncInplace (int n, double *inout)
 
template<class Func >
void FuncMultiply (const double *u, const double *v, int n, double *out)
 
template<typename T >
void SoftmaxInPlace (int n, T *inout)
 
void CopyVector (int n, const double *src, double *dest)
 
void AccumulateVector (int n, const double *src, double *dest)
 
void MultiplyVectorsInPlace (int n, const double *src, double *inout)
 
void MultiplyAccumulate (int n, const double *u, const double *v, double *out)
 
void SumVectors (int n, const double *v1, const double *v2, const double *v3, const double *v4, const double *v5, double *sum)
 
template<typename T >
void ZeroVector (int n, T *vec)
 
template<typename T >
void ClipVector (int n, T lower, T upper, T *vec)
 
void CodeInBinary (int n, int nf, double *vec)
 
Pix * TraceOutlineOnReducedPix (C_OUTLINE *outline, int gridsize, ICOORD bleft, int *left, int *bottom)
 
Pix * TraceBlockOnReducedPix (BLOCK *block, int gridsize, ICOORD bleft, int *left, int *bottom)
 
template<class BBC >
int SortByBoxLeft (const void *void1, const void *void2)
 
template<class BBC >
int SortRightToLeft (const void *void1, const void *void2)
 
template<class BBC >
int SortByBoxBottom (const void *void1, const void *void2)
 
template<typename T >
void DeleteObject (T *object)
 
void SetBlobStrokeWidth (Pix *pix, BLOBNBOX *blob)
 
void assign_blobs_to_blocks2 (Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
 
void ParseCommandLineFlags (const char *usage, int *argc, char ***argv, const bool remove_flags)
 
ShapeTable * LoadShapeTable (const STRING &file_prefix)
 
void WriteShapeTable (const STRING &file_prefix, const ShapeTable &shape_table)
 
MasterTrainer * LoadTrainingData (int argc, const char *const *argv, bool replication, ShapeTable **shape_table, STRING *file_prefix)
 
Pix * DegradeImage (Pix *input, int exposure, TRand *randomizer, float *rotation)
 
Pix * PrepareDistortedPix (const Pix *pix, bool perspective, bool invert, bool white_noise, bool smooth_noise, bool blur, int box_reduction, TRand *randomizer, GenericVector< TBOX > *boxes)
 
void GeneratePerspectiveDistortion (int width, int height, TRand *randomizer, Pix **pix, GenericVector< TBOX > *boxes)
 
int ProjectiveCoeffs (int width, int height, TRand *randomizer, float **im_coeffs, float **box_coeffs)
 
bool LoadFileLinesToStrings (const char *filename, GenericVector< STRING > *lines)
 
bool WriteFile (const std::string &output_dir, const std::string &lang, const std::string &suffix, const GenericVector< char > &data, FileWriter writer)
 
STRING ReadFile (const std::string &filename, FileReader reader)
 
bool WriteUnicharset (const UNICHARSET &unicharset, const std::string &output_dir, const std::string &lang, FileWriter writer, TessdataManager *traineddata)
 
bool WriteRecoder (const UNICHARSET &unicharset, bool pass_through, const std::string &output_dir, const std::string &lang, FileWriter writer, STRING *radical_table_data, TessdataManager *traineddata)
 
int CombineLangModel (const UNICHARSET &unicharset, const std::string &script_dir, const std::string &version_str, const std::string &output_dir, const std::string &lang, bool pass_through_recoder, const GenericVector< STRING > &words, const GenericVector< STRING > &puncs, const GenericVector< STRING > &numbers, bool lang_is_rtl, FileReader reader, FileWriter writer)
 
bool NormalizeUTF8String (UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNorm grapheme_normalize, const char *str8, std::string *normalized)
 
bool NormalizeCleanAndSegmentUTF8 (UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNormMode g_mode, bool report_errors, const char *str8, std::vector< std::string > *graphemes)
 
char32 OCRNormalize (char32 ch)
 
bool IsOCREquivalent (char32 ch1, char32 ch2)
 
bool IsValidCodepoint (const char32 ch)
 
bool IsWhitespace (const char32 ch)
 
bool IsUTF8Whitespace (const char *text)
 
unsigned int SpanUTF8Whitespace (const char *text)
 
unsigned int SpanUTF8NotWhitespace (const char *text)
 
bool IsInterchangeValid (const char32 ch)
 
bool IsInterchangeValid7BitAscii (const char32 ch)
 
char32 FullwidthToHalfwidth (const char32 ch)
 
void SetupBasicProperties (bool report_errors, bool decompose, UNICHARSET *unicharset)
 
void SetScriptProperties (const std::string &script_dir, UNICHARSET *unicharset)
 
std::string GetXheightString (const std::string &script_dir, const UNICHARSET &unicharset)
 
void SetPropertiesForInputFile (const std::string &script_dir, const std::string &input_unicharset_file, const std::string &output_unicharset_file, const std::string &output_xheights_file)
 
void SetupBasicProperties (bool report_errors, UNICHARSET *unicharset)
 
template<class BLOB_CHOICE >
int SortByUnicharID (const void *void1, const void *void2)
 
template<class BLOB_CHOICE >
int SortByRating (const void *void1, const void *void2)
 
 TEST_F (EquationFinderTest, IdentifySpecialText)
 
 TEST_F (EquationFinderTest, EstimateTypeForUnichar)
 
 TEST_F (EquationFinderTest, IsIndented)
 
 TEST_F (EquationFinderTest, IsNearSmallNeighbor)
 
 TEST_F (EquationFinderTest, CheckSeedBlobsCount)
 
 TEST_F (EquationFinderTest, ComputeForegroundDensity)
 
 TEST_F (EquationFinderTest, CountAlignment)
 
 TEST_F (EquationFinderTest, ComputeCPsSuperBBox)
 
 TEST_F (EquationFinderTest, SplitCPHorLite)
 
 TEST_F (EquationFinderTest, SplitCPHor)
 
 TEST_F (HeapTest, SortTest)
 
 TEST_F (HeapTest, MixedTest)
 
 TEST_F (HeapTest, PopWorstTest)
 
 TEST_F (HeapTest, RevalueTest)
 
 TEST_F (HeapTest, DoublePtrTest)
 
 TEST_F (LSTMTrainerTest, RecodeTestKorBase)
 
 TEST_F (LSTMTrainerTest, RecodeTestKor)
 
 TEST_F (LSTMTrainerTest, EncodeDecodeBothTestKor)
 
 TEST_F (LSTMTrainerTest, TestSquashed)
 
 TEST_F (LSTMTrainerTest, BasicTest)
 
 TEST_F (LSTMTrainerTest, ColorTest)
 
 TEST_F (LSTMTrainerTest, BidiTest)
 
 TEST_F (LSTMTrainerTest, Test2D)
 
 TEST_F (LSTMTrainerTest, TestAdam)
 
 TEST_F (LSTMTrainerTest, SpeedTest)
 
 TEST_F (LSTMTrainerTest, DeterminismTest)
 
 TEST_F (LSTMTrainerTest, SoftmaxBaselineTest)
 
 TEST_F (LSTMTrainerTest, SoftmaxTest)
 
 TEST_F (LSTMTrainerTest, EncodedSoftmaxTest)
 
 TEST_F (LSTMTrainerTest, TestLayerAccess)
 
std::string CodepointList (const std::vector< char32 > &str32)
 
std::string PrintString32WithUnicodes (const std::string &str)
 
std::string PrintStringVectorWithUnicodes (const std::vector< std::string > &glyphs)
 
void ExpectGraphemeModeResults (const std::string &str, UnicodeNormMode u_mode, int unicode_count, int glyph_count, int grapheme_count, const std::string &target_str)
 
 TEST_F (NthItemTest, GeneralTest)
 
 TEST_F (NthItemTest, BoringTest)
 
 TEST_F (NthItemTest, UniqueTest)
 
 TEST_F (NthItemTest, EqualTest)
 

Variables

const int kMinRectSize = 10
 
const char kTesseractReject = '~'
 
const char kUNLVReject = '~'
 
const char kUNLVSuspect = '^'
 
const int kMaxIntSize = 22
 
const int kNumbersPerBlob = 5
 
const int kBytesPerNumber = 5
 
const int kBytesPerBoxFileLine = (kBytesPerNumber + 1) * kNumbersPerBlob + 1
 
const int kBytesPer64BitNumber = 20
 
const int kMaxBytesPerLine
 
const int kUniChs []
 
const int kLatinChs []
 
constexpr int kNumOutputsPerRegister = 8
 
constexpr int kMaxOutputRegisters = 8
 
constexpr int kNumInputsPerRegister = 32
 
constexpr int kNumInputsPerGroup = 4
 
constexpr int kNumInputGroups = kNumInputsPerRegister / kNumInputsPerGroup
 
DotProductFunction DotProduct
 
const float kMathDigitDensityTh1 = 0.25
 
const float kMathDigitDensityTh2 = 0.1
 
const float kMathItalicDensityTh = 0.5
 
const float kUnclearDensityTh = 0.25
 
const int kSeedBlobsCountTh = 10
 
const int kLeftIndentAlignmentCountTh = 1
 
const int kMaxCharTopRange = 48
 
const float kCertaintyScale = 7.0f
 
const float kWorstDictCertainty = -25.0f
 
const int kMaxCircleErosions = 8
 
const ParagraphModel * kCrownLeft = reinterpret_cast<ParagraphModel *>(static_cast<uintptr_t>(0xDEAD111F))
 
const ParagraphModel * kCrownRight = reinterpret_cast<ParagraphModel *>(static_cast<uintptr_t>(0xDEAD888F))
 
const int16_t kMaxBoxEdgeDiff = 2
 
const int kBoxClipTolerance = 2
 
const int kNumEndPoints = 3
 
const int kMinPointsForErrorCount = 16
 
const int kMaxRealDistance = 2.0
 
const int kFeaturePadding = 2
 
const int kImagePadding = 4
 
const int kHistogramSize = 256
 
const int kMaxAmbigStringSize = UNICHAR_LEN * (MAX_AMBIG_SIZE + 1)
 
const int kRadicalRadix = 29
 
const char *const kLRM = "\u200E"
 Left-to-Right Mark. More...
 
const char *const kRLM = "\u200F"
 Right-to-Left Mark. More...
 
const char *const kRLE = "\u202A"
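 Right-to-Left Embedding (note: the literal shown, U+202A, is Unicode's LEFT-TO-RIGHT EMBEDDING; RIGHT-TO-LEFT EMBEDDING is U+202B). More...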
 
const char *const kPDF = "\u202C"
 Pop Directional Formatting. More...
 
const char kUniversalAmbigsFile []
 
const int ksizeofUniversalAmbigsFile = sizeof(kUniversalAmbigsFile)
 
const int kRandomizingCenter = 128
 
const int case_state_table [6][4]
 
const char kDoNotReverse [] = "RRP_DO_NO_REVERSE"
 
const char kReverseIfHasRTL [] = "RRP_REVERSE_IF_HAS_RTL"
 
const char kForceReverse [] = "RRP_FORCE_REVERSE"
 
const char *const RTLReversePolicyNames []
 
const double TanhTable []
 
const double LogisticTable []
 
constexpr int kTableSize = 4096
 
constexpr double kScaleFactor = 256.0
 
const int kMaxInputHeight = 48
 
const double kStateClip = 100.0
 
const double kErrClip = 1.0f
 
const double kDictRatio = 2.25
 
const double kCertOffset = -0.085
 
const int kMinWinSize = 500
 
const int kMaxWinSize = 2000
 
const int kXWinFrameSize = 30
 
const int kYWinFrameSize = 80
 
const float kMinCertainty = -20.0f
 
const float kMinProb = exp(kMinCertainty)
 
class tesseract::TFNetworkModelDefaultTypeInternal _TFNetworkModel_default_instance_
 
const int kAdamCorrectionIterations = 200000
 
const double kAdamEpsilon = 1e-8
 
const int kInt8Flag = 1
 
const int kAdamFlag = 4
 
const int kDoubleFlag = 128
 
const int kHistogramBuckets = 16
 
const double kAlignedFraction = 0.03125
 
const double kRaggedFraction = 2.5
 
const double kAlignedGapFraction = 0.75
 
const double kRaggedGapFraction = 1.0
 
const int kVLineAlignment = 3
 
const int kVLineGutter = 1
 
const int kVLineSearchSize = 150
 
const int kMinRaggedTabs = 5
 
const int kMinAlignedTabs = 4
 
const int kVLineMinLength = 500
 
const double kMinTabGradient = 4.0
 
const int kMaxSkewFactor = 15
 
const double kMaxSmallNeighboursPerPix = 1.0 / 32
 
const int kMaxLargeOverlapsWithSmall = 3
 
const int kMaxMediumOverlapsWithSmall = 12
 
const int kMaxLargeOverlapsWithMedium = 12
 
const int kOriginalNoiseMultiple = 8
 
const int kNoisePadding = 4
 
const double kPhotoOffsetFraction = 0.375
 
const double kMinGoodTextPARatio = 1.5
 
const int kMaxIncompatibleColumnCount = 2
 
const double kHorizontalGapMergeFraction = 0.5
 
const double kMinGutterWidthGrid = 0.5
 
const double kMaxDistToPartSizeRatio = 1.5
 
const double kMaxSpacingDrift = 1.0 / 72
 
const double kMaxTopSpacingFraction = 0.25
 
const double kMaxSameBlockLineSpacing = 3
 
const double kMaxSizeRatio = 1.5
 
const double kMaxLeaderGapFractionOfMax = 0.25
 
const double kMaxLeaderGapFractionOfMin = 0.5
 
const int kMinLeaderCount = 5
 
const int kMinStrongTextValue = 6
 
const int kMinChainTextValue = 3
 
const int kHorzStrongTextlineCount = 8
 
const int kHorzStrongTextlineHeight = 10
 
const int kHorzStrongTextlineAspect = 5
 
const double kMaxBaselineError = 0.4375
 
const double kMinBaselineCoverage = 0.5
 
const int kMaxRMSColorNoise = 128
 
const int kMaxColorDistance = 900
 
const int kRGBRMSColors = 4
 
const int kMaxPadFactor = 6
 
const int kMaxNeighbourDistFactor = 4
 
const int kMaxCaptionLines = 7
 
const double kMinCaptionGapRatio = 2.0
 
const double kMinCaptionGapHeightRatio = 0.5
 
const double kMarginOverlapFraction = 0.25
 
const double kBigPartSizeRatio = 1.75
 
const double kTinyEnoughTextlineOverlapFraction = 0.25
 
const double kMaxPartitionSpacing = 1.75
 
const int kSmoothDecisionMargin = 4
 
const double kMinColumnWidth = 2.0 / 3
 
const double kMinRectangularFraction = 0.125
 
const double kMaxRectangularFraction = 0.75
 
const double kMaxRectangularGradient = 0.1
 
const int kMinImageFindSize = 100
 
const double kRMSFitScaling = 8.0
 
const int kMinColorDifference = 16
 
const int kThinLineFraction = 20
 Denominator of resolution makes max pixel width to allow thin lines. More...
 
const int kMinLineLengthFraction = 4
 Denominator of resolution makes min pixels to demand line lengths to be. More...
 
const int kCrackSpacing = 100
 Spacing of cracks across the page to break up tall vertical lines. More...
 
const int kLineFindGridSize = 50
 Grid size used by line finder. Not very critical. More...
 
const int kMinThickLineWidth = 12
 
const int kMaxLineResidue = 6
 
const double kThickLengthMultiple = 0.75
 
const double kMaxNonLineDensity = 0.25
 
const double kMaxStaveHeight = 1.0
 
const double kMinMusicPixelFraction = 0.75
 
const double kStrokeWidthFractionTolerance = 0.125
 
const double kStrokeWidthTolerance = 1.5
 
const double kStrokeWidthFractionCJK = 0.25
 
const double kStrokeWidthCJK = 2.0
 
const int kCJKRadius = 2
 
const double kCJKBrokenDistanceFraction = 0.25
 
const int kCJKMaxComponents = 8
 
const double kCJKAspectRatio = 1.25
 
const double kCJKAspectRatioIncrease = 1.0625
 
const int kMaxCJKSizeRatio = 5
 
const double kBrokenCJKIterationFraction = 0.125
 
const double kDiacriticXPadRatio = 7.0
 
const double kDiacriticYPadRatio = 1.75
 
const double kMinDiacriticSizeRatio = 1.0625
 
const double kMaxDiacriticDistanceRatio = 1.25
 
const double kMaxDiacriticGapToBaseCharHeight = 1.0
 
const int kLineTrapLongest = 4
 
const int kLineTrapShortest = 2
 
const int kMostlyOneDirRatio = 3
 
const double kLineResidueAspectRatio = 8.0
 
const int kLineResiduePadRatio = 3
 
const double kLineResidueSizeRatio = 1.75
 
const float kSizeRatioToReject = 2.0
 
const double kNeighbourSearchFactor = 2.5
 
const double kNoiseOverlapGrowthFactor = 4.0
 
const double kNoiseOverlapAreaFactor = 1.0 / 512
 
const int kTabRadiusFactor = 5
 
const int kMinVerticalSearch = 3
 
const int kMaxVerticalSearch = 12
 
const int kMaxRaggedSearch = 25
 
const int kMinLinesInColumn = 10
 
const double kMinFractionalLinesInColumn = 0.125
 
const double kMaxGutterWidthAbsolute = 2.00
 
const int kRaggedGutterMultiple = 5
 
const double kLineFragmentAspectRatio = 10.0
 
const int kMinEvaluatedTabs = 3
 
const double kCosMaxSkewAngle = 0.866025
 
const int kColumnWidthFactor = 20
 
const int kMaxVerticalSpacing = 500
 
const int kMaxBlobWidth = 500
 
const double kSplitPartitionSize = 2.0
 
const double kAllowTextHeight = 0.5
 
const double kAllowTextWidth = 0.6
 
const double kAllowTextArea = 0.8
 
const double kAllowBlobHeight = 0.3
 
const double kAllowBlobWidth = 0.4
 
const double kAllowBlobArea = 0.05
 
const int kMinBoxesInTextPartition = 10
 
const int kMaxBoxesInDataPartition = 20
 
const double kMaxGapInTextPartition = 4.0
 
const double kMinMaxGapInTextPartition = 0.5
 
const double kMaxBlobOverlapFactor = 4.0
 
const double kMaxTableCellXheight = 2.0
 
const int kMaxColumnHeaderDistance = 4
 
const double kTableColumnThreshold = 3.0
 
const double kMinOverlapWithTable = 0.6
 
const int kSideSpaceMargin = 10
 
const double kSmallTableProjectionThreshold = 0.35
 
const double kLargeTableProjectionThreshold = 0.45
 
const int kLargeTableRowCount = 6
 
const int kMinRowsInTable = 3
 
const int kAdjacentLeaderSearchPadding = 2
 
const double kParagraphEndingPreviousLineRatio = 1.3
 
const double kMaxParagraphEndingLeftSpaceMultiple = 3.0
 
const double kMinParagraphEndingTextToWhitespaceRatio = 3.0
 
const double kMaxXProjectionGapFactor = 2.0
 
const double kStrokeWidthFractionalTolerance = 0.25
 
const double kStrokeWidthConstantTolerance = 2.0
 
const double kHorizontalSpacing = 0.30
 
const double kVerticalSpacing = -0.2
 
const int kCellSplitRowThreshold = 0
 
const int kCellSplitColumnThreshold = 0
 
const int kLinedTableMinVerticalLines = 3
 
const int kLinedTableMinHorizontalLines = 3
 
const double kRequiredColumns = 0.7
 
const double kMarginFactor = 1.1
 
const double kMaxRowSize = 2.5
 
const double kGoodRowNumberOfColumnsSmall [] = { 2, 2, 2, 2, 2, 3, 3 }
 
const int kGoodRowNumberOfColumnsSmallSize
 
const double kGoodRowNumberOfColumnsLarge = 0.7
 
const double kMinFilledArea = 0.35
 
const int kGutterMultiple = 4
 
const int kGutterToNeighbourRatio = 3
 
const int kSimilarVectorDist = 10
 
const int kSimilarRaggedDist = 50
 
const int kMaxFillinMultiple = 11
 
const double kMinGutterFraction = 0.5
 
const double kLineCountReciprocal = 4.0
 
const double kMinAlignedGutter = 0.25
 
const double kMinRaggedGutter = 1.5
 
double textord_tabvector_vertical_gap_fraction = 0.5
 
double textord_tabvector_vertical_box_ratio = 0.5
 
const int kMaxLineLength = 1024
 
const float kRotationRange = 0.02f
 
const int kExposureFactor = 16
 
const int kSaltnPepper = 5
 
const int kMinRampSize = 1000
 
const double kRatingEpsilon = 1.0 / 32
 
const int kMaxOffsetDist = 32
 
const int kMinLigature = 0xfb00
 
const int kMaxLigature = 0xfb17
 
const double kMinDivergenceRate = 50.0
 
const int kMinStallIterations = 10000
 
const double kSubTrainerMarginFraction = 3.0 / 128
 
const double kLearningRateDecay = M_SQRT1_2
 
const int kNumAdjustmentIterations = 100
 
const int kErrorGraphInterval = 1000
 
const int kNumPagesPerBatch = 100
 
const int kMinStartedErrorRate = 75
 
const double kStageTransitionThreshold = 10.0
 
const double kHighConfidence = 0.9375
 
const double kImprovementFraction = 15.0 / 16.0
 
const double kBestCheckpointFraction = 31.0 / 32.0
 
const int kTargetXScale = 5
 
const int kTargetYScale = 100
 
const int kMinClusteredShapes = 1
 
const int kMaxUnicharsPerCluster = 2000
 
const float kFontMergeDistance = 0.025
 
const float kInfiniteDist = 999.0f
 
const int kDefaultResolution = 300
 
const int kTestChar = -1
 
const int kSquareLimit = 25
 
const int kPrime1 = 17
 
const int kPrime2 = 13
 
int test_data [] = {8, 1, 2, -4, 7, 9, 65536, 4, 9, 0}
 
const int kTrainerIterations = 600
 
const int kBatchIterations = 100
 

Detailed Description

The box file is assumed to contain box definitions, one per line, of the following format for blob-level boxes:

*   <UTF8 str> <left> <bottom> <right> <top> <page id>
* 

and for word/line-level boxes:

*   WordStr <left> <bottom> <right> <top> <page id> #<space-delimited word str>
* 

NOTES: The boxes use tesseract coordinates, i.e. 0,0 is at BOTTOM-LEFT.

<page id> is 0-based, and the page number is used for multipage input (tiff).

In the blob-level form, each line represents a recognizable unit, which may be several UTF-8 bytes, but there is a bounding box around each recognizable unit, and no classifier is needed to train in this mode (bootstrapping.)

In the word/line-level form, the line begins with the literal "WordStr", and the bounding box bounds either a whole line or a whole word. The recognizable units in the word/line are listed after the # at the end of the line and are space delimited, ignoring any original spaces on the line. E.g.

* word -> #w o r d
* multi word line -> #m u l t i w o r d l i n e
* 

The recognizable units must be space-delimited in order to allow multiple unicodes to be used for a single recognizable unit, e.g. Hindi.

In this mode, the classifier must have been pre-trained with the desired character set, or it will not be able to find the character segmentations.

Make a word from the selected blobs and run Tess on them.

Parameters
page_res: recognise blobs
selection_box: within this box

fp_eval_word_spacing() Evaluation function for fixed pitch word lists.

Basically, count the number of "nice" characters - those which are in tess acceptable words or in dict words and are not rejected. Penalise any potential noise chars.

build_menu()

Construct the menu tree used by the command window

process_cmd_win_event()

Process a command returned from the command window. (Just call the appropriate command handler.)

word_blank_and_set_display() Word processor

Blank display of word then redisplay word according to current display mode settings


Include Files and Type Defines


Public Function Prototypes


Include Files and Type Defines


Include Files and Type Defines

Typedef Documentation

◆ BlobGridSearch

using tesseract::BlobGridSearch = typedef GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>

Definition at line 31 of file blobgrid.h.

◆ char32

using tesseract::char32 = typedef signed int

Definition at line 53 of file unichar.h.

◆ ColPartitionGridSearch

using tesseract::ColPartitionGridSearch = typedef GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>

Definition at line 935 of file colpartition.h.

◆ ColSegmentGrid

using tesseract::ColSegmentGrid = typedef BBGrid<ColSegment, ColSegment_CLIST, ColSegment_C_IT>

Definition at line 117 of file tablefind.h.

◆ ColSegmentGridSearch

using tesseract::ColSegmentGridSearch = typedef GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT>

Definition at line 120 of file tablefind.h.

◆ DawgVector

Definition at line 53 of file dict.h.

◆ DictFunc

using tesseract::DictFunc = typedef int (Dict::*)(void*, const UNICHARSET&, UNICHAR_ID, bool) const

Definition at line 80 of file baseapi.h.

◆ DotProductFunction

using tesseract::DotProductFunction = typedef double (*)(const double*, const double*, int)

Definition at line 25 of file simddetect.h.

◆ FileReader

using tesseract::FileReader = typedef bool (*)(const char* filename, GenericVector<char>* data)

Definition at line 47 of file serialis.h.

◆ FileWriter

using tesseract::FileWriter = typedef bool (*)(const GenericVector<char>& data, const char* filename)

Definition at line 51 of file serialis.h.

◆ FillLatticeFunc

using tesseract::FillLatticeFunc = typedef void (Wordrec::*)(const MATRIX&, const WERD_CHOICE_LIST&, const UNICHARSET&, BlamerBundle*)

Definition at line 86 of file baseapi.h.

◆ IntKDPair

using tesseract::IntKDPair = typedef KDPairInc<int, int>

Definition at line 179 of file kdpair.h.

◆ LanguageModelFlagsType

using tesseract::LanguageModelFlagsType = typedef unsigned char

Used for expressing various language model flags.

Definition at line 37 of file lm_state.h.

◆ LigHash

using tesseract::LigHash = typedef std::unordered_map<std::string, std::string, StringHash>

Definition at line 53 of file ligature_table.h.

◆ NodeChildVector

Definition at line 62 of file dawg.h.

◆ PainPointHeap

Definition at line 37 of file lm_pain_points.h.

◆ ParamsModelClassifyFunc

using tesseract::ParamsModelClassifyFunc = typedef float (Dict::*)(const char*, void*)

Definition at line 83 of file baseapi.h.

◆ ParamsTrainingHypothesisList

◆ PartSetVector

Definition at line 33 of file colpartitionset.h.

◆ ProbabilityInContextFunc

using tesseract::ProbabilityInContextFunc = typedef double (Dict::*)(const char*, const char*, int, const char*, int)

Definition at line 82 of file baseapi.h.

◆ RecodeHeap

Definition at line 177 of file recodebeam.h.

◆ RecodePair

using tesseract::RecodePair = typedef KDPairInc<double, RecodeNode>

Definition at line 176 of file recodebeam.h.

◆ RSCounts

using tesseract::RSCounts = typedef std::unordered_map<int, int>

Definition at line 48 of file unicharcompress.cpp.

◆ RSMap

using tesseract::RSMap = typedef std::unordered_map<int, std::unique_ptr<std::vector<int> >>

Definition at line 46 of file unicharcompress.cpp.

◆ SetOfModels

Definition at line 98 of file paragraphs_internal.h.

◆ ShapeQueue

Definition at line 155 of file shapetable.h.

◆ SuccessorList

Definition at line 63 of file dawg.h.

◆ SuccessorListsVector

Definition at line 64 of file dawg.h.

◆ TestCallback

using tesseract::TestCallback = typedef std::function<STRING(int, const double*, const TessdataManager&, int)>

Definition at line 73 of file lstmtrainer.h.

◆ TruthCallback

using tesseract::TruthCallback = typedef std::function<void(const UNICHARSET&, int, PageIterator*, Pix*)>

Definition at line 88 of file baseapi.h.

◆ UnicharAmbigsVector

using tesseract::UnicharAmbigsVector = typedef GenericVector<AmbigSpec_LIST *>

Definition at line 134 of file ambigs.h.

◆ UnicharIdVector

Definition at line 35 of file ambigs.h.

◆ WidthCallback

using tesseract::WidthCallback = typedef std::function<bool(int)>

Definition at line 35 of file tabfind.h.

◆ WordGrid

using tesseract::WordGrid = typedef BBGrid<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT>

Definition at line 65 of file textord.h.

◆ WordRecognizer

using tesseract::WordRecognizer = typedef void (Tesseract::*)(const WordData&, WERD_RES**, PointerVector<WERD_RES>*)

Definition at line 170 of file tesseractclass.h.

◆ WordSearch

using tesseract::WordSearch = typedef GridSearch<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT>

Definition at line 66 of file textord.h.

Enumeration Type Documentation

◆ AmbigType

Enumerator
NOT_AMBIG 
REPLACE_AMBIG 
DEFINITE_AMBIG 
SIMILAR_AMBIG 
CASE_AMBIG 
AMBIG_TYPE_COUNT 

Definition at line 37 of file ambigs.h.

37  {
38  NOT_AMBIG, // the ngram pair is not ambiguous
39  REPLACE_AMBIG, // ocred ngram should always be substituted with correct
40  DEFINITE_AMBIG, // add correct ngram to the classifier results (1-1)
41  SIMILAR_AMBIG, // use pairwise classifier for ocred/correct pair (1-1)
42  CASE_AMBIG, // this is a case ambiguity (1-1)
43 
44  AMBIG_TYPE_COUNT // number of enum entries
45 };

◆ CachingStrategy

Enumerator
CS_SEQUENTIAL 
CS_ROUND_ROBIN 

Definition at line 41 of file imagedata.h.

41  {
42  // Reads all of one file before moving on to the next. Requires samples to be
43  // shuffled across files. Uses the count of samples in the first file as
44  // the count in all the files to achieve high-speed random access. As a
45  // consequence, if subsequent files are smaller, they get entries used more
46  // than once, and if subsequent files are larger, some entries are not used.
47  // Best for larger data sets that don't fit in memory.
49  // Reads one sample from each file in rotation. Does not require shuffled
50  // samples, but is extremely disk-intensive. Samples in smaller files also
51  // get used more often than samples in larger files.
52  // Best for smaller data sets that mostly fit in memory.
54 };

◆ CharSegmentationType

Enumerator
CST_FRAGMENT 
CST_WHOLE 
CST_IMPROPER 
CST_NGRAM 

Definition at line 96 of file classify.h.

96  {
97  CST_FRAGMENT, // A partial character.
98  CST_WHOLE, // A correctly segmented character.
99  CST_IMPROPER, // More than one but less than 2 characters.
100  CST_NGRAM // Multiple characters.
101 };

◆ CMD_EVENTS

Enumerator
ACTION_1_CMD_EVENT 
RECOG_WERDS 
RECOG_PSEUDO 
ACTION_2_CMD_EVENT 

Definition at line 486 of file tessedit.cpp.

◆ ColSegType

Enumerator
COL_UNKNOWN 
COL_TEXT 
COL_TABLE 
COL_MIXED 
COL_COUNT 

Definition at line 29 of file tablefind.h.

29  {
31  COL_TEXT,
32  COL_TABLE,
33  COL_MIXED,
34  COL_COUNT
35 };

◆ ColumnSpanningType

Enumerator
CST_NOISE 
CST_FLOWING 
CST_HEADING 
CST_PULLOUT 
CST_COUNT 

Definition at line 47 of file colpartition.h.

47  {
48  CST_NOISE, // Strictly between columns.
49  CST_FLOWING, // Strictly within a single column.
50  CST_HEADING, // Spans multiple columns.
51  CST_PULLOUT, // Touches multiple columns, but doesn't span them.
52  CST_COUNT // Number of entries.
53 };

◆ CountTypes

Enumerator
CT_UNICHAR_TOP_OK 
CT_UNICHAR_TOP1_ERR 
CT_UNICHAR_TOP2_ERR 
CT_UNICHAR_TOPN_ERR 
CT_UNICHAR_TOPTOP_ERR 
CT_OK_MULTI_UNICHAR 
CT_OK_JOINED 
CT_OK_BROKEN 
CT_REJECT 
CT_FONT_ATTR_ERR 
CT_OK_MULTI_FONT 
CT_NUM_RESULTS 
CT_RANK 
CT_REJECTED_JUNK 
CT_ACCEPTED_JUNK 
CT_SIZE 

Definition at line 69 of file errorcounter.h.

69  {
70  CT_UNICHAR_TOP_OK, // Top shape contains correct unichar id.
71  // The rank of the results in TOP1, TOP2, TOPN is determined by a gap of
72  // kRatingEpsilon from the first result in each group. The real top choice
73  // is measured using TOPTOP.
74  CT_UNICHAR_TOP1_ERR, // Top shape does not contain correct unichar id.
75  CT_UNICHAR_TOP2_ERR, // Top 2 shapes don't contain correct unichar id.
76  CT_UNICHAR_TOPN_ERR, // No output shape contains correct unichar id.
77  CT_UNICHAR_TOPTOP_ERR, // Very top choice not correct.
78  CT_OK_MULTI_UNICHAR, // Top shape id has correct unichar id, and others.
79  CT_OK_JOINED, // Top shape id is correct but marked joined.
80  CT_OK_BROKEN, // Top shape id is correct but marked broken.
81  CT_REJECT, // Classifier hates this.
82  CT_FONT_ATTR_ERR, // Top unichar OK, but font attributes incorrect.
83  CT_OK_MULTI_FONT, // CT_FONT_ATTR_OK but there are multiple font attrs.
84  CT_NUM_RESULTS, // Number of answers produced.
85  CT_RANK, // Rank of correct answer.
86  CT_REJECTED_JUNK, // Junk that was correctly rejected.
87  CT_ACCEPTED_JUNK, // Junk that was incorrectly classified otherwise.
88 
89  CT_SIZE // Number of types for array sizing.
90 };

◆ DawgType

Enumerator
DAWG_TYPE_PUNCTUATION 
DAWG_TYPE_WORD 
DAWG_TYPE_NUMBER 
DAWG_TYPE_PATTERN 
DAWG_TYPE_COUNT 

Definition at line 66 of file dawg.h.

67  {
72 
73  DAWG_TYPE_COUNT // number of enum entries

◆ ErrorTypes

Enumerator
ET_RMS 
ET_DELTA 
ET_WORD_RECERR 
ET_CHAR_ERROR 
ET_SKIP_RATIO 
ET_COUNT 

Definition at line 37 of file lstmtrainer.h.

37  {
38  ET_RMS, // RMS activation error.
39  ET_DELTA, // Number of big errors in deltas.
40  ET_WORD_RECERR, // Output text string word recall error.
41  ET_CHAR_ERROR, // Output text string total char error.
42  ET_SKIP_RATIO, // Fraction of samples skipped.
43  ET_COUNT // For array sizing.
44 };

◆ FactorNames

Enumerator
FN_INCOLOR 
FN_Y0 
FN_Y1 
FN_Y2 
FN_Y3 
FN_X0 
FN_X1 
FN_SHEAR 
FN_NUM_FACTORS 

Definition at line 58 of file degradeimage.cpp.

92  {

◆ FlexDimensions

Enumerator
FD_BATCH 
FD_HEIGHT 
FD_WIDTH 
FD_DIMSIZE 

Definition at line 32 of file stridemap.h.

32  {
33  FD_BATCH, // Index of multiple images.
34  FD_HEIGHT, // y-coordinate in image.
35  FD_WIDTH, // x-coordinate in image.
36  FD_DIMSIZE, // Number of flexible non-depth dimensions.
37 };

◆ GraphemeNorm

Enumerator
kNone 
kNormalize 

Definition at line 65 of file normstrngs.h.

◆ GraphemeNormMode

Enumerator
kSingleString 
kCombined 
kGlyphSplit 
kIndividualUnicodes 

Definition at line 48 of file validator.h.

52  : char32 {
53  kNonVirama = 0,
54  kDevanagari = 0x900,
55  kBengali = 0x980,
56  kGurmukhi = 0xa00,
57  kGujarati = 0xa80,
58  kOriya = 0xb00,
59  kTamil = 0xb80,
60  kTelugu = 0xc00,
61  kKannada = 0xc80,

◆ kParamsTrainingFeatureType

Enumerator
PTRAIN_DIGITS_SHORT 
PTRAIN_DIGITS_MED 
PTRAIN_DIGITS_LONG 
PTRAIN_NUM_SHORT 
PTRAIN_NUM_MED 
PTRAIN_NUM_LONG 
PTRAIN_DOC_SHORT 
PTRAIN_DOC_MED 
PTRAIN_DOC_LONG 
PTRAIN_DICT_SHORT 
PTRAIN_DICT_MED 
PTRAIN_DICT_LONG 
PTRAIN_FREQ_SHORT 
PTRAIN_FREQ_MED 
PTRAIN_FREQ_LONG 
PTRAIN_SHAPE_COST_PER_CHAR 
PTRAIN_NGRAM_COST_PER_CHAR 
PTRAIN_NUM_BAD_PUNC 
PTRAIN_NUM_BAD_CASE 
PTRAIN_XHEIGHT_CONSISTENCY 
PTRAIN_NUM_BAD_CHAR_TYPE 
PTRAIN_NUM_BAD_SPACING 
PTRAIN_NUM_BAD_FONT 
PTRAIN_RATING_PER_CHAR 
PTRAIN_NUM_FEATURE_TYPES 

Definition at line 39 of file params_training_featdef.h.

39  {
40  // Digits
42  PTRAIN_DIGITS_MED, // 1
43  PTRAIN_DIGITS_LONG, // 2
44  // Number or pattern (NUMBER_PERM, USER_PATTERN_PERM)
45  PTRAIN_NUM_SHORT, // 3
46  PTRAIN_NUM_MED, // 4
47  PTRAIN_NUM_LONG, // 5
48  // Document word (DOC_DAWG_PERM)
49  PTRAIN_DOC_SHORT, // 6
50  PTRAIN_DOC_MED, // 7
51  PTRAIN_DOC_LONG, // 8
52  // Word (SYSTEM_DAWG_PERM, USER_DAWG_PERM, COMPOUND_PERM)
53  PTRAIN_DICT_SHORT, // 9
54  PTRAIN_DICT_MED, // 10
55  PTRAIN_DICT_LONG, // 11
56  // Frequent word (FREQ_DAWG_PERM)
57  PTRAIN_FREQ_SHORT, // 12
58  PTRAIN_FREQ_MED, // 13
59  PTRAIN_FREQ_LONG, // 14
62  PTRAIN_NUM_BAD_PUNC, // 17
63  PTRAIN_NUM_BAD_CASE, // 18
67  PTRAIN_NUM_BAD_FONT, // 22
69 
71 };

◆ LeftOrRight

Enumerator
LR_LEFT 
LR_RIGHT 

Definition at line 39 of file strokewidth.h.

39  {
40  LR_LEFT,
41  LR_RIGHT
42 };

◆ LineType

Enumerator
LT_START 
LT_BODY 
LT_UNKNOWN 
LT_MULTIPLE 

Definition at line 49 of file paragraphs_internal.h.

50  {
51  LT_START = 'S', // First line of a paragraph.
52  LT_BODY = 'C', // Continuation line of a paragraph.
53  LT_UNKNOWN = 'U', // No clues.
54  LT_MULTIPLE = 'M', // Matches for both LT_START and LT_BODY.

◆ LMPainPointsType

Enumerator
LM_PPTYPE_BLAMER 
LM_PPTYPE_AMBIG 
LM_PPTYPE_PATH 
LM_PPTYPE_SHAPE 
LM_PPTYPE_NUM 

Definition at line 40 of file lm_pain_points.h.

40  {
45 
47 };

◆ LossType

Enumerator
LT_NONE 
LT_CTC 
LT_SOFTMAX 
LT_LOGISTIC 

Definition at line 29 of file static_shape.h.

29  {
30  LT_NONE, // Undefined.
31  LT_CTC, // Softmax with standard CTC for training/decoding.
32  LT_SOFTMAX, // Outputs sum to 1 in fixed positions.
33  LT_LOGISTIC, // Logistic outputs with independent values.
34 };

◆ NeighbourPartitionType

Enumerator
NPT_HTEXT 
NPT_VTEXT 
NPT_WEAK_HTEXT 
NPT_WEAK_VTEXT 
NPT_IMAGE 
NPT_COUNT 

Definition at line 1501 of file colpartitiongrid.cpp.

1501  {
1502  NPT_HTEXT, // Definite horizontal text.
1503  NPT_VTEXT, // Definite vertical text.
1504  NPT_WEAK_HTEXT, // Weakly horizontal text. Counts as HTEXT for HTEXT, but
1505  // image for image and VTEXT.
1506  NPT_WEAK_VTEXT, // Weakly vertical text. Counts as VTEXT for VTEXT, but
1507  // image for image and HTEXT.
1508  NPT_IMAGE, // Definite non-text.
1509  NPT_COUNT // Number of array elements.
1510 };

◆ NetworkFlags

Enumerator
NF_LAYER_SPECIFIC_LR 
NF_ADAM 

Definition at line 85 of file network.h.

85  {
86  // Network forward/backprop behavior.
87  NF_LAYER_SPECIFIC_LR = 64, // Separate learning rate for each layer.
88  NF_ADAM = 128, // Weight-specific learning rate.
89 };

◆ NetworkType

Enumerator
NT_NONE 
NT_INPUT 
NT_CONVOLVE 
NT_MAXPOOL 
NT_PARALLEL 
NT_REPLICATED 
NT_PAR_RL_LSTM 
NT_PAR_UD_LSTM 
NT_PAR_2D_LSTM 
NT_SERIES 
NT_RECONFIG 
NT_XREVERSED 
NT_YREVERSED 
NT_XYTRANSPOSE 
NT_LSTM 
NT_LSTM_SUMMARY 
NT_LOGISTIC 
NT_POSCLIP 
NT_SYMCLIP 
NT_TANH 
NT_RELU 
NT_LINEAR 
NT_SOFTMAX 
NT_SOFTMAX_NO_CTC 
NT_LSTM_SOFTMAX 
NT_LSTM_SOFTMAX_ENCODED 
NT_TENSORFLOW 
NT_COUNT 

Definition at line 43 of file network.h.

43  {
44  NT_NONE, // The naked base class.
45  NT_INPUT, // Inputs from an image.
46  // Plumbing networks combine other networks or rearrange the inputs.
47  NT_CONVOLVE, // Duplicates inputs in a sliding window neighborhood.
48  NT_MAXPOOL, // Chooses the max result from a rectangle.
49  NT_PARALLEL, // Runs networks in parallel.
50  NT_REPLICATED, // Runs identical networks in parallel.
51  NT_PAR_RL_LSTM, // Runs LTR and RTL LSTMs in parallel.
52  NT_PAR_UD_LSTM, // Runs Up and Down LSTMs in parallel.
53  NT_PAR_2D_LSTM, // Runs 4 LSTMs in parallel.
54  NT_SERIES, // Executes a sequence of layers.
55  NT_RECONFIG, // Scales the time/y size but makes the output deeper.
56  NT_XREVERSED, // Reverses the x direction of the inputs/outputs.
57  NT_YREVERSED, // Reverses the y-direction of the inputs/outputs.
58  NT_XYTRANSPOSE, // Transposes x and y (for just a single op).
59  // Functional networks actually calculate stuff.
60  NT_LSTM, // Long-Short-Term-Memory block.
61  NT_LSTM_SUMMARY, // LSTM that only keeps its last output.
62  NT_LOGISTIC, // Fully connected logistic nonlinearity.
63  NT_POSCLIP, // Fully connected rect lin version of logistic.
64  NT_SYMCLIP, // Fully connected rect lin version of tanh.
65  NT_TANH, // Fully connected with tanh nonlinearity.
66  NT_RELU, // Fully connected with rectifier nonlinearity.
67  NT_LINEAR, // Fully connected with no nonlinearity.
68  NT_SOFTMAX, // Softmax uses exponential normalization, with CTC.
69  NT_SOFTMAX_NO_CTC, // Softmax uses exponential normalization, no CTC.
70  // The SOFTMAX LSTMs both have an extra softmax layer on top, but inside, with
71  // the outputs fed back to the input of the LSTM at the next timestep.
72  // The ENCODED version binary encodes the softmax outputs, providing log2 of
73  // the number of outputs as additional inputs, and the other version just
74  // provides all the softmax outputs as additional inputs.
75  NT_LSTM_SOFTMAX, // 1-d LSTM with built-in fully connected softmax.
76  NT_LSTM_SOFTMAX_ENCODED, // 1-d LSTM with built-in binary encoded softmax.
77  // A TensorFlow graph encapsulated as a Tesseract network.
79 
80  NT_COUNT // Array size.
81 };

◆ NodeContinuation

Enumerator
NC_ANYTHING 
NC_ONLY_DUP 
NC_NO_DUP 
NC_COUNT 

Definition at line 73 of file recodebeam.h.

73  {
74  NC_ANYTHING, // This node used just its own score, so anything can follow.
75  NC_ONLY_DUP, // The current node combined another score with the score for
76  // itself, without a stand-alone duplicate before, so must be
77  // followed by a stand-alone duplicate.
78  NC_NO_DUP, // The current node combined another score with the score for
79  // itself, after a stand-alone, so can only be followed by
80  // something other than a duplicate of the current node.
81  NC_COUNT
82 };

◆ NormalizationMode

Enumerator
NM_BASELINE 
NM_CHAR_ISOTROPIC 
NM_CHAR_ANISOTROPIC 

Definition at line 41 of file normalis.h.

42  {
43  NM_BASELINE = -3, // The original BL normalization mode.
44  NM_CHAR_ISOTROPIC = -2, // Character normalization but isotropic.
45  NM_CHAR_ANISOTROPIC = -1 // The original CN normalization mode.

◆ OcrEngineMode

When Tesseract/Cube is initialized we can choose to instantiate/load/run only the Tesseract part, only the Cube part or both along with the combiner. The preference of which engine to use is stored in tessedit_ocr_engine_mode.

ATTENTION: When modifying this enum, please make sure to make the appropriate changes to all the enums mirroring it (e.g. OCREngine in cityblock/workflow/detection/detection_storage.proto). Such enums will mention the connection to OcrEngineMode in the comments.

Enumerator
OEM_TESSERACT_ONLY 
OEM_LSTM_ONLY 
OEM_TESSERACT_LSTM_COMBINED 
OEM_DEFAULT 
OEM_COUNT 

Definition at line 265 of file publictypes.h.

265  {
266  OEM_TESSERACT_ONLY, // Run Tesseract only - fastest; deprecated
267  OEM_LSTM_ONLY, // Run just the LSTM line recognizer.
268  OEM_TESSERACT_LSTM_COMBINED, // Run the LSTM recognizer, but allow fallback
269  // to Tesseract when things get difficult.
270  // deprecated
271  OEM_DEFAULT, // Specify this mode when calling init_*(),
272  // to indicate that any of the above modes
273  // should be automatically inferred from the
274  // variables in the language-specific config,
275  // command-line configs, or if not specified
276  // in any of the above should be set to the
277  // default OEM_TESSERACT_ONLY.
278  OEM_COUNT // Number of OEMs
279 };

◆ OCRNorm

enum tesseract::OCRNorm [strong]
Enumerator
kNone 
kNormalize 

Definition at line 57 of file normstrngs.h.

◆ Orientation

+------------------+  Orientation Example:
| 1 Aaaa Aaaa Aaaa |  ====================
| Aaa aa aaa aa    |  To left is a diagram of some (1) English and
| aaaaaa A aa aaa. |  (2) Chinese text and a (3) photo credit.
|                2 |
|   #######  c c C |  Upright Latin characters are represented as A and a.
|   #######  c c c |  '<' represents a latin character rotated
| < #######  c c c |      anti-clockwise 90 degrees.
| < #######  c   c |
| < #######  .   c |  Upright Chinese characters are represented C and c.
| 3 #######      c |
+------------------+  NOTA BENE: enum values here should match goodoc.proto

If you orient your head so that "up" aligns with Orientation, then the characters will appear "right side up" and readable.

In the example above, both the English and Chinese paragraphs are oriented so their "up" is the top of the page (page up). The photo credit is read with one's head turned leftward ("up" is to page left).

The values of this enum match the convention of Tesseract's osdetect.h

Enumerator
ORIENTATION_PAGE_UP 
ORIENTATION_PAGE_RIGHT 
ORIENTATION_PAGE_DOWN 
ORIENTATION_PAGE_LEFT 

Definition at line 116 of file publictypes.h.

116  {
121 };

◆ PageIteratorLevel

Enum of the elements of the page hierarchy, used in ResultIterator to provide functions that operate on each level without having to have 5x as many functions.

Enumerator
RIL_BLOCK 
RIL_PARA 
RIL_TEXTLINE 
RIL_WORD 
RIL_SYMBOL 

Definition at line 216 of file publictypes.h.

216  {
217  RIL_BLOCK, // Block of text/image/separator line.
218  RIL_PARA, // Paragraph within a block.
219  RIL_TEXTLINE, // Line within a paragraph.
220  RIL_WORD, // Word within a textline.
221  RIL_SYMBOL // Symbol/character within a word.
222 };

◆ PageSegMode

Possible modes for page layout analysis. These must be kept in order of decreasing amount of layout analysis to be done, except for OSD_ONLY, so that the inequality test macros below work.

Enumerator
PSM_OSD_ONLY 

Orientation and script detection only.

PSM_AUTO_OSD 

Automatic page segmentation with orientation and script detection (OSD).

PSM_AUTO_ONLY 

Automatic page segmentation, but no OSD or OCR.

PSM_AUTO 

Fully automatic page segmentation, but no OSD.

PSM_SINGLE_COLUMN 

Assume a single column of text of variable sizes.

PSM_SINGLE_BLOCK_VERT_TEXT 

Assume a single uniform block of vertically aligned text.

PSM_SINGLE_BLOCK 

Assume a single uniform block of text. (Default.)

PSM_SINGLE_LINE 

Treat the image as a single text line.

PSM_SINGLE_WORD 

Treat the image as a single word.

PSM_CIRCLE_WORD 

Treat the image as a single word in a circle.

PSM_SINGLE_CHAR 

Treat the image as a single character.

PSM_SPARSE_TEXT 

Find as much text as possible in no particular order.

PSM_SPARSE_TEXT_OSD 

Sparse text with orientation and script detection.

PSM_RAW_LINE 

Treat the image as a single text line, bypassing hacks that are Tesseract-specific.

PSM_COUNT 

Number of enum entries.

Definition at line 159 of file publictypes.h.

159  {
160  PSM_OSD_ONLY = 0,
161  PSM_AUTO_OSD = 1,
162  PSM_AUTO_ONLY = 2,
164  PSM_AUTO = 3,
165  PSM_SINGLE_COLUMN = 4,
167  PSM_SINGLE_BLOCK = 6,
169  PSM_SINGLE_LINE = 7,
170  PSM_SINGLE_WORD = 8,
171  PSM_CIRCLE_WORD = 9,
172  PSM_SINGLE_CHAR = 10,
174  11,
175  PSM_SPARSE_TEXT_OSD = 12,
176  PSM_RAW_LINE = 13,
177 
179  PSM_COUNT
180 };

◆ ParagraphJustification

JUSTIFICATION_UNKNOWN The alignment is not clearly one of the other options. This could happen, for example, if there are only one or two lines of text or the text looks like source code or poetry.

NOTA BENE: Fully justified paragraphs (text aligned to both left and right margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text is written with a left-to-right script and with JUSTIFICATION_RIGHT if their text is written in a right-to-left script.

Interpretation for text read in vertical lines: "Left" is wherever the starting reading position is.

JUSTIFICATION_LEFT Each line, except possibly the first, is flush to the same left tab stop.

JUSTIFICATION_CENTER The text lines of the paragraph are centered about a line going down through the middle of the text lines.

JUSTIFICATION_RIGHT Each line, except possibly the first, is flush to the same right tab stop.

Enumerator
JUSTIFICATION_UNKNOWN 
JUSTIFICATION_LEFT 
JUSTIFICATION_CENTER 
JUSTIFICATION_RIGHT 

Definition at line 248 of file publictypes.h.

◆ PartitionFindResult

Enumerator
PFR_OK 
PFR_SKEW 
PFR_NOISE 

Definition at line 46 of file strokewidth.h.

46  {
47  PFR_OK, // Everything is OK.
48  PFR_SKEW, // Skew was detected and rotated.
49  PFR_NOISE // Noise was detected and removed.
50 };

◆ ScriptPos

Enumerator
SP_NORMAL 
SP_SUBSCRIPT 
SP_SUPERSCRIPT 
SP_DROPCAP 

Definition at line 250 of file ratngs.h.

251  {
252  SP_NORMAL,
253  SP_SUBSCRIPT,
255  SP_DROPCAP

◆ SerializeAmount

Enumerator
LIGHT 
NO_BEST_TRAINER 
FULL 

Definition at line 56 of file lstmtrainer.h.

56  {
57  LIGHT, // Minimal data for remote training.
58  NO_BEST_TRAINER, // Save an empty vector in place of best_trainer_.
59  FULL, // All data including best_trainer_.
60 };

◆ SetParamConstraint

Enumerator
SET_PARAM_CONSTRAINT_NONE 
SET_PARAM_CONSTRAINT_DEBUG_ONLY 
SET_PARAM_CONSTRAINT_NON_DEBUG_ONLY 
SET_PARAM_CONSTRAINT_NON_INIT_ONLY 

Definition at line 49 of file params.h.

50  {
51  public:
52  // Reads a file of parameter definitions and set/modify the values therein.
53  // If the filename begins with a + or -, the BoolVariables will be
54  // ORed or ANDed with any current values.

◆ SubTrainerResult

Enumerator
STR_NONE 
STR_UPDATED 
STR_REPLACED 

Definition at line 63 of file lstmtrainer.h.

63  {
64  STR_NONE, // Did nothing as not good enough.
65  STR_UPDATED, // Subtrainer was updated, but didn't replace *this.
66  STR_REPLACED // Subtrainer replaced *this.
67 };

◆ TabAlignment

Enumerator
TA_LEFT_ALIGNED 
TA_LEFT_RAGGED 
TA_CENTER_JUSTIFIED 
TA_RIGHT_ALIGNED 
TA_RIGHT_RAGGED 
TA_SEPARATOR 
TA_COUNT 

Definition at line 44 of file tabvector.h.

◆ TessdataType

Enumerator
TESSDATA_LANG_CONFIG 
TESSDATA_UNICHARSET 
TESSDATA_AMBIGS 
TESSDATA_INTTEMP 
TESSDATA_PFFMTABLE 
TESSDATA_NORMPROTO 
TESSDATA_PUNC_DAWG 
TESSDATA_SYSTEM_DAWG 
TESSDATA_NUMBER_DAWG 
TESSDATA_FREQ_DAWG 
TESSDATA_FIXED_LENGTH_DAWGS 
TESSDATA_CUBE_UNICHARSET 
TESSDATA_CUBE_SYSTEM_DAWG 
TESSDATA_SHAPE_TABLE 
TESSDATA_BIGRAM_DAWG 
TESSDATA_UNAMBIG_DAWG 
TESSDATA_PARAMS_MODEL 
TESSDATA_LSTM 
TESSDATA_LSTM_PUNC_DAWG 
TESSDATA_LSTM_SYSTEM_DAWG 
TESSDATA_LSTM_NUMBER_DAWG 
TESSDATA_LSTM_UNICHARSET 
TESSDATA_LSTM_RECODER 
TESSDATA_VERSION 
TESSDATA_NUM_ENTRIES 

Definition at line 56 of file tessdatamanager.h.

56  {
59  TESSDATA_AMBIGS, // 2
60  TESSDATA_INTTEMP, // 3
61  TESSDATA_PFFMTABLE, // 4
62  TESSDATA_NORMPROTO, // 5
63  TESSDATA_PUNC_DAWG, // 6
66  TESSDATA_FREQ_DAWG, // 9
67  TESSDATA_FIXED_LENGTH_DAWGS, // 10 // deprecated
68  TESSDATA_CUBE_UNICHARSET, // 11 // deprecated
69  TESSDATA_CUBE_SYSTEM_DAWG, // 12 // deprecated
74  TESSDATA_LSTM, // 17
80  TESSDATA_VERSION, // 23
81 
83 };

◆ TextlineOrder

The text lines are read in the given sequence.

In English, the order is top-to-bottom. In Chinese, vertical text lines are read right-to-left. Mongolian is written in vertical columns top to bottom like Chinese, but the lines are ordered left-to-right.

Note that only some combinations make sense. For example, WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM.

Enumerator
TEXTLINE_ORDER_LEFT_TO_RIGHT 
TEXTLINE_ORDER_RIGHT_TO_LEFT 
TEXTLINE_ORDER_TOP_TO_BOTTOM 

Definition at line 148 of file publictypes.h.

◆ TopNState

Enumerator
TN_TOP2 
TN_TOPN 
TN_ALSO_RAN 
TN_COUNT 

Definition at line 85 of file recodebeam.h.

85  {
86  TN_TOP2, // Winner or 2nd.
87  TN_TOPN, // Runner up in top-n, but not 1st or 2nd.
88  TN_ALSO_RAN, // Not in the top-n.
89  TN_COUNT
90 };

◆ Trainability

Enumerator
TRAINABLE 
PERFECT 
UNENCODABLE 
HI_PRECISION_ERR 
NOT_BOXED 

Definition at line 47 of file lstmtrainer.h.

47  {
48  TRAINABLE, // Non-zero delta error.
49  PERFECT, // Zero delta error.
50  UNENCODABLE, // Not trainable due to coding/alignment trouble.
51  HI_PRECISION_ERR, // Hi confidence disagreement.
52  NOT_BOXED, // Early in training and has no character boxes.
53 };

◆ TrainingFlags

Enumerator
TF_INT_MODE 
TF_COMPRESS_UNICHARSET 

Definition at line 46 of file lstmrecognizer.h.

46  {
47  TF_INT_MODE = 1,
49 };

◆ TrainingState

Enumerator
TS_DISABLED 
TS_ENABLED 
TS_TEMP_DISABLE 
TS_RE_ENABLE 

Definition at line 92 of file network.h.

92  {
93  // Valid states of training_.
94  TS_DISABLED, // Disabled permanently.
95  TS_ENABLED, // Enabled for backprop and to write a training dump.
96  // Re-enable from ANY disabled state.
97  TS_TEMP_DISABLE, // Temporarily disabled to write a recognition dump.
98  // Valid only for SetEnableTraining.
99  TS_RE_ENABLE, // Re-Enable from TS_TEMP_DISABLE, but not TS_DISABLED.
100 };

◆ UnicodeNormMode

Enumerator
kNFD 
kNFC 
kNFKD 
kNFKC 

Definition at line 48 of file normstrngs.h.

49  {
50  kNone,
51  kNormalize,
52 };
53 

◆ ViramaScript

Enumerator
kNonVirama 
kDevanagari 
kBengali 
kGurmukhi 
kGujarati 
kOriya 
kTamil 
kTelugu 
kKannada 
kMalayalam 
kSinhala 
kMyanmar 
kKhmer 
kJavanese 

Definition at line 67 of file validator.h.

71  {
72  public:
73  // Validates and cleans the src vector of unicodes to the *dest, according to
74  // g_mode. In the case of kSingleString, a single vector containing the whole
75  // result is added to *dest. With kCombined, multiple vectors are added to
76  // *dest with one grapheme in each. With kGlyphSplit, multiple vectors are
77  // added to *dest with a smaller unit representing a glyph in each.
78  // In case of validation error, returns false and as much as possible of the
79  // input, without discarding invalid text.
80  static bool ValidateCleanAndSegment(GraphemeNormMode g_mode,
81  bool report_errors,
82  const std::vector<char32>& src,

◆ WritingDirection

The grapheme clusters within a line of text are laid out logically in this direction, judged when looking at the text line rotated so that its Orientation is "page up".

For English text, the writing direction is left-to-right. For the Chinese text in the above example, the writing direction is top-to-bottom.

Enumerator
WRITING_DIRECTION_LEFT_TO_RIGHT 
WRITING_DIRECTION_RIGHT_TO_LEFT 
WRITING_DIRECTION_TOP_TO_BOTTOM 

Definition at line 131 of file publictypes.h.

◆ XHeightConsistencyEnum

Enumerator
XH_GOOD 
XH_SUBNORMAL 
XH_INCONSISTENT 

Definition at line 78 of file dict.h.

Function Documentation

◆ AccumulateVector()

void tesseract::AccumulateVector ( int  n,
const double *  src,
double *  dest 
)
inline

Definition at line 174 of file functions.h.

174  {
175  for (int i = 0; i < n; ++i) dest[i] += src[i];
176 }

◆ AsciiLikelyListItem()

bool tesseract::AsciiLikelyListItem ( const STRING word)

Definition at line 296 of file paragraphs.cpp.

296  {
297  public:
298  UnicodeSpanSkipper(const UNICHARSET *unicharset, const WERD_CHOICE *word)

◆ assign_blobs_to_blocks2()

void tesseract::assign_blobs_to_blocks2 ( Pix *  pix,
BLOCK_LIST *  blocks,
TO_BLOCK_LIST *  port_blocks 
)

Definition at line 165 of file tordmain.cpp.

170  { // output list
171  BLOCK *block; // current block
172  BLOBNBOX *newblob; // created blob
173  C_BLOB *blob; // current blob
174  BLOCK_IT block_it = blocks;
175  C_BLOB_IT blob_it; // iterator
176  BLOBNBOX_IT port_box_it; // iterator
177  // destination iterator
178  TO_BLOCK_IT port_block_it = port_blocks;
179  TO_BLOCK *port_block; // created block
180 
181  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
182  block = block_it.data();
183  port_block = new TO_BLOCK(block);
184 
185  // Convert the good outlines to block->blob_list
186  port_box_it.set_to_list(&port_block->blobs);
187  blob_it.set_to_list(block->blob_list());
188  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
189  blob = blob_it.extract();
190  newblob = new BLOBNBOX(blob); // Convert blob to BLOBNBOX.
191  SetBlobStrokeWidth(pix, newblob);
192  port_box_it.add_after_then_move(newblob);
193  }
194 
195  // Put the rejected outlines in block->noise_blobs, which allows them to
196  // be reconsidered and sorted back into rows and recover outlines mistakenly
197  // rejected.
198  port_box_it.set_to_list(&port_block->noise_blobs);
199  blob_it.set_to_list(block->reject_blobs());
200  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
201  blob = blob_it.extract();
202  newblob = new BLOBNBOX(blob); // Convert blob to BLOBNBOX.
203  SetBlobStrokeWidth(pix, newblob);
204  port_box_it.add_after_then_move(newblob);
205  }
206 

◆ BlobToTrainingSample()

TrainingSample * tesseract::BlobToTrainingSample ( const TBLOB blob,
bool  nonlinear_norm,
INT_FX_RESULT_STRUCT fx_info,
GenericVector< INT_FEATURE_STRUCT > *  bl_features 
)

Definition at line 75 of file intfx.cpp.

78  {
80  Classify::ExtractFeatures(blob, nonlinear_norm, bl_features,
81  &cn_features, fx_info, nullptr);
82  // TODO(rays) Use blob->PreciseBoundingBox() instead.
83  TBOX box = blob.bounding_box();
84  TrainingSample* sample = nullptr;
85  int num_features = fx_info->NumCN;
86  if (num_features > 0) {
87  sample = TrainingSample::CopyFromFeatures(*fx_info, box, &cn_features[0],
88  num_features);
89  }
90  if (sample != nullptr) {
91  // Set the bounding box (in original image coordinates) in the sample.
92  TPOINT topleft, botright;
93  topleft.x = box.left();
94  topleft.y = box.top();
95  botright.x = box.right();
96  botright.y = box.bottom();
97  TPOINT original_topleft, original_botright;
98  blob.denorm().DenormTransform(nullptr, topleft, &original_topleft);
99  blob.denorm().DenormTransform(nullptr, botright, &original_botright);
100  sample->set_bounding_box(TBOX(original_topleft.x, original_botright.y,
101  original_botright.x, original_topleft.y));
102  }
103  return sample;

◆ CanonicalizeDetectionResults()

void tesseract::CanonicalizeDetectionResults ( GenericVector< PARA * > *  row_owners,
PARA_LIST *  paragraphs 
)

Definition at line 2252 of file paragraphs.cpp.

2252  {
2253  continue;
2254  }
2255  out.add_after_then_move(rows[i]);
2256  }
2257 }
2258 
2259 // Main entry point for Paragraph Detection Algorithm.
2260 //
2261 // Given a set of equally spaced textlines (described by row_infos),
2262 // Split them into paragraphs.
2263 //
2264 // Output:
2265 // row_owners - one pointer for each row, to the paragraph it belongs to.
2266 // paragraphs - this is the actual list of PARA objects.
2267 // models - the list of paragraph models referenced by the PARA objects.
2268 // caller is responsible for deleting the models.
2269 void DetectParagraphs(int debug_level,
2270  GenericVector<RowInfo> *row_infos,
2271  GenericVector<PARA *> *row_owners,
2272  PARA_LIST *paragraphs,

◆ ClearFeatureSpaceWindow()

void tesseract::ClearFeatureSpaceWindow ( NORM_METHOD  norm_method,
ScrollView window 
)

Clears the given window and draws the featurespace guides for the appropriate normalization method.

Definition at line 987 of file intproto.cpp.

988  {
989  window->Clear();
990 
991  window->Pen(ScrollView::GREY);
992  // Draw the feature space limit rectangle.
993  window->Rectangle(0, 0, INT_MAX_X, INT_MAX_Y);
994  if (norm_method == baseline) {
995  window->SetCursor(0, INT_DESCENDER);
996  window->DrawTo(INT_MAX_X, INT_DESCENDER);
997  window->SetCursor(0, INT_BASELINE);
998  window->DrawTo(INT_MAX_X, INT_BASELINE);
999  window->SetCursor(0, INT_XHEIGHT);
1000  window->DrawTo(INT_MAX_X, INT_XHEIGHT);
1001  window->SetCursor(0, INT_CAPHEIGHT);
1002  window->DrawTo(INT_MAX_X, INT_CAPHEIGHT);
1003  } else {
1006  }

◆ ClipVector()

template<typename T >
void tesseract::ClipVector ( int  n,
T  lower,
T  upper,
T *  vec 
)
inline

Definition at line 208 of file functions.h.

208  {
209  for (int i = 0; i < n; ++i) vec[i] = ClipToRange(vec[i], lower, upper);
210 }

◆ cmp_eq()

template<typename T >
bool tesseract::cmp_eq ( T const &  t1,
T const &  t2 
)

Definition at line 375 of file genericvector.h.

375  {
376  return t1 == t2;
377 }

◆ CodeInBinary()

void tesseract::CodeInBinary ( int  n,
int  nf,
double *  vec 
)
inline

Definition at line 214 of file functions.h.

214  {
215  if (nf <= 0 || n < nf) return;
216  int index = 0;
217  double best_score = vec[0];
218  for (int i = 1; i < n; ++i) {
219  if (vec[i] > best_score) {
220  best_score = vec[i];
221  index = i;
222  }
223  }
224  int mask = 1;
225  for (int i = 0; i < nf; ++i, mask *= 2) {
226  vec[i] = (index & mask) ? 1.0 : 0.0;
227  }
228 }

◆ CodepointList()

std::string tesseract::CodepointList ( const std::vector< char32 > &  str32)
inline

Definition at line 24 of file normstrngs_test.h.

24  {
25  std::stringstream result;
26  int total_chars = str32.size();
27  result << std::hex;
28  for (int i = 0; i < total_chars; ++i) {
29  result << "[" << str32[i] << "]";
30  }
31  return result.str();
32 }

◆ CombineLangModel()

int tesseract::CombineLangModel ( const UNICHARSET unicharset,
const std::string script_dir,
const std::string version_str,
const std::string output_dir,
const std::string lang,
bool  pass_through_recoder,
const GenericVector< STRING > &  words,
const GenericVector< STRING > &  puncs,
const GenericVector< STRING > &  numbers,
bool  lang_is_rtl,
FileReader  reader,
FileWriter  writer 
)

Definition at line 185 of file lang_model_helpers.cpp.

191  {
192  // Build the traineddata file.
193  TessdataManager traineddata;
194  if (!version_str.empty()) {
195  traineddata.SetVersionString(traineddata.VersionString() + ":" +
196  version_str);
197  }
198  // Unicharset and recoder.
199  if (!WriteUnicharset(unicharset, output_dir, lang, writer, &traineddata)) {
200  tprintf("Error writing unicharset!!\n");
201  return EXIT_FAILURE;
202  } else {
203  tprintf("Config file is optional, continuing...\n");
204  }
205  // If there is a config file, read it and add to traineddata.
206  std::string config_filename = script_dir + "/" + lang + "/" + lang + ".config";
207  STRING config_file = ReadFile(config_filename, reader);
208  if (config_file.length() > 0) {
209  traineddata.OverwriteEntry(TESSDATA_LANG_CONFIG, &config_file[0],
210  config_file.length());
211  }
212  std::string radical_filename = script_dir + "/radical-stroke.txt";
213  STRING radical_data = ReadFile(radical_filename, reader);
214  if (radical_data.length() == 0) {
215  tprintf("Error reading radical code table %s\n", radical_filename.c_str());
216  return EXIT_FAILURE;
217  }
218  if (!WriteRecoder(unicharset, pass_through_recoder, output_dir, lang, writer,
219  &radical_data, &traineddata)) {
220  tprintf("Error writing recoder!!\n");
221  }
222  if (!words.empty() || !puncs.empty() || !numbers.empty()) {
223  if (!WriteDawgs(words, puncs, numbers, lang_is_rtl, unicharset,
224  &traineddata)) {
225  tprintf("Error during conversion of wordlists to DAWGs!!\n");
226  return EXIT_FAILURE;
227  }
228  }
229 
230  // Traineddata file.
231  GenericVector<char> traineddata_data;
232  traineddata.Serialize(&traineddata_data);
233  if (!WriteFile(output_dir, lang, ".traineddata", traineddata_data, writer)) {
234  tprintf("Error writing output traineddata file!!\n");
235  return EXIT_FAILURE;
236  }
237  return EXIT_SUCCESS;
238 }

◆ CompareFontInfo()

bool tesseract::CompareFontInfo ( const FontInfo fi1,
const FontInfo fi2 
)

Definition at line 122 of file fontinfo.cpp.

122  {
123  // The font properties are required to be the same for two font with the same
124  // name, so there is no need to test them.
125  // Consequently, querying the table with only its font name as information is
126  // enough to retrieve its properties.
127  return strcmp(fi1.name, fi2.name) == 0;
128 }

◆ CompareFontSet()

bool tesseract::CompareFontSet ( const FontSet fs1,
const FontSet fs2 
)

Definition at line 130 of file fontinfo.cpp.

130  {
131  if (fs1.size != fs2.size)
132  return false;
133  for (int i = 0; i < fs1.size; ++i) {
134  if (fs1.configs[i] != fs2.configs[i])
135  return false;
136  }
137  return true;
138 }

◆ CopyVector()

void tesseract::CopyVector ( int  n,
const double *  src,
double *  dest 
)
inline

Definition at line 169 of file functions.h.

169  {
170  memcpy(dest, src, n * sizeof(dest[0]));
171 }

◆ countof()

template<typename T , size_t N>
constexpr size_t tesseract::countof ( T   const(&)[N])
constexprnoexcept

Definition at line 41 of file serialis.h.

43  {

◆ CrownCompatible()

bool tesseract::CrownCompatible ( const GenericVector< RowScratchRegisters > *  rows,
int  a,
int  b,
const ParagraphModel model 
)

Definition at line 1314 of file paragraphs.cpp.

1323  : theory_(theory), rows_(rows), row_start_(row_start),
1324  row_end_(row_end) {
1325  if (!AcceptableRowArgs(0, 0, __func__, rows, row_start, row_end)) {
1326  row_start_ = 0;
1327  row_end_ = 0;
1328  return;
1329  }
1330  SetOfModels no_models;

◆ DegradeImage()

struct Pix * tesseract::DegradeImage ( Pix *  input,
int  exposure,
TRand randomizer,
float *  rotation 
)

Definition at line 108 of file degradeimage.cpp.

112  {
113  float radians_clockwise = 0.0f;
114  if (*rotation) {
115  radians_clockwise = *rotation;
116  } else if (randomizer != nullptr) {
117  radians_clockwise = randomizer->SignedRand(kRotationRange);
118  }
119 
120  input = pixRotate(pix, radians_clockwise,
121  L_ROTATE_AREA_MAP, L_BRING_IN_WHITE,
122  0, 0);
123  // Rotate the boxes to match.
124  *rotation = radians_clockwise;
125  pixDestroy(&pix);
126  } else {
127  input = pix;
128  }
129 
130  if (exposure >= 3 || exposure == 1) {
131  // Erosion after the convolution is not as heavy as before, so it is
132  // good for level 1 and in addition as a level 3.
133  // This is backwards to binary morphology,
134  // see http://www.leptonica.com/grayscale-morphology.html
135  pix = input;
136  input = pixErodeGray(pix, 3, 3);
137  pixDestroy(&pix);
138  }
139  // The convolution really needed to be 2x2 to be realistic enough, but
140  // we only have 3x3, so we have to bias the image darker or lose thin
141  // strokes.
142  int erosion_offset = 0;
143  // For light and 0 exposure, there is no dilation, so compensate for the
144  // convolution with a big darkening bias which is undone for lighter
145  // exposures.
146  if (exposure <= 0)
147  erosion_offset = -3 * kExposureFactor;
148  // Add in a general offset of the greyscales for the exposure level so
149  // a threshold of 128 gives a reasonable binary result.
150  erosion_offset -= exposure * kExposureFactor;
151  // Add a gradual fade over the page and a small amount of salt and pepper
152  // noise to simulate noise in the sensor/paper fibres and varying
153  // illumination.
154  l_uint32* data = pixGetData(input);
155  for (int y = 0; y < height; ++y) {
156  for (int x = 0; x < width; ++x) {
157  int pixel = GET_DATA_BYTE(data, x);
158  if (randomizer != nullptr)
159  pixel += randomizer->IntRand() % (kSaltnPepper*2 + 1) - kSaltnPepper;
160  if (height + width > kMinRampSize)
161  pixel -= (2*x + y) * 32 / (height + width);
162  pixel += erosion_offset;
163  if (pixel < 0)
164  pixel = 0;
165  if (pixel > 255)
166  pixel = 255;
167  SET_DATA_BYTE(data, x, pixel);
168  }
169  data += input->wpl;
170  }
171  return input;
172 }
173 
174 // Creates and returns a Pix distorted by various means according to the bool
175 // flags. If boxes is not nullptr, the boxes are resized/positioned according to
176 // any spatial distortion and also by the integer reduction factor box_scale
177 // so they will match what the network will output.
178 // Returns nullptr on error. The returned Pix must be pixDestroyed.
179 Pix* PrepareDistortedPix(const Pix* pix, bool perspective, bool invert,
180  bool white_noise, bool smooth_noise, bool blur,
181  int box_reduction, TRand* randomizer,
182  GenericVector<TBOX>* boxes) {
183  Pix* distorted = pixCopy(nullptr, const_cast<Pix*>(pix));
184  // Things to do to synthetic training data.
185  if ((white_noise || smooth_noise) && randomizer->SignedRand(1.0) > 0.0) {
186  // TODO(rays) Cook noise in a more thread-safe manner than rand().
187  // Attempt to make the sequences reproducible.
188  srand(randomizer->IntRand());
189  Pix* pixn = pixAddGaussianNoise(distorted, 8.0);

◆ DeleteObject()

template<typename T >
void tesseract::DeleteObject ( T *  object)

Definition at line 155 of file tablefind.cpp.

155  {
156  delete object;
157 }

◆ DeSerialize() [1/8]

bool tesseract::DeSerialize ( FILE *  fp,
char *  data,
size_t  n = 1 
)

Definition at line 41 of file serialis.cpp.

43  {

◆ DeSerialize() [2/8]

bool tesseract::DeSerialize ( FILE *  fp,
float *  data,
size_t  n = 1 
)

Definition at line 45 of file serialis.cpp.

47  {

◆ DeSerialize() [3/8]

bool tesseract::DeSerialize ( FILE *  fp,
int16_t *  data,
size_t  n = 1 
)

Definition at line 53 of file serialis.cpp.

55  {

◆ DeSerialize() [4/8]

bool tesseract::DeSerialize ( FILE *  fp,
int32_t *  data,
size_t  n = 1 
)

Definition at line 57 of file serialis.cpp.

59  {

◆ DeSerialize() [5/8]

bool tesseract::DeSerialize ( FILE *  fp,
int8_t *  data,
size_t  n = 1 
)

Definition at line 49 of file serialis.cpp.

51  {

◆ DeSerialize() [6/8]

bool tesseract::DeSerialize ( FILE *  fp,
uint16_t *  data,
size_t  n = 1 
)

Definition at line 65 of file serialis.cpp.

67  {

◆ DeSerialize() [7/8]

bool tesseract::DeSerialize ( FILE *  fp,
uint32_t *  data,
size_t  n = 1 
)

Definition at line 69 of file serialis.cpp.

71  {

◆ DeSerialize() [8/8]

bool tesseract::DeSerialize ( FILE *  fp,
uint8_t *  data,
size_t  n = 1 
)

Definition at line 61 of file serialis.cpp.

63  {

◆ DetectParagraphs() [1/2]

void tesseract::DetectParagraphs ( int  debug_level,
bool  after_text_recognition,
const MutableIterator block_start,
GenericVector< ParagraphModel * > *  models 
)

Definition at line 2527 of file paragraphs.cpp.

2531  {
2532  if (!row.PageResIt()->row())
2533  continue; // empty row.
2534  row.PageResIt()->row()->row->set_para(nullptr);
2535  row_infos.push_back(RowInfo());
2536  RowInfo &ri = row_infos.back();
2537  InitializeRowInfo(after_text_recognition, row, &ri);
2538  } while (!row.IsAtFinalElement(RIL_BLOCK, RIL_TEXTLINE) &&
2539  row.Next(RIL_TEXTLINE));
2540 
2541  // If we're called before text recognition, we might not have
2542  // tight block bounding boxes, so trim by the minimum on each side.
2543  if (!row_infos.empty()) {
2544  int min_lmargin = row_infos[0].pix_ldistance;
2545  int min_rmargin = row_infos[0].pix_rdistance;
2546  for (int i = 1; i < row_infos.size(); i++) {
2547  if (row_infos[i].pix_ldistance < min_lmargin)
2548  min_lmargin = row_infos[i].pix_ldistance;
2549  if (row_infos[i].pix_rdistance < min_rmargin)
2550  min_rmargin = row_infos[i].pix_rdistance;
2551  }
2552  if (min_lmargin > 0 || min_rmargin > 0) {
2553  for (int i = 0; i < row_infos.size(); i++) {
2554  row_infos[i].pix_ldistance -= min_lmargin;
2555  row_infos[i].pix_rdistance -= min_rmargin;
2556  }
2557  }
2558  }
2559 
2560  // Run the paragraph detection algorithm.
2561  GenericVector<PARA *> row_owners;
2562  GenericVector<PARA *> the_paragraphs;
2563  if (!is_image_block) {
2564  DetectParagraphs(debug_level, &row_infos, &row_owners, block->para_list(),
2565  models);
2566  } else {
2567  row_owners.init_to_size(row_infos.size(), nullptr);
2568  CanonicalizeDetectionResults(&row_owners, block->para_list());
2569  }
2570 
2571  // Now stitch in the row_owners into the rows.
2572  row = *block_start;
2573  for (int i = 0; i < row_owners.size(); i++) {
2574  while (!row.PageResIt()->row())
2575  row.Next(RIL_TEXTLINE);
2576  row.PageResIt()->row()->row->set_para(row_owners[i]);
2577  row.Next(RIL_TEXTLINE);
2578  }
2579 }
2580 
2581 } // namespace

◆ DetectParagraphs() [2/2]

void tesseract::DetectParagraphs ( int  debug_level,
GenericVector< RowInfo > *  row_infos,
GenericVector< PARA * > *  row_owners,
PARA_LIST *  paragraphs,
GenericVector< ParagraphModel * > *  models 
)

Definition at line 2284 of file paragraphs.cpp.

2297  {
2298  // Pass 2a:
2299  // Find any strongly evidenced start-of-paragraph lines. If they're
2300  // followed by two lines that look like body lines, make a paragraph
2301  // model for that and see if that model applies throughout the text
2302  // (that is, "smear" it).
2303  StrongEvidenceClassify(debug_level, &rows,
2304  leftovers[i].begin, leftovers[i].end, &theory);
2305 
2306  // Pass 2b:
2307  // If we had any luck in pass 2a, we got part of the page and didn't
2308  // know how to classify a few runs of rows. Take the segments that
2309  // didn't find a model and reprocess them individually.
2310  GenericVector<Interval> leftovers2;
2311  LeftoverSegments(rows, &leftovers2, leftovers[i].begin, leftovers[i].end);
2312  bool pass2a_was_useful = leftovers2.size() > 1 ||
2313  (leftovers2.size() == 1 &&
2314  (leftovers2[0].begin != 0 || leftovers2[0].end != rows.size()));
2315  if (pass2a_was_useful) {
2316  for (int j = 0; j < leftovers2.size(); j++) {
2317  StrongEvidenceClassify(debug_level, &rows,
2318  leftovers2[j].begin, leftovers2[j].end,
2319  &theory);
2320  }
2321  }
2322  }
2323 
2324  DebugDump(debug_level > 1, "End of Pass 2", theory, rows);
2325 
2326  // Pass 3:
2327  // These are the dregs for which we didn't have enough strong textual
2328  // and geometric clues to form matching models for. Let's see if
2329  // the geometric clues are simple enough that we could just use those.
2330  LeftoverSegments(rows, &leftovers, 0, rows.size());
2331  for (int i = 0; i < leftovers.size(); i++) {
2332  GeometricClassify(debug_level, &rows,
2333  leftovers[i].begin, leftovers[i].end, &theory);
2334  }
2335 
2336  // Undo any flush models for which there's little evidence.
2337  DowngradeWeakestToCrowns(debug_level, &theory, &rows);
2338 
2339  DebugDump(debug_level > 1, "End of Pass 3", theory, rows);
2340 
2341  // Pass 4:
2342  // Take everything that's still not marked up well and clear all markings.
2343  LeftoverSegments(rows, &leftovers, 0, rows.size());
2344  for (int i = 0; i < leftovers.size(); i++) {
2345  for (int j = leftovers[i].begin; j < leftovers[i].end; j++) {
2346  rows[j].SetUnknown();
2347  }
2348  }
2349 
2350  DebugDump(debug_level > 1, "End of Pass 4", theory, rows);
2351 
2352  // Convert all of the unique hypothesis runs to PARAs.
2353  ConvertHypothesizedModelRunsToParagraphs(debug_level, rows, row_owners,
2354  &theory);
2355 
2356  DebugDump(debug_level > 0, "Final Paragraph Segmentation", theory, rows);
2357 
2358  // Finally, clean up any dangling nullptr row paragraph parents.
2359  CanonicalizeDetectionResults(row_owners, paragraphs);
2360 }
2361 
2362 // ============ Code interfacing with the rest of Tesseract ==================
2363 
2364 static void InitializeTextAndBoxesPreRecognition(const MutableIterator &it,
2365  RowInfo *info) {
2366  // Set up text, lword_text, and rword_text (mostly for debug printing).
2367  STRING fake_text;
2368  PageIterator pit(static_cast<const PageIterator&>(it));
2369  bool first_word = true;
2370  if (!pit.Empty(RIL_WORD)) {
2371  do {
2372  fake_text += "x";
2373  if (first_word) info->lword_text += "x";
2374  info->rword_text += "x";
2375  if (pit.IsAtFinalElement(RIL_WORD, RIL_SYMBOL) &&

◆ DotProductAVX()

double tesseract::DotProductAVX ( const double *  u,
const double *  v,
int  n 
)

Definition at line 30 of file dotproductavx.cpp.

30  {
31  const unsigned quot = n / 8;
32  const unsigned rem = n % 8;
33  __m256d t0 = _mm256_setzero_pd();
34  __m256d t1 = _mm256_setzero_pd();
35  for (unsigned k = 0; k < quot; k++) {
36  __m256d f0 = _mm256_loadu_pd(u);
37  __m256d f1 = _mm256_loadu_pd(v);
38  f0 = _mm256_mul_pd(f0, f1);
39  t0 = _mm256_add_pd(t0, f0);
40  u += 4;
41  v += 4;
42  __m256d f2 = _mm256_loadu_pd(u);
43  __m256d f3 = _mm256_loadu_pd(v);
44  f2 = _mm256_mul_pd(f2, f3);
45  t1 = _mm256_add_pd(t1, f2);
46  u += 4;
47  v += 4;
48  }
49  t0 = _mm256_hadd_pd(t0, t1);
50  alignas(32) double tmp[4];
51  _mm256_store_pd(tmp, t0);
52  double result = tmp[0] + tmp[1] + tmp[2] + tmp[3];
53  for (unsigned k = 0; k < rem; k++) {
54  result += *u++ * *v++;
55  }
56  return result;
57 }

◆ DotProductFMA()

double tesseract::DotProductFMA ( const double *  u,
const double *  v,
int  n 
)

Definition at line 30 of file dotproductfma.cpp.

30  {
31  const unsigned quot = n / 8;
32  const unsigned rem = n % 8;
33  __m256d t0 = _mm256_setzero_pd();
34  __m256d t1 = _mm256_setzero_pd();
35  for (unsigned k = 0; k < quot; k++) {
36  __m256d f0 = _mm256_loadu_pd(u);
37  __m256d f1 = _mm256_loadu_pd(v);
38  t0 = _mm256_fmadd_pd(f0, f1, t0);
39  u += 4;
40  v += 4;
41  __m256d f2 = _mm256_loadu_pd(u);
42  __m256d f3 = _mm256_loadu_pd(v);
43  t1 = _mm256_fmadd_pd(f2, f3, t1);
44  u += 4;
45  v += 4;
46  }
47  t0 = _mm256_hadd_pd(t0, t1);
48  alignas(32) double tmp[4];
49  _mm256_store_pd(tmp, t0);
50  double result = tmp[0] + tmp[1] + tmp[2] + tmp[3];
51  for (unsigned k = 0; k < rem; k++) {
52  result += *u++ * *v++;
53  }
54  return result;
55 }

◆ DotProductNative()

double tesseract::DotProductNative ( const double *  u,
const double *  v,
int  n 
)

Definition at line 22 of file dotproduct.cpp.

22  {
23  double total = 0.0;
24  for (int k = 0; k < n; ++k) total += u[k] * v[k];
25  return total;
26 }

◆ DotProductSSE()

double tesseract::DotProductSSE ( const double *  u,
const double *  v,
int  n 
)

Definition at line 31 of file dotproductsse.cpp.

31  {
32  int max_offset = n - 2;
33  int offset = 0;
34  // Accumulate a set of 2 sums in sum, by loading pairs of 2 values from u and
35  // v, and multiplying them together in parallel.
36  __m128d sum = _mm_setzero_pd();
37  if (offset <= max_offset) {
38  offset = 2;
39  // Aligned load is reputedly faster but requires 16 byte aligned input.
40  if ((reinterpret_cast<uintptr_t>(u) & 15) == 0 &&
41  (reinterpret_cast<uintptr_t>(v) & 15) == 0) {
42  // Use aligned load.
43  sum = _mm_load_pd(u);
44  __m128d floats2 = _mm_load_pd(v);
45  // Multiply.
46  sum = _mm_mul_pd(sum, floats2);
47  while (offset <= max_offset) {
48  __m128d floats1 = _mm_load_pd(u + offset);
49  floats2 = _mm_load_pd(v + offset);
50  offset += 2;
51  floats1 = _mm_mul_pd(floats1, floats2);
52  sum = _mm_add_pd(sum, floats1);
53  }
54  } else {
55  // Use unaligned load.
56  sum = _mm_loadu_pd(u);
57  __m128d floats2 = _mm_loadu_pd(v);
58  // Multiply.
59  sum = _mm_mul_pd(sum, floats2);
60  while (offset <= max_offset) {
61  __m128d floats1 = _mm_loadu_pd(u + offset);
62  floats2 = _mm_loadu_pd(v + offset);
63  offset += 2;
64  floats1 = _mm_mul_pd(floats1, floats2);
65  sum = _mm_add_pd(sum, floats1);
66  }
67  }
68  }
69  // Add the 2 sums in sum horizontally.
70  sum = _mm_hadd_pd(sum, sum);
71  // Extract the low result.
72  double result = _mm_cvtsd_f64(sum);
73  // Add on any left-over products.
74  while (offset < n) {
75  result += u[offset] * v[offset];
76  ++offset;
77  }
78  return result;
79 }

◆ ExpectGraphemeModeResults()

void tesseract::ExpectGraphemeModeResults ( const std::string str,
UnicodeNormMode  u_mode,
int  unicode_count,
int  glyph_count,
int  grapheme_count,
const std::string target_str 
)
inline

Definition at line 48 of file normstrngs_test.h.

51  {
52  std::vector<std::string> glyphs;
53  EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(
54  u_mode, OCRNorm::kNone, GraphemeNormMode::kIndividualUnicodes, true,
55  str.c_str(), &glyphs));
56  EXPECT_EQ(glyphs.size(), unicode_count)
58  EXPECT_EQ(target_str, absl::StrJoin(glyphs.begin(), glyphs.end(), ""));
59  EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(u_mode, OCRNorm::kNone,
60  GraphemeNormMode::kGlyphSplit, true,
61  str.c_str(), &glyphs));
62  EXPECT_EQ(glyphs.size(), glyph_count)
64  EXPECT_EQ(target_str, absl::StrJoin(glyphs.begin(), glyphs.end(), ""));
65  EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(u_mode, OCRNorm::kNone,
66  GraphemeNormMode::kCombined, true,
67  str.c_str(), &glyphs));
68  EXPECT_EQ(glyphs.size(), grapheme_count)
70  EXPECT_EQ(target_str, absl::StrJoin(glyphs.begin(), glyphs.end(), ""));
71  EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(u_mode, OCRNorm::kNone,
72  GraphemeNormMode::kSingleString,
73  true, str.c_str(), &glyphs));
74  EXPECT_EQ(glyphs.size(), 1) << PrintStringVectorWithUnicodes(glyphs);
75  EXPECT_EQ(target_str, glyphs[0]);
76  std::string result;
77  EXPECT_TRUE(NormalizeUTF8String(
78  u_mode, OCRNorm::kNone, GraphemeNorm::kNormalize, str.c_str(), &result));
79  EXPECT_EQ(target_str, result);
80 }

◆ ExtractFontName()

void tesseract::ExtractFontName ( const STRING filename,
STRING fontname 
)

Public Code

Definition at line 45 of file blobclass.cpp.

45  {
46  *fontname = classify_font_name;
47  if (*fontname == kUnknownFontName) {
48  // filename is expected to be of the form [lang].[fontname].exp[num]
49  // The [lang], [fontname] and [num] fields should not have '.' characters.
50  const char *basename = strrchr(filename.c_str(), '/');
51  const char *firstdot = strchr(basename ? basename : filename.c_str(), '.');
52  const char *lastdot = strrchr(filename.c_str(), '.');
53  if (firstdot != lastdot && firstdot != nullptr && lastdot != nullptr) {
54  ++firstdot;
55  *fontname = firstdot;
56  fontname->truncate_at(lastdot - firstdot);
57  }
58  }
59 }

◆ FirstWordWouldHaveFit() [1/2]

bool tesseract::FirstWordWouldHaveFit ( const RowScratchRegisters before,
const RowScratchRegisters after 
)

Definition at line 1671 of file paragraphs.cpp.

1672  {
1673  if (before.ri_->ltr) {
1674  return before.ri_->rword_likely_ends_idea &&
1675  after.ri_->lword_likely_starts_idea;
1676  } else {
1677  return before.ri_->lword_likely_ends_idea &&
1678  after.ri_->rword_likely_starts_idea;
1679  }
1680 }
1681 
1682 static bool LikelyParagraphStart(const RowScratchRegisters &before,
1683  const RowScratchRegisters &after,

◆ FirstWordWouldHaveFit() [2/2]

bool tesseract::FirstWordWouldHaveFit ( const RowScratchRegisters before,
const RowScratchRegisters after,
tesseract::ParagraphJustification  justification 
)

Definition at line 1646 of file paragraphs.cpp.

1657  {
1658  if (before.ri_->num_words == 0 || after.ri_->num_words == 0)
1659  return true;
1660 
1661  int available_space = before.lindent_;
1662  if (before.rindent_ > available_space)
1663  available_space = before.rindent_;
1664  available_space -= before.ri_->average_interword_space;
1665 
1666  if (before.ri_->ltr)

◆ FontInfoDeleteCallback()

void tesseract::FontInfoDeleteCallback ( FontInfo  f)

Definition at line 141 of file fontinfo.cpp.

141  {
142  if (f.spacing_vec != nullptr) {
143  f.spacing_vec->delete_data_pointers();
144  delete f.spacing_vec;
145  f.spacing_vec = nullptr;
146  }
147  delete[] f.name;
148  f.name = nullptr;
149 }

◆ FontSetDeleteCallback()

void tesseract::FontSetDeleteCallback ( FontSet  fs)

Definition at line 150 of file fontinfo.cpp.

150  {
151  delete[] fs.configs;
152 }

◆ FullwidthToHalfwidth()

char32 tesseract::FullwidthToHalfwidth ( const char32  ch)

Definition at line 298 of file normstrngs.cpp.

◆ FuncInplace()

template<class Func >
void tesseract::FuncInplace ( int  n,
double *  inout 
)
inline

Definition at line 129 of file functions.h.

129  {
130  Func f;
131  for (int i = 0; i < n; ++i) {
132  inout[i] = f(inout[i]);
133  }
134 }

◆ FuncMultiply()

template<class Func >
void tesseract::FuncMultiply ( const double *  u,
const double *  v,
int  n,
double *  out 
)
inline

Definition at line 138 of file functions.h.

138  {
139  Func f;
140  for (int i = 0; i < n; ++i) {
141  out[i] = f(u[i]) * v[i];
142  }
143 }

◆ GeneratePerspectiveDistortion()

void tesseract::GeneratePerspectiveDistortion ( int  width,
int  height,
TRand randomizer,
Pix **  pix,
GenericVector< TBOX > *  boxes 
)

Definition at line 237 of file degradeimage.cpp.

240  {
241  // Transform the boxes.
242  for (int b = 0; b < boxes->size(); ++b) {
243  int x1, y1, x2, y2;
244  const TBOX& box = (*boxes)[b];
245  projectiveXformSampledPt(box_coeffs, box.left(), height - box.top(), &x1,
246  &y1);
247  projectiveXformSampledPt(box_coeffs, box.right(), height - box.bottom(),
248  &x2, &y2);
249  TBOX new_box1(x1, height - y2, x2, height - y1);
250  projectiveXformSampledPt(box_coeffs, box.left(), height - box.bottom(),
251  &x1, &y1);
252  projectiveXformSampledPt(box_coeffs, box.right(), height - box.top(), &x2,
253  &y2);
254  TBOX new_box2(x1, height - y1, x2, height - y2);
255  (*boxes)[b] = new_box1.bounding_union(new_box2);
256  }
257  }
258  free(im_coeffs);
259  free(box_coeffs);
260 }
261 
262 // Computes the coefficients of a randomized projective transformation.
263 // The image transform requires backward transformation coefficient, and the
264 // box transform the forward coefficients.
265 // Returns the incolor arg to pixProjective.
266 int ProjectiveCoeffs(int width, int height, TRand* randomizer,
267  float** im_coeffs, float** box_coeffs) {
268  // Setup "from" points.
269  Pta* src_pts = ptaCreate(4);
270  ptaAddPt(src_pts, 0.0f, 0.0f);
271  ptaAddPt(src_pts, width, 0.0f);
272  ptaAddPt(src_pts, width, height);
273  ptaAddPt(src_pts, 0.0f, height);
274  // Extract factors from pseudo-random sequence.
275  float factors[FN_NUM_FACTORS];
276  float shear = 0.0f; // Shear is signed.
277  for (int i = 0; i < FN_NUM_FACTORS; ++i) {

◆ GetXheightString()

std::string tesseract::GetXheightString ( const std::string script_dir,
const UNICHARSET unicharset 
)

Definition at line 164 of file unicharset_training_utils.cpp.

165  {
166  std::string xheights_str;
167  for (int s = 0; s < unicharset.get_script_table_size(); ++s) {
168  // Load the xheights for the script if available.
169  std::string filename = script_dir + "/" +
170  unicharset.get_script_from_script_id(s) + ".xheights";
171  std::string script_heights;
172  if (File::ReadFileToString(filename, &script_heights))
173  xheights_str += script_heights;
174  }
175  return xheights_str;
176 }

◆ HistogramRect()

void tesseract::HistogramRect ( Pix *  src_pix,
int  channel,
int  left,
int  top,
int  width,
int  height,
int *  histogram 
)

Definition at line 166 of file otsuthr.cpp.

171  {
172  int H = 0;
173  double mu_T = 0.0;
174  for (int i = 0; i < kHistogramSize; ++i) {
175  H += histogram[i];
176  mu_T += static_cast<double>(i) * histogram[i];
177  }
178 
179  // Now maximize sig_sq_B over t.
180  // http://www.ctie.monash.edu.au/hargreave/Cornall_Terry_328.pdf
181  int best_t = -1;
182  int omega_0, omega_1;

◆ HOcrEscape()

STRING tesseract::HOcrEscape ( const char *  text)

Escape a char string - replace &<>"' with HTML codes.

Escape a char string - replace <>&"' with HTML codes.

Definition at line 2307 of file baseapi.cpp.

2307  {
2308  STRING ret;
2309  const char *ptr;
2310  for (ptr = text; *ptr; ptr++) {
2311  switch (*ptr) {
2312  case '<': ret += "&lt;"; break;
2313  case '>': ret += "&gt;"; break;
2314  case '&': ret += "&amp;"; break;
2315  case '"': ret += "&quot;"; break;
2316  case '\'': ret += "&#39;"; break;
2317  default: ret += *ptr;
2318  }
2319  }
2320  return ret;
2321 }

◆ InterwordSpace()

int tesseract::InterwordSpace ( const GenericVector< RowScratchRegisters > &  rows,
int  row_start,
int  row_end 
)

Definition at line 1623 of file paragraphs.cpp.

1626  : minimum_reasonable_space;
1627 }
1628 
1629 // Return whether the first word on the after line can fit in the space at
1630 // the end of the before line (knowing which way the text is aligned and read).
1631 bool FirstWordWouldHaveFit(const RowScratchRegisters &before,
1632  const RowScratchRegisters &after,
1633  tesseract::ParagraphJustification justification) {
1634  if (before.ri_->num_words == 0 || after.ri_->num_words == 0)
1635  return true;
1636 
1637  if (justification == JUSTIFICATION_UNKNOWN) {
1638  tprintf("Don't call FirstWordWouldHaveFit(r, s, JUSTIFICATION_UNKNOWN).\n");
1639  }
1640  int available_space;
1641  if (justification == JUSTIFICATION_CENTER) {
1642  available_space = before.lindent_ + before.rindent_;

◆ IsInterchangeValid()

bool tesseract::IsInterchangeValid ( const char32  ch)

Definition at line 269 of file normstrngs.cpp.

276  {
277  return IsValidCodepoint(ch) && ch <= 128 &&
278  (!u_isISOControl(static_cast<UChar32>(ch)) || ch == '\n' ||
279  ch == '\f' || ch == '\t' || ch == '\r');
280 }
281 
283  // Return unchanged if not in the fullwidth-halfwidth Unicode block.
284  if (ch < 0xFF00 || ch > 0xFFEF || !IsValidCodepoint(ch)) {
285  if (ch != 0x3000) return ch;
286  }
287  // Special case for fullwidth left and right "white parentheses".
288  if (ch == 0xFF5F) return 0x2985;
289  if (ch == 0xFF60) return 0x2986;
290  // Construct a full-to-half width transliterator.

◆ IsInterchangeValid7BitAscii()

bool tesseract::IsInterchangeValid7BitAscii ( const char32  ch)

Definition at line 292 of file normstrngs.cpp.

◆ IsLeftIndented()

bool tesseract::IsLeftIndented ( const EquationDetect::IndentType  type)
inline

Definition at line 92 of file equationdetect.cpp.

92  {
93  return type == EquationDetect::LEFT_INDENT ||
94  type == EquationDetect::BOTH_INDENT;
95 }

◆ IsOCREquivalent()

bool tesseract::IsOCREquivalent ( char32  ch1,
char32  ch2 
)

Definition at line 230 of file normstrngs.cpp.

233  {

◆ IsRightIndented()

bool tesseract::IsRightIndented ( const EquationDetect::IndentType  type)
inline

Definition at line 97 of file equationdetect.cpp.

97  {
98  return type == EquationDetect::RIGHT_INDENT ||
99  type == EquationDetect::BOTH_INDENT;
100 }

◆ IsTextOrEquationType()

bool tesseract::IsTextOrEquationType ( PolyBlockType  type)
inline

Definition at line 88 of file equationdetect.cpp.

88  {
89  return PTIsTextType(type) || type == PT_EQUATION;
90 }

◆ IsUTF8Whitespace()

bool tesseract::IsUTF8Whitespace ( const char *  text)

Definition at line 245 of file normstrngs.cpp.

246  {
247  if (IsWhitespace(*it)) break;

◆ IsValidCodepoint()

bool tesseract::IsValidCodepoint ( const char32  ch)

Definition at line 234 of file normstrngs.cpp.

236  {
237  if (!IsWhitespace(*it)) break;

◆ IsWhitespace()

bool tesseract::IsWhitespace ( const char32  ch)

Definition at line 239 of file normstrngs.cpp.

243  {

◆ LeftWordAttributes()

void tesseract::LeftWordAttributes ( const UNICHARSET unicharset,
const WERD_CHOICE werd,
const STRING utf8,
bool *  is_list,
bool *  starts_idea,
bool *  ends_idea 
)

Definition at line 423 of file paragraphs.cpp.

425  {
426  *starts_idea = true;
427  }
428  if (unicharset->get_ispunctuation(werd->unichar_id(0))) {
429  *starts_idea = true;
430  *ends_idea = true;
431  }
432  } else { // Assume utf8 is mostly ASCII
433  if (AsciiLikelyListItem(utf8)) {
434  *is_list = true;
435  *starts_idea = true;
436  }
437  int start_letter = utf8[0];
438  if (IsOpeningPunct(start_letter)) {
439  *starts_idea = true;
440  }
441  if (IsTerminalPunct(start_letter)) {
442  *ends_idea = true;
443  }
444  if (start_letter >= 'A' && start_letter <= 'Z') {
445  *starts_idea = true;
446  }
447  }
448 }
449 
450 // Given the rightmost word of a line either as a Tesseract unicharset + werd
451 // or a utf8 string, set the following attributes for it:
452 // is_list - this word might be a list number or bullet.
453 // starts_idea - this word is likely to start a sentence.
454 // ends_idea - this word is likely to end a sentence.
455 void RightWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd,
456  const STRING &utf8,
457  bool *is_list, bool *starts_idea, bool *ends_idea) {
458  *is_list = false;
459  *starts_idea = false;
460  *ends_idea = false;
461  if (utf8.size() == 0 || (werd != nullptr && werd->length() == 0)) { // Empty
462  *ends_idea = true;
463  return;

◆ LoadDataFromFile()

bool tesseract::LoadDataFromFile ( const char *  filename,
GenericVector< char > *  data 
)
inline

Definition at line 341 of file genericvector.h.

341  {
342  bool result = false;
343  FILE* fp = fopen(filename, "rb");
344  if (fp != nullptr) {
345  fseek(fp, 0, SEEK_END);
346  auto size = std::ftell(fp);
347  fseek(fp, 0, SEEK_SET);
348  // Trying to open a directory on Linux sets size to LONG_MAX. Catch it here.
349  if (size > 0 && size < LONG_MAX) {
350  // reserve an extra byte in case caller wants to append a '\0' character
351  data->reserve(size + 1);
352  data->resize_no_init(size);
353  result = static_cast<long>(fread(&(*data)[0], 1, size, fp)) == size;
354  }
355  fclose(fp);
356  }
357  return result;
358 }

◆ LoadFileLinesToStrings()

bool tesseract::LoadFileLinesToStrings ( const char *  filename,
GenericVector< STRING > *  lines 
)
inline

Definition at line 43 of file fileio.h.

43  {
44  public:
45  // Try to open the file 'filename' in mode 'mode'.
46  // Stop the program if it cannot open it.
47  static FILE* OpenOrDie(const std::string& filename, const std::string& mode);
48  static FILE* Open(const std::string& filename, const std::string& mode);
49 
50  // Try to open the file 'filename' and to write 'str' in it.
51  // Stop the program if it fails.
52  static void WriteStringToFileOrDie(const std::string& str, const std::string& filename);

◆ LoadShapeTable()

ShapeTable * tesseract::LoadShapeTable ( const STRING file_prefix)

Definition at line 154 of file commontraining.cpp.

154  {
155  ShapeTable* shape_table = nullptr;
156  STRING shape_table_file = file_prefix;
157  shape_table_file += kShapeTableFileSuffix;
158  TFile shape_fp;
159  if (shape_fp.Open(shape_table_file.c_str(), nullptr)) {
160  shape_table = new ShapeTable;
161  if (!shape_table->DeSerialize(&shape_fp)) {
162  delete shape_table;
163  shape_table = nullptr;
164  tprintf("Error: Failed to read shape table %s\n",
165  shape_table_file.c_str());
166  } else {
167  int num_shapes = shape_table->NumShapes();
168  tprintf("Read shape table %s of %d shapes\n",
169  shape_table_file.c_str(), num_shapes);
170  }
171  } else {
172  tprintf("Warning: No shape table file present: %s\n",
173  shape_table_file.c_str());
174  }
175  return shape_table;
176 }

◆ LoadTrainingData()

MasterTrainer * tesseract::LoadTrainingData ( int  argc,
const char *const *  argv,
bool  replication,
ShapeTable **  shape_table,
STRING file_prefix 
)

Creates a MasterTrainer and loads the training data into it: Initializes feature_defs and IntegerFX. Loads the shape_table if shape_table != nullptr. Loads initial unicharset from -U command-line option. If FLAGS_T is set, loads the majority of data from there, else:

  • Loads font info from -F option.
  • Loads xheights from -X option.
  • Loads samples from .tr files in remaining command-line args.
  • Deletes outliers and computes canonical samples.
  • If FLAGS_output_trainer is set, saves the trainer for future use. TODO: Who uses that? There is currently no code which reads it.

Computes canonical and cloud features. If shape_table is not nullptr, but failed to load, makes a fake flat one, as shape clustering was not run.

Definition at line 211 of file commontraining.cpp.

214  {
216  InitIntegerFX();
217  *file_prefix = "";
218  if (!FLAGS_D.empty()) {
219  *file_prefix += FLAGS_D.c_str();
220  *file_prefix += "/";
221  }
222  // If we are shape clustering (nullptr shape_table) or we successfully load
223  // a shape_table written by a previous shape clustering, then
224  // shape_analysis will be true, meaning that the MasterTrainer will replace
225  // some members of the unicharset with their fragments.
226  bool shape_analysis = false;
227  if (shape_table != nullptr) {
228  *shape_table = LoadShapeTable(*file_prefix);
229  if (*shape_table != nullptr) shape_analysis = true;
230  } else {
231  shape_analysis = true;
232  }
233  MasterTrainer* trainer = new MasterTrainer(NM_CHAR_ANISOTROPIC,
234  shape_analysis,
235  replication,
236  FLAGS_debug_level);
237  IntFeatureSpace fs;
239  trainer->LoadUnicharset(FLAGS_U.c_str());
240  // Get basic font information from font_properties.
241  if (!FLAGS_F.empty()) {
242  if (!trainer->LoadFontInfo(FLAGS_F.c_str())) {
243  delete trainer;
244  return nullptr;
245  }
246  }
247  if (!FLAGS_X.empty()) {
248  if (!trainer->LoadXHeights(FLAGS_X.c_str())) {
249  delete trainer;
250  return nullptr;
251  }
252  }
253  trainer->SetFeatureSpace(fs);
254  const char* page_name;
255  // Load training data from .tr files on the command line.
256  while ((page_name = GetNextFilename(argc, argv)) != nullptr) {
257  tprintf("Reading %s ...\n", page_name);
258  trainer->ReadTrainingSamples(page_name, feature_defs, false);
259 
260  // If there is a file with [lang].[fontname].exp[num].fontinfo present,
261  // read font spacing information in to fontinfo_table.
262  int pagename_len = strlen(page_name);
263  char* fontinfo_file_name = new char[pagename_len + 7];
264  strncpy(fontinfo_file_name, page_name, pagename_len - 2); // remove "tr"
265  strcpy(fontinfo_file_name + pagename_len - 2, "fontinfo"); // +"fontinfo"
266  trainer->AddSpacingInfo(fontinfo_file_name);
267  delete[] fontinfo_file_name;
268 
269  // Load the images into memory if required by the classifier.
270  if (FLAGS_load_images) {
271  STRING image_name = page_name;
272  // Chop off the tr and replace with tif. Extension must be tif!
273  image_name.truncate_at(image_name.length() - 2);
274  image_name += "tif";
275  trainer->LoadPageImages(image_name.c_str());
276  }
277  }
278  trainer->PostLoadCleanup();
279  // Write the master trainer if required.
280  if (!FLAGS_output_trainer.empty()) {
281  FILE* fp = fopen(FLAGS_output_trainer.c_str(), "wb");
282  if (fp == nullptr) {
283  tprintf("Can't create saved trainer data!\n");
284  } else {
285  trainer->Serialize(fp);
286  fclose(fp);
287  }
288  }
289  trainer->PreTrainingSetup();
290  if (!FLAGS_O.empty() &&
291  !trainer->unicharset().save_to_file(FLAGS_O.c_str())) {
292  fprintf(stderr, "Failed to save unicharset to file %s\n", FLAGS_O.c_str());
293  delete trainer;
294  return nullptr;
295  }
296  if (shape_table != nullptr) {
297  // If we previously failed to load a shapetable, then shape clustering
298  // wasn't run so make a flat one now.
299  if (*shape_table == nullptr) {
300  *shape_table = new ShapeTable;
301  trainer->SetupFlatShapeTable(*shape_table);
302  tprintf("Flat shape table summary: %s\n",
303  (*shape_table)->SummaryStr().c_str());
304  }
305  (*shape_table)->set_unicharset(trainer->unicharset());
306  }
307  return trainer;
308 }

◆ Logistic()

double tesseract::Logistic ( double  x)
inline

Definition at line 54 of file functions.h.

54  {
55  if (x < 0.0) return 1.0 - Logistic(-x);
56  x *= kScaleFactor;
57  unsigned index = static_cast<unsigned>(x);
58  if (index >= (kTableSize - 1)) return 1.0;
59  double l0 = LogisticTable[index];
60  double l1 = LogisticTable[index + 1];
61  // Linear interpolation.
62  return l0 + (l1 - l0) * (x - index);
63 }

◆ MultiplyAccumulate()

void tesseract::MultiplyAccumulate ( int  n,
const double *  u,
const double *  v,
double *  out 
)
inline

Definition at line 184 of file functions.h.

185  {
186  for (int i = 0; i < n; i++) {
187  out[i] += u[i] * v[i];
188  }
189 }

◆ MultiplyVectorsInPlace()

void tesseract::MultiplyVectorsInPlace ( int  n,
const double *  src,
double *  inout 
)
inline

Definition at line 179 of file functions.h.

179  {
180  for (int i = 0; i < n; ++i) inout[i] *= src[i];
181 }

◆ NormalizeCleanAndSegmentUTF8()

bool tesseract::NormalizeCleanAndSegmentUTF8 ( UnicodeNormMode  u_mode,
OCRNorm  ocr_normalize,
GraphemeNormMode  g_mode,
bool  report_errors,
const char *  str8,
std::vector< std::string > *  graphemes 
)

Definition at line 188 of file normstrngs.cpp.

189  {
190  graphemes32.clear();
191  success = Validator::ValidateCleanAndSegment(g_mode, report_errors,
192  cleaned32, &graphemes32);
193  }
194  }
195  graphemes->clear();
196  graphemes->reserve(graphemes32.size());
197  for (const auto& grapheme : graphemes32) {
198  graphemes->push_back(UNICHAR::UTF32ToUTF8(grapheme));
199  }
200  return success;
201 }
202 
203 // Apply just the OCR-specific normalizations and return the normalized char.
205  if (is_hyphen_punc(ch))
206  return '-';
207  else if (is_single_quote(ch))
208  return '\'';
209  else if (is_double_quote(ch))
210  return '"';
211  return ch;
212 }
213 
214 bool IsOCREquivalent(char32 ch1, char32 ch2) {
215  return OCRNormalize(ch1) == OCRNormalize(ch2);
216 }
217 

◆ NormalizeUTF8String()

bool tesseract::NormalizeUTF8String ( UnicodeNormMode  u_mode,
OCRNorm  ocr_normalize,
GraphemeNorm  grapheme_normalize,
const char *  str8,
std::string normalized 
)

Definition at line 163 of file normstrngs.cpp.

175  {
176  std::vector<char32> normed32;
177  NormalizeUTF8ToUTF32(u_mode, ocr_normalize, str8, &normed32);
178  StripJoiners(&normed32);
179  std::vector<std::vector<char32>> graphemes32;
180  bool success = Validator::ValidateCleanAndSegment(g_mode, report_errors,
181  normed32, &graphemes32);
182  if (g_mode != GraphemeNormMode::kSingleString && success) {

◆ OCRNormalize()

char32 tesseract::OCRNormalize ( char32  ch)

Definition at line 220 of file normstrngs.cpp.

223  {
224  ASSERT_HOST_MSG(IsValidCodepoint(ch), "Invalid Unicode codepoint: 0x%x\n",
225  ch);
226  return u_isUWhiteSpace(static_cast<UChar32>(ch));
227 }
228 

◆ OtsuStats()

int tesseract::OtsuStats ( const int *  histogram,
int *  H_out,
int *  omega0_out 
)

Definition at line 187 of file otsuthr.cpp.

188  {
189  omega_0 += histogram[t];
190  mu_t += t * static_cast<double>(histogram[t]);
191  if (omega_0 == 0)
192  continue;
193  omega_1 = H - omega_0;
194  if (omega_1 == 0)
195  break;
196  mu_0 = mu_t / omega_0;
197  mu_1 = (mu_T - mu_t) / omega_1;
198  double sig_sq_B = mu_1 - mu_0;
199  sig_sq_B *= sig_sq_B * omega_0 * omega_1;
200  if (best_t < 0 || sig_sq_B > best_sig_sq_B) {
201  best_sig_sq_B = sig_sq_B;
202  best_t = t;
203  best_omega_0 = omega_0;
204  }
205  }
206  if (H_out != nullptr) *H_out = H;
207  if (omega0_out != nullptr) *omega0_out = best_omega_0;
208  return best_t;
209 }
210 
211 } // namespace tesseract.

◆ OtsuThreshold()

int tesseract::OtsuThreshold ( Pix *  src_pix,
int  left,
int  top,
int  width,
int  height,
int **  thresholds,
int **  hi_values 
)

Definition at line 56 of file otsuthr.cpp.

60  {
61  od.HistogramRectOCL(pixGetData(src_pix), num_channels,
62  pixGetWpl(src_pix) * 4, left, top, width, height,
63  kHistogramSize, histogramAllChannels);
64 
65  // Calculate Threshold from Histogram on cpu
66  for (int ch = 0; ch < num_channels; ++ch) {
67  (*thresholds)[ch] = -1;
68  (*hi_values)[ch] = -1;
69  int *histogram = &histogramAllChannels[kHistogramSize * ch];
70  int H;
71  int best_omega_0;
72  int best_t = OtsuStats(histogram, &H, &best_omega_0);
73  if (best_omega_0 == 0 || best_omega_0 == H) {
74  // This channel is empty.
75  continue;
76  }
77  // To be a convincing foreground we must have a small fraction of H
78  // or to be a convincing background we must have a large fraction of H.
79  // In between we assume this channel contains no thresholding information.
80  int hi_value = best_omega_0 < H * 0.5;
81  (*thresholds)[ch] = best_t;
82  if (best_omega_0 > H * 0.75) {
83  any_good_hivalue = true;
84  (*hi_values)[ch] = 0;
85  } else if (best_omega_0 < H * 0.25) {
86  any_good_hivalue = true;
87  (*hi_values)[ch] = 1;
88  } else {
89  // In case all channels are like this, keep the best of the bad lot.
90  double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0;
91  if (hi_dist > best_hi_dist) {
92  best_hi_dist = hi_dist;
93  best_hi_value = hi_value;
94  best_hi_index = ch;
95  }
96  }
97  }
98  } else {
99 #endif
100  for (int ch = 0; ch < num_channels; ++ch) {
101  (*thresholds)[ch] = -1;
102  (*hi_values)[ch] = -1;
103  // Compute the histogram of the image rectangle.
104  int histogram[kHistogramSize];
105  HistogramRect(src_pix, ch, left, top, width, height, histogram);
106  int H;
107  int best_omega_0;
108  int best_t = OtsuStats(histogram, &H, &best_omega_0);
109  if (best_omega_0 == 0 || best_omega_0 == H) {
110  // This channel is empty.
111  continue;
112  }
113  // To be a convincing foreground we must have a small fraction of H
114  // or to be a convincing background we must have a large fraction of H.
115  // In between we assume this channel contains no thresholding information.
116  int hi_value = best_omega_0 < H * 0.5;
117  (*thresholds)[ch] = best_t;
118  if (best_omega_0 > H * 0.75) {
119  any_good_hivalue = true;
120  (*hi_values)[ch] = 0;
121  } else if (best_omega_0 < H * 0.25) {
122  any_good_hivalue = true;
123  (*hi_values)[ch] = 1;
124  } else {
125  // In case all channels are like this, keep the best of the bad lot.
126  double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0;
127  if (hi_dist > best_hi_dist) {
128  best_hi_dist = hi_dist;
129  best_hi_value = hi_value;
130  best_hi_index = ch;
131  }
132  }
133  }
134 #ifdef USE_OPENCL
135  }
136  delete[] histogramAllChannels;
137 #endif // USE_OPENCL
138 
139  if (!any_good_hivalue) {
140  // Use the best of the ones that were not good enough.
141  (*hi_values)[best_hi_index] = best_hi_value;
142  }
143  return num_channels;
144 }
145 
146 // Computes the histogram for the given image rectangle, and the given
147 // single channel. Each channel is always one byte per pixel.
148 // Histogram is always a kHistogramSize(256) element array to count
149 // occurrences of each pixel value.
150 void HistogramRect(Pix* src_pix, int channel,
151  int left, int top, int width, int height,
152  int* histogram) {
153  int num_channels = pixGetDepth(src_pix) / 8;
154  channel = ClipToRange(channel, 0, num_channels - 1);
155  int bottom = top + height;
156  memset(histogram, 0, sizeof(*histogram) * kHistogramSize);
157  int src_wpl = pixGetWpl(src_pix);
158  l_uint32* srcdata = pixGetData(src_pix);
159  for (int y = top; y < bottom; ++y) {
160  const l_uint32* linedata = srcdata + y * src_wpl;

◆ ParamsTrainingFeatureByName()

int tesseract::ParamsTrainingFeatureByName ( const char *  name)

Definition at line 26 of file params_training_featdef.cpp.

26  {
27  if (name == nullptr)
28  return -1;
29  int array_size = sizeof(kParamsTrainingFeatureTypeName) /
30  sizeof(kParamsTrainingFeatureTypeName[0]);
31  for (int i = 0; i < array_size; i++) {
32  if (kParamsTrainingFeatureTypeName[i] == nullptr)
33  continue;
34  if (strcmp(name, kParamsTrainingFeatureTypeName[i]) == 0)
35  return i;
36  }
37  return -1;
38 }

◆ ParseCommandLineFlags()

void tesseract::ParseCommandLineFlags ( const char *  usage,
int *  argc,
char ***  argv,
const bool  remove_flags 
)

Definition at line 166 of file commandlineflags.cpp.

168  {
169  if (*argc == 1) {
170  printf("USAGE: %s\n", usage);
171  PrintCommandLineFlags();
172  exit(0);
173  }
174 
175  if (*argc > 1 && (!strcmp((*argv)[1], "-v") || !strcmp((*argv)[1], "--version"))) {
176  printf("%s\n", TessBaseAPI::Version());
177  exit(0);
178  }
179 
180  int i;
181  for (i = 1; i < *argc; ++i) {
182  const char* current_arg = (*argv)[i];
183  // If argument does not start with a hyphen then break.
184  if (current_arg[0] != '-') {
185  break;
186  }
187  // Position current_arg after the starting hyphens. We treat a sequence of
188  // one or two consecutive hyphens identically.
189  ++current_arg;
190  if (current_arg[0] == '-') {
191  ++current_arg;
192  }
193  // If this is asking for usage, print the help message and abort.
194  if (!strcmp(current_arg, "help")) {
195  printf("Usage:\n %s [OPTION ...]\n\n", usage);
196  PrintCommandLineFlags();
197  exit(0);
198  }
199  // Find the starting position of the value if it was specified in this
200  // string.
201  const char* equals_position = strchr(current_arg, '=');
202  const char* rhs = nullptr;
203  if (equals_position != nullptr) {
204  rhs = equals_position + 1;
205  }
206  // Extract the flag name.
207  STRING lhs;
208  if (equals_position == nullptr) {
209  lhs = current_arg;
210  } else {
211  lhs.assign(current_arg, equals_position - current_arg);
212  }
213  if (!lhs.length()) {
214  tprintf("ERROR: Bad argument: %s\n", (*argv)[i]);
215  exit(1);
216  }
217 
218  // Find the flag name in the list of global flags.
219  // int32_t flag
220  int32_t int_val;
221  if (IntFlagExists(lhs.c_str(), &int_val)) {
222  if (rhs != nullptr) {
223  if (!strlen(rhs)) {
224  // Bad input of the format --int_flag=
225  tprintf("ERROR: Bad argument: %s\n", (*argv)[i]);
226  exit(1);
227  }
228  if (!SafeAtoi(rhs, &int_val)) {
229  tprintf("ERROR: Could not parse int from %s in flag %s\n",
230  rhs, (*argv)[i]);
231  exit(1);
232  }
233  } else {
234  // We need to parse the next argument
235  if (i + 1 >= *argc) {
236  tprintf("ERROR: Could not find value argument for flag %s\n",
237  lhs.c_str());
238  exit(1);
239  } else {
240  ++i;
241  if (!SafeAtoi((*argv)[i], &int_val)) {
242  tprintf("ERROR: Could not parse int32_t from %s\n", (*argv)[i]);
243  exit(1);
244  }
245  }
246  }
247  SetIntFlagValue(lhs.c_str(), int_val);
248  continue;
249  }
250 
251  // double flag
252  double double_val;
253  if (DoubleFlagExists(lhs.c_str(), &double_val)) {
254  if (rhs != nullptr) {
255  if (!strlen(rhs)) {
256  // Bad input of the format --double_flag=
257  tprintf("ERROR: Bad argument: %s\n", (*argv)[i]);
258  exit(1);
259  }
260  if (!SafeAtod(rhs, &double_val)) {
261  tprintf("ERROR: Could not parse double from %s in flag %s\n",
262  rhs, (*argv)[i]);
263  exit(1);
264  }
265  } else {
266  // We need to parse the next argument
267  if (i + 1 >= *argc) {
268  tprintf("ERROR: Could not find value argument for flag %s\n",
269  lhs.c_str());
270  exit(1);
271  } else {
272  ++i;
273  if (!SafeAtod((*argv)[i], &double_val)) {
274  tprintf("ERROR: Could not parse double from %s\n", (*argv)[i]);
275  exit(1);
276  }
277  }
278  }
279  SetDoubleFlagValue(lhs.c_str(), double_val);
280  continue;
281  }
282 
283  // Bool flag. Allow input forms --flag (equivalent to --flag=true),
284  // --flag=false, --flag=true, --flag=0 and --flag=1
285  bool bool_val;
286  if (BoolFlagExists(lhs.c_str(), &bool_val)) {
287  if (rhs == nullptr) {
288  // --flag form
289  bool_val = true;
290  } else {
291  if (!strlen(rhs)) {
292  // Bad input of the format --bool_flag=
293  tprintf("ERROR: Bad argument: %s\n", (*argv)[i]);
294  exit(1);
295  }
296  if (!strcmp(rhs, "false") || !strcmp(rhs, "0")) {
297  bool_val = false;
298  } else if (!strcmp(rhs, "true") || !strcmp(rhs, "1")) {
299  bool_val = true;
300  } else {
301  tprintf("ERROR: Could not parse bool from flag %s\n", (*argv)[i]);
302  exit(1);
303  }
304  }
305  SetBoolFlagValue(lhs.c_str(), bool_val);
306  continue;
307  }
308 
309  // string flag
310  const char* string_val;
311  if (StringFlagExists(lhs.c_str(), &string_val)) {
312  if (rhs != nullptr) {
313  string_val = rhs;
314  } else {
315  // Pick the next argument
316  if (i + 1 >= *argc) {
317  tprintf("ERROR: Could not find string value for flag %s\n",
318  lhs.c_str());
319  exit(1);
320  } else {
321  string_val = (*argv)[++i];
322  }
323  }
324  SetStringFlagValue(lhs.c_str(), string_val);
325  continue;
326  }
327 
328  // Flag was not found. Exit with an error message.
329  tprintf("ERROR: Non-existent flag %s\n", (*argv)[i]);
330  exit(1);
331  } // for each argv
332  if (remove_flags) {
333  (*argv)[i - 1] = (*argv)[0];
334  (*argv) += (i - 1);
335  (*argc) -= (i - 1);
336  }
337 }

◆ PrepareDistortedPix()

Pix * tesseract::PrepareDistortedPix ( const Pix *  pix,
bool  perspective,
bool  invert,
bool  white_noise,
bool  smooth_noise,
bool  blur,
int  box_reduction,
TRand randomizer,
GenericVector< TBOX > *  boxes 
)

Definition at line 196 of file degradeimage.cpp.

198  {
199  Pix* blurred = pixBlockconv(distorted, 1, 1);
200  pixDestroy(&distorted);
201  distorted = blurred;
202  }
203  if (perspective)
204  GeneratePerspectiveDistortion(0, 0, randomizer, &distorted, boxes);
205  if (boxes != nullptr) {
206  for (int b = 0; b < boxes->size(); ++b) {
207  (*boxes)[b].scale(1.0f / box_reduction);
208  if ((*boxes)[b].width() <= 0)
209  (*boxes)[b].set_right((*boxes)[b].left() + 1);
210  }
211  }
212  if (invert && randomizer->SignedRand(1.0) < -0)
213  pixInvert(distorted, distorted);
214  return distorted;
215 }
216 
217 // Distorts anything that has a non-null pointer with the same pseudo-random
218 // perspective distortion. Width and height only need to be set if there
219 // is no pix. If there is a pix, then they will be taken from there.
220 void GeneratePerspectiveDistortion(int width, int height, TRand* randomizer,
221  Pix** pix, GenericVector<TBOX>* boxes) {
222  if (pix != nullptr && *pix != nullptr) {
223  width = pixGetWidth(*pix);
224  height = pixGetHeight(*pix);
225  }
226  float* im_coeffs = nullptr;
227  float* box_coeffs = nullptr;
228  l_int32 incolor =
229  ProjectiveCoeffs(width, height, randomizer, &im_coeffs, &box_coeffs);
230  if (pix != nullptr && *pix != nullptr) {
231  // Transform the image.
232  Pix* transformed = pixProjective(*pix, im_coeffs, incolor);

◆ PrintString32WithUnicodes()

std::string tesseract::PrintString32WithUnicodes ( const std::string str)
inline

Definition at line 34 of file normstrngs_test.h.

34  {
35  std::vector<char32> str32 = UNICHAR::UTF8ToUTF32(str.c_str());
36  return absl::StrCat("\"", str, "\" ", CodepointList(str32));
37 }

◆ PrintStringVectorWithUnicodes()

std::string tesseract::PrintStringVectorWithUnicodes ( const std::vector< std::string > &  glyphs)
inline

Definition at line 39 of file normstrngs_test.h.

39  {
40  std::string result;
41  for (const auto& s : glyphs) {
42  result += "Glyph:";
43  result += PrintString32WithUnicodes(s) + "\n";
44  }
45  return result;
46 }

◆ ProjectiveCoeffs()

int tesseract::ProjectiveCoeffs ( int  width,
int  height,
TRand randomizer,
float **  im_coeffs,
float **  box_coeffs 
)

Definition at line 283 of file degradeimage.cpp.

287  {
288  factors[i] = fabs(randomizer->SignedRand(1.0));
289  if (i <= FN_Y3)
290  factors[i] *= 5.0 / 8.0;
291  else
292  factors[i] *= 0.5;
293  factors[i] *= factors[i];
294  }
295  }
296  // Setup "to" points.
297  Pta* dest_pts = ptaCreate(4);
298  ptaAddPt(dest_pts, factors[FN_X0] * width, factors[FN_Y0] * height);
299  ptaAddPt(dest_pts, (1.0f - factors[FN_X1]) * width, factors[FN_Y1] * height);
300  ptaAddPt(dest_pts, (1.0f - factors[FN_X1] + shear) * width,
301  (1 - factors[FN_Y2]) * height);
302  ptaAddPt(dest_pts, (factors[FN_X0] + shear) * width,
303  (1 - factors[FN_Y3]) * height);
304  getProjectiveXformCoeffs(dest_pts, src_pts, im_coeffs);
305  getProjectiveXformCoeffs(src_pts, dest_pts, box_coeffs);
306  ptaDestroy(&src_pts);
307  ptaDestroy(&dest_pts);
308  return factors[FN_INCOLOR] > 0.5f ? L_BRING_IN_WHITE : L_BRING_IN_BLACK;
309 }
310 
311 } // namespace tesseract

◆ PSM_BLOCK_FIND_ENABLED()

bool tesseract::PSM_BLOCK_FIND_ENABLED ( int  pageseg_mode)
inline

Definition at line 200 of file publictypes.h.

200  {
201  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_COLUMN;
202 }

◆ PSM_COL_FIND_ENABLED()

bool tesseract::PSM_COL_FIND_ENABLED ( int  pageseg_mode)
inline

Definition at line 194 of file publictypes.h.

194  {
195  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_AUTO;
196 }

◆ PSM_LINE_FIND_ENABLED()

bool tesseract::PSM_LINE_FIND_ENABLED ( int  pageseg_mode)
inline

Definition at line 203 of file publictypes.h.

203  {
204  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_BLOCK;
205 }

◆ PSM_ORIENTATION_ENABLED()

bool tesseract::PSM_ORIENTATION_ENABLED ( int  pageseg_mode)
inline

Definition at line 191 of file publictypes.h.

191  {
192  return pageseg_mode <= PSM_AUTO || pageseg_mode == PSM_SPARSE_TEXT_OSD;
193 }

◆ PSM_OSD_ENABLED()

bool tesseract::PSM_OSD_ENABLED ( int  pageseg_mode)
inline

Inline functions that act on a PageSegMode to determine whether components of layout analysis are enabled. They depend critically on the order of the elements of PageSegMode. Note that the argument is an int for compatibility with INT_PARAM.

Definition at line 188 of file publictypes.h.

188  {
189  return pageseg_mode <= PSM_AUTO_OSD || pageseg_mode == PSM_SPARSE_TEXT_OSD;
190 }

◆ PSM_SPARSE()

bool tesseract::PSM_SPARSE ( int  pageseg_mode)
inline

Definition at line 197 of file publictypes.h.

197  {
198  return pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
199 }

◆ PSM_WORD_FIND_ENABLED()

bool tesseract::PSM_WORD_FIND_ENABLED ( int  pageseg_mode)
inline

Definition at line 206 of file publictypes.h.

206  {
207  return (pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_LINE) ||
208  pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
209 }

◆ read_info()

bool tesseract::read_info ( TFile f,
FontInfo fi 
)

Definition at line 156 of file fontinfo.cpp.

156  {
157  uint32_t size;
158  if (!f->DeSerialize(&size)) return false;
159  char* font_name = new char[size + 1];
160  fi->name = font_name;
161  if (!f->DeSerialize(font_name, size)) return false;
162  font_name[size] = '\0';
163  return f->DeSerialize(&fi->properties);
164 }

◆ read_set()

bool tesseract::read_set ( TFile f,
FontSet fs 
)

Definition at line 229 of file fontinfo.cpp.

229  {
230  if (!f->DeSerialize(&fs->size)) return false;
231  fs->configs = new int[fs->size];
232  return f->DeSerialize(&fs->configs[0], fs->size);
233 }

◆ read_spacing_info()

bool tesseract::read_spacing_info ( TFile f,
FontInfo fi 
)

Definition at line 173 of file fontinfo.cpp.

173  {
174  int32_t vec_size, kern_size;
175  if (!f->DeSerialize(&vec_size)) return false;
176  ASSERT_HOST(vec_size >= 0);
177  if (vec_size == 0) return true;
178  fi->init_spacing(vec_size);
179  for (int i = 0; i < vec_size; ++i) {
180  auto *fs = new FontSpacingInfo();
181  if (!f->DeSerialize(&fs->x_gap_before) ||
182  !f->DeSerialize(&fs->x_gap_after) ||
183  !f->DeSerialize(&kern_size)) {
184  delete fs;
185  return false;
186  }
187  if (kern_size < 0) { // indication of a nullptr entry in fi->spacing_vec
188  delete fs;
189  continue;
190  }
191  if (kern_size > 0 && (!fs->kerned_unichar_ids.DeSerialize(f) ||
192  !fs->kerned_x_gaps.DeSerialize(f))) {
193  delete fs;
194  return false;
195  }
196  fi->add_spacing(i, fs);
197  }
198  return true;
199 }

◆ ReadFile()

STRING tesseract::ReadFile ( const std::string filename,
FileReader  reader 
)

Definition at line 57 of file lang_model_helpers.cpp.

57  {
58  if (filename.empty()) return STRING();
60  bool read_result;
61  if (reader == nullptr)
62  read_result = LoadDataFromFile(filename.c_str(), &data);
63  else
64  read_result = (*reader)(filename.c_str(), &data);
65  if (read_result) return STRING(&data[0], data.size());
66  tprintf("Failed to read data from: %s\n", filename.c_str());
67  return STRING();
68 }

◆ RecomputeMarginsAndClearHypotheses()

void tesseract::RecomputeMarginsAndClearHypotheses ( GenericVector< RowScratchRegisters > *  rows,
int  start,
int  end,
int  percentile 
)

Definition at line 1583 of file paragraphs.cpp.

1587  {
1588  RowScratchRegisters &sr = (*rows)[i];
1589  if (sr.ri_->num_words == 0)
1590  continue;
1591  lefts.add(sr.lmargin_ + sr.lindent_, 1);
1592  rights.add(sr.rmargin_ + sr.rindent_, 1);
1593  }
1594  int ignorable_left = lefts.ile(ClipToRange(percentile, 0, 100) / 100.0);
1595  int ignorable_right = rights.ile(ClipToRange(percentile, 0, 100) / 100.0);
1596  for (int i = start; i < end; i++) {
1597  RowScratchRegisters &sr = (*rows)[i];
1598  int ldelta = ignorable_left - sr.lmargin_;
1599  sr.lmargin_ += ldelta;
1600  sr.lindent_ -= ldelta;
1601  int rdelta = ignorable_right - sr.rmargin_;
1602  sr.rmargin_ += rdelta;
1603  sr.rindent_ -= rdelta;
1604  }
1605 }
1606 
1607 // Return the median inter-word space in rows[row_start, row_end).
1609  int row_start, int row_end) {
1610  if (row_end < row_start + 1) return 1;
1611  int word_height = (rows[row_start].ri_->lword_box.height() +
1612  rows[row_end - 1].ri_->lword_box.height()) / 2;
1613  int word_width = (rows[row_start].ri_->lword_box.width() +
1614  rows[row_end - 1].ri_->lword_box.width()) / 2;
1615  STATS spacing_widths(0, 5 + word_width);
1616  for (int i = row_start; i < row_end; i++) {
1617  if (rows[i].ri_->num_words > 1) {
1618  spacing_widths.add(rows[i].ri_->average_interword_space, 1);
1619  }
1620  }

◆ RightWordAttributes()

void tesseract::RightWordAttributes ( const UNICHARSET unicharset,
const WERD_CHOICE werd,
const STRING utf8,
bool *  is_list,
bool *  starts_idea,
bool *  ends_idea 
)

Definition at line 470 of file paragraphs.cpp.

472  {
473  *ends_idea = true;
474  }
475  } else { // Assume utf8 is mostly ASCII
476  if (AsciiLikelyListItem(utf8)) {
477  *is_list = true;
478  *starts_idea = true;
479  }
480  int last_letter = utf8[utf8.size() - 1];
481  if (IsOpeningPunct(last_letter) || IsTerminalPunct(last_letter)) {
482  *ends_idea = true;
483  }
484  }
485 }
486 
487 // =============== Implementation of RowScratchRegisters =====================
488 /* static */
489 void RowScratchRegisters::AppendDebugHeaderFields(
490  GenericVector<STRING> *header) {
491  header->push_back("[lmarg,lind;rind,rmarg]");
492  header->push_back("model");
493 }
494 
495 void RowScratchRegisters::AppendDebugInfo(const ParagraphTheory &theory,
496  GenericVector<STRING> *dbg) const {
497  char s[30];
498  snprintf(s, sizeof(s), "[%3d,%3d;%3d,%3d]",
499  lmargin_, lindent_, rindent_, rmargin_);
500  dbg->push_back(s);

◆ RowsFitModel()

bool tesseract::RowsFitModel ( const GenericVector< RowScratchRegisters > *  rows,
int  start,
int  end,
const ParagraphModel model 
)

Definition at line 1826 of file paragraphs.cpp.

1834  {
1835  // Record patently obvious body text.

◆ SaveDataToFile()

bool tesseract::SaveDataToFile ( const GenericVector< char > &  data,
const char *  filename 
)
inline

Definition at line 362 of file genericvector.h.

363  {
364  FILE* fp = fopen(filename, "wb");
365  if (fp == nullptr) {
366  return false;
367  }
368  bool result =
369  static_cast<int>(fwrite(&data[0], 1, data.size(), fp)) == data.size();
370  fclose(fp);
371  return result;
372 }

◆ ScriptPosToString()

const char * tesseract::ScriptPosToString ( enum ScriptPos  script_pos)

Definition at line 202 of file ratngs.cpp.

203  {
204  switch (script_pos) {
205  case SP_NORMAL: return "NORM";
206  case SP_SUBSCRIPT: return "SUB";
207  case SP_SUPERSCRIPT: return "SUPER";
208  case SP_DROPCAP: return "DROPC";
209  }
210  return "SP_UNKNOWN";

◆ Serialize() [1/8]

bool tesseract::Serialize ( FILE *  fp,
const char *  data,
size_t  n = 1 
)

Definition at line 73 of file serialis.cpp.

75  {

◆ Serialize() [2/8]

bool tesseract::Serialize ( FILE *  fp,
const float *  data,
size_t  n = 1 
)

Definition at line 77 of file serialis.cpp.

79  {

◆ Serialize() [3/8]

bool tesseract::Serialize ( FILE *  fp,
const int16_t *  data,
size_t  n = 1 
)

Definition at line 85 of file serialis.cpp.

87  {

◆ Serialize() [4/8]

bool tesseract::Serialize ( FILE *  fp,
const int32_t *  data,
size_t  n = 1 
)

Definition at line 89 of file serialis.cpp.

92  : offset_(0),

◆ Serialize() [5/8]

bool tesseract::Serialize ( FILE *  fp,
const int8_t *  data,
size_t  n = 1 
)

Definition at line 81 of file serialis.cpp.

83  {

◆ Serialize() [6/8]

bool tesseract::Serialize ( FILE *  fp,
const uint16_t *  data,
size_t  n = 1 
)

Definition at line 97 of file serialis.cpp.

98  {
99  if (data_is_owned_)

◆ Serialize() [7/8]

bool tesseract::Serialize ( FILE *  fp,
const uint32_t *  data,
size_t  n = 1 
)

Definition at line 101 of file serialis.cpp.

103  {

◆ Serialize() [8/8]

bool tesseract::Serialize ( FILE *  fp,
const uint8_t *  data,
size_t  n = 1 
)

Definition at line 93 of file serialis.cpp.

96  {}

◆ SetBlobStrokeWidth()

void tesseract::SetBlobStrokeWidth ( Pix *  pix,
BLOBNBOX blob 
)

Definition at line 67 of file tordmain.cpp.

69  {
70  // Cut the blob rectangle into a Pix.
71  int pix_height = pixGetHeight(pix);
72  const TBOX& box = blob->bounding_box();
73  int width = box.width();
74  int height = box.height();
75  Box* blob_pix_box = boxCreate(box.left(), pix_height - box.top(),
76  width, height);
77  Pix* pix_blob = pixClipRectangle(pix, blob_pix_box, nullptr);
78  boxDestroy(&blob_pix_box);
79  Pix* dist_pix = pixDistanceFunction(pix_blob, 4, 8, L_BOUNDARY_BG);
80  pixDestroy(&pix_blob);
81  // Compute the stroke widths.
82  uint32_t* data = pixGetData(dist_pix);
83  int wpl = pixGetWpl(dist_pix);
84  // Horizontal width of stroke.
85  STATS h_stats(0, width + 1);
86  for (int y = 0; y < height; ++y) {
87  uint32_t* pixels = data + y*wpl;
88  int prev_pixel = 0;
89  int pixel = GET_DATA_BYTE(pixels, 0);
90  for (int x = 1; x < width; ++x) {
91  int next_pixel = GET_DATA_BYTE(pixels, x);
92  // We are looking for a pixel that is equal to its vertical neighbours,
93  // yet greater than its left neighbour.
94  if (prev_pixel < pixel &&
95  (y == 0 || pixel == GET_DATA_BYTE(pixels - wpl, x - 1)) &&
96  (y == height - 1 || pixel == GET_DATA_BYTE(pixels + wpl, x - 1))) {
97  if (pixel > next_pixel) {
98  // Single local max, so an odd width.
99  h_stats.add(pixel * 2 - 1, 1);
100  } else if (pixel == next_pixel && x + 1 < width &&
101  pixel > GET_DATA_BYTE(pixels, x + 1)) {
102  // Double local max, so an even width.
103  h_stats.add(pixel * 2, 1);
104  }
105  }
106  prev_pixel = pixel;
107  pixel = next_pixel;
108  }
109  }
110  // Vertical width of stroke.
111  STATS v_stats(0, height + 1);
112  for (int x = 0; x < width; ++x) {
113  int prev_pixel = 0;
114  int pixel = GET_DATA_BYTE(data, x);
115  for (int y = 1; y < height; ++y) {
116  uint32_t* pixels = data + y*wpl;
117  int next_pixel = GET_DATA_BYTE(pixels, x);
118  // We are looking for a pixel that is equal to its horizontal neighbours,
119  // yet greater than its upper neighbour.
120  if (prev_pixel < pixel &&
121  (x == 0 || pixel == GET_DATA_BYTE(pixels - wpl, x - 1)) &&
122  (x == width - 1 || pixel == GET_DATA_BYTE(pixels - wpl, x + 1))) {
123  if (pixel > next_pixel) {
124  // Single local max, so an odd width.
125  v_stats.add(pixel * 2 - 1, 1);
126  } else if (pixel == next_pixel && y + 1 < height &&
127  pixel > GET_DATA_BYTE(pixels + wpl, x)) {
128  // Double local max, so an even width.
129  v_stats.add(pixel * 2, 1);
130  }
131  }
132  prev_pixel = pixel;
133  pixel = next_pixel;
134  }
135  }
136  pixDestroy(&dist_pix);
137  // Store the horizontal and vertical width in the blob, keeping both
138  // widths if there is enough information, otherwise only the one with
139  // the most samples.
140  // If there are insufficient samples, store zero, rather than using
141  // 2*area/perimeter, as the numbers that gives do not match the numbers
142  // from the distance method.
143  if (h_stats.get_total() >= (width + height) / 4) {
144  blob->set_horz_stroke_width(h_stats.ile(0.5f));
145  if (v_stats.get_total() >= (width + height) / 4)
146  blob->set_vert_stroke_width(v_stats.ile(0.5f));
147  else
148  blob->set_vert_stroke_width(0.0f);
149  } else {
150  if (v_stats.get_total() >= (width + height) / 4 ||
151  v_stats.get_total() > h_stats.get_total()) {
152  blob->set_horz_stroke_width(0.0f);
153  blob->set_vert_stroke_width(v_stats.ile(0.5f));
154  } else {
155  blob->set_horz_stroke_width(h_stats.get_total() > 2 ? h_stats.ile(0.5f)
156  : 0.0f);
157  blob->set_vert_stroke_width(0.0f);
158  }

◆ SetPropertiesForInputFile()

void tesseract::SetPropertiesForInputFile ( const std::string script_dir,
const std::string input_unicharset_file,
const std::string output_unicharset_file,
const std::string output_xheights_file 
)

Definition at line 183 of file unicharset_training_utils.cpp.

186  {
187  UNICHARSET unicharset;
188 
189  // Load the input unicharset
190  unicharset.load_from_file(input_unicharset_file.c_str());
191  tprintf("Loaded unicharset of size %d from file %s\n", unicharset.size(),
192  input_unicharset_file.c_str());
193 
194  // Set unichar properties
195  tprintf("Setting unichar properties\n");
196  SetupBasicProperties(true, false, &unicharset);
197  tprintf("Setting script properties\n");
198  SetScriptProperties(script_dir, &unicharset);
199  if (!output_xheights_file.empty()) {
200  std::string xheights_str = GetXheightString(script_dir, unicharset);
201  File::WriteStringToFileOrDie(xheights_str, output_xheights_file);
202  }
203 
204  // Write the output unicharset
205  tprintf("Writing unicharset to file %s\n", output_unicharset_file.c_str());
206  unicharset.save_to_file(output_unicharset_file.c_str());
207 }

◆ SetScriptProperties()

void tesseract::SetScriptProperties ( const std::string script_dir,
UNICHARSET unicharset 
)

Definition at line 143 of file unicharset_training_utils.cpp.

143  {
144  for (int s = 0; s < unicharset->get_script_table_size(); ++s) {
145  // Load the unicharset for the script if available.
146  std::string filename = script_dir + "/" +
147  unicharset->get_script_from_script_id(s) + ".unicharset";
148  UNICHARSET script_set;
149  if (script_set.load_from_file(filename.c_str())) {
150  unicharset->SetPropertiesFromOther(script_set);
151  } else if (s != unicharset->common_sid() && s != unicharset->null_sid()) {
152  tprintf("Failed to load script unicharset from:%s\n", filename.c_str());
153  }
154  }
155  for (int c = SPECIAL_UNICHAR_CODES_COUNT; c < unicharset->size(); ++c) {
156  if (unicharset->PropertiesIncomplete(c)) {
157  tprintf("Warning: properties incomplete for index %d = %s\n", c,
158  unicharset->id_to_unichar(c));
159  }
160  }
161 }

◆ SetupBasicProperties() [1/2]

void tesseract::SetupBasicProperties ( bool  report_errors,
bool  decompose,
UNICHARSET unicharset 
)

Definition at line 40 of file unicharset_training_utils.cpp.

41  {
42  for (int unichar_id = 0; unichar_id < unicharset->size(); ++unichar_id) {
43  // Convert any custom ligatures.
44  const char* unichar_str = unicharset->id_to_unichar(unichar_id);
45  for (int i = 0; UNICHARSET::kCustomLigatures[i][0] != nullptr; ++i) {
46  if (!strcmp(UNICHARSET::kCustomLigatures[i][1], unichar_str)) {
47  unichar_str = UNICHARSET::kCustomLigatures[i][0];
48  break;
49  }
50  }
51 
52  // Convert the unichar to UTF32 representation
53  std::vector<char32> uni_vector = UNICHAR::UTF8ToUTF32(unichar_str);
54 
55  // Assume that if the property is true for any character in the string,
56  // then it holds for the whole "character".
57  bool unichar_isalpha = false;
58  bool unichar_islower = false;
59  bool unichar_isupper = false;
60  bool unichar_isdigit = false;
61  bool unichar_ispunct = false;
62 
63  for (char32 u_ch : uni_vector) {
64  if (u_isalpha(u_ch)) unichar_isalpha = true;
65  if (u_islower(u_ch)) unichar_islower = true;
66  if (u_isupper(u_ch)) unichar_isupper = true;
67  if (u_isdigit(u_ch)) unichar_isdigit = true;
68  if (u_ispunct(u_ch)) unichar_ispunct = true;
69  }
70 
71  unicharset->set_isalpha(unichar_id, unichar_isalpha);
72  unicharset->set_islower(unichar_id, unichar_islower);
73  unicharset->set_isupper(unichar_id, unichar_isupper);
74  unicharset->set_isdigit(unichar_id, unichar_isdigit);
75  unicharset->set_ispunctuation(unichar_id, unichar_ispunct);
76 
78  unicharset->set_script(unichar_id, uscript_getName(
79  uscript_getScript(uni_vector[0], err)));
80 
81  const int num_code_points = uni_vector.size();
82  // Obtain the lower/upper case if needed and record it in the properties.
83  unicharset->set_other_case(unichar_id, unichar_id);
84  if (unichar_islower || unichar_isupper) {
85  std::vector<char32> other_case(num_code_points, 0);
86  for (int i = 0; i < num_code_points; ++i) {
87  // TODO(daria): Ideally u_strToLower()/u_strToUpper() should be used.
88  // However since they deal with UChars (so need a conversion function
89  // from char32 or UTF8string) and require a meaningful locale string,
90  // for now u_tolower()/u_toupper() are used.
91  other_case[i] = unichar_islower ? u_toupper(uni_vector[i]) :
92  u_tolower(uni_vector[i]);
93  }
94  std::string other_case_uch = UNICHAR::UTF32ToUTF8(other_case);
95  UNICHAR_ID other_case_id =
96  unicharset->unichar_to_id(other_case_uch.c_str());
97  if (other_case_id != INVALID_UNICHAR_ID) {
98  unicharset->set_other_case(unichar_id, other_case_id);
99  } else if (unichar_id >= SPECIAL_UNICHAR_CODES_COUNT && report_errors) {
100  tprintf("Other case %s of %s is not in unicharset\n",
101  other_case_uch.c_str(), unichar_str);
102  }
103  }
104 
105  // Set RTL property and obtain mirror unichar ID from ICU.
106  std::vector<char32> mirrors(num_code_points, 0);
107  for (int i = 0; i < num_code_points; ++i) {
108  mirrors[i] = u_charMirror(uni_vector[i]);
109  if (i == 0) { // set directionality to that of the 1st code point
110  unicharset->set_direction(unichar_id,
111  static_cast<UNICHARSET::Direction>(
112  u_charDirection(uni_vector[i])));
113  }
114  }
115  std::string mirror_uch = UNICHAR::UTF32ToUTF8(mirrors);
116  UNICHAR_ID mirror_uch_id = unicharset->unichar_to_id(mirror_uch.c_str());
117  if (mirror_uch_id != INVALID_UNICHAR_ID) {
118  unicharset->set_mirror(unichar_id, mirror_uch_id);
119  } else if (report_errors) {
120  tprintf("Mirror %s of %s is not in unicharset\n",
121  mirror_uch.c_str(), unichar_str);
122  }
123 
124  // Record normalized version of this unichar.
125  std::string normed_str;
126  if (unichar_id != 0 &&
131  unichar_str, &normed_str) &&
132  !normed_str.empty()) {
133  unicharset->set_normed(unichar_id, normed_str.c_str());
134  } else {
135  unicharset->set_normed(unichar_id, unichar_str);
136  }
137  ASSERT_HOST(unicharset->get_other_case(unichar_id) < unicharset->size());
138  }
139  unicharset->post_load_setup();
140 }

◆ SetupBasicProperties() [2/2]

void tesseract::SetupBasicProperties ( bool  report_errors,
UNICHARSET unicharset 
)
inline

Definition at line 38 of file unicharset_training_utils.h.

38  {
39  SetupBasicProperties(report_errors, false, unicharset);
40 }

◆ SoftmaxInPlace()

template<typename T >
void tesseract::SoftmaxInPlace ( int  n,
T *  inout 
)
inline

Definition at line 146 of file functions.h.

146  {
147  if (n <= 0) return;
148  // A limit on the negative range input to exp to guarantee non-zero output.
149  const T kMaxSoftmaxActivation = 86.0f;
150 
151  T max_output = inout[0];
152  for (int i = 1; i < n; i++) {
153  T output = inout[i];
154  if (output > max_output) max_output = output;
155  }
156  T prob_total = 0.0;
157  for (int i = 0; i < n; i++) {
158  T prob = inout[i] - max_output;
159  prob = exp(ClipToRange(prob, -kMaxSoftmaxActivation, static_cast<T>(0)));
160  prob_total += prob;
161  inout[i] = prob;
162  }
163  if (prob_total > 0.0) {
164  for (int i = 0; i < n; i++) inout[i] /= prob_total;
165  }
166 }

◆ sort_cmp()

template<typename T >
int tesseract::sort_cmp ( const void *  t1,
const void *  t2 
)

Definition at line 384 of file genericvector.h.

384  {
385  const T* a = static_cast<const T*>(t1);
386  const T* b = static_cast<const T*>(t2);
387  if (*a < *b) {
388  return -1;
389  }
390  if (*b < *a) {
391  return 1;
392  }
393  return 0;
394 }

◆ sort_ptr_cmp()

template<typename T >
int tesseract::sort_ptr_cmp ( const void *  t1,
const void *  t2 
)

Definition at line 401 of file genericvector.h.

401  {
402  const T* a = *static_cast<T* const*>(t1);
403  const T* b = *static_cast<T* const*>(t2);
404  if (*a < *b) {
405  return -1;
406  }
407  if (*b < *a) {
408  return 1;
409  }
410  return 0;
411 }

◆ SortByBoxBottom()

template<class BBC >
int tesseract::SortByBoxBottom ( const void *  void1,
const void *  void2 
)

Definition at line 407 of file bbgrid.h.

407  {
408  // The void*s are actually doubly indirected, so get rid of one level.
409  const BBC* p1 = *static_cast<const BBC* const*>(void1);
410  const BBC* p2 = *static_cast<const BBC* const*>(void2);
411  int result = p1->bounding_box().bottom() - p2->bounding_box().bottom();
412  if (result != 0)
413  return result;
414  result = p1->bounding_box().top() - p2->bounding_box().top();
415  if (result != 0)
416  return result;
417  result = p1->bounding_box().left() - p2->bounding_box().left();
418  if (result != 0)
419  return result;
420  return p1->bounding_box().right() - p2->bounding_box().right();
421 }

◆ SortByBoxLeft()

template<class BBC >
int tesseract::SortByBoxLeft ( const void *  void1,
const void *  void2 
)

Definition at line 371 of file bbgrid.h.

371  {
372  // The void*s are actually doubly indirected, so get rid of one level.
373  const BBC* p1 = *static_cast<const BBC* const*>(void1);
374  const BBC* p2 = *static_cast<const BBC* const*>(void2);
375  int result = p1->bounding_box().left() - p2->bounding_box().left();
376  if (result != 0)
377  return result;
378  result = p1->bounding_box().right() - p2->bounding_box().right();
379  if (result != 0)
380  return result;
381  result = p1->bounding_box().bottom() - p2->bounding_box().bottom();
382  if (result != 0)
383  return result;
384  return p1->bounding_box().top() - p2->bounding_box().top();
385 }

◆ SortByRating()

template<class BLOB_CHOICE >
int tesseract::SortByRating ( const void *  void1,
const void *  void2 
)

Definition at line 81 of file pieces.cpp.

◆ SortByUnicharID()

template<class BLOB_CHOICE >
int tesseract::SortByUnicharID ( const void *  void1,
const void *  void2 
)

Definition at line 73 of file pieces.cpp.

78  {

◆ SortRightToLeft()

template<class BBC >
int tesseract::SortRightToLeft ( const void *  void1,
const void *  void2 
)

Definition at line 389 of file bbgrid.h.

389  {
390  // The void*s are actually doubly indirected, so get rid of one level.
391  const BBC* p1 = *static_cast<const BBC* const*>(void1);
392  const BBC* p2 = *static_cast<const BBC* const*>(void2);
393  int result = p2->bounding_box().right() - p1->bounding_box().right();
394  if (result != 0)
395  return result;
396  result = p2->bounding_box().left() - p1->bounding_box().left();
397  if (result != 0)
398  return result;
399  result = p1->bounding_box().bottom() - p2->bounding_box().bottom();
400  if (result != 0)
401  return result;
402  return p1->bounding_box().top() - p2->bounding_box().top();
403 }

◆ SpanUTF8NotWhitespace()

unsigned int tesseract::SpanUTF8NotWhitespace ( const char *  text)

Definition at line 259 of file normstrngs.cpp.

276  {

◆ SpanUTF8Whitespace()

unsigned int tesseract::SpanUTF8Whitespace ( const char *  text)

Definition at line 249 of file normstrngs.cpp.

253  {
254  return IsValidCodepoint(ch) &&
255  !(ch >= 0xFDD0 && ch <= 0xFDEF) && // Noncharacters.
256  !(ch >= 0xFFFE && ch <= 0xFFFF) && !(ch >= 0x1FFFE && ch <= 0x1FFFF) &&
257  !(ch >= 0x2FFFE && ch <= 0x2FFFF) &&

◆ StrongModel()

bool tesseract::StrongModel ( const ParagraphModel *  model)
inline

Definition at line 70 of file paragraphs_internal.h.

71  {
72  return model != nullptr && model != kCrownLeft && model != kCrownRight;

◆ SumVectors()

void tesseract::SumVectors ( int  n,
const double *  v1,
const double *  v2,
const double *  v3,
const double *  v4,
const double *  v5,
double *  sum 
)
inline

Definition at line 192 of file functions.h.

194  {
195  for (int i = 0; i < n; ++i) {
196  sum[i] = v1[i] + v2[i] + v3[i] + v4[i] + v5[i];
197  }
198 }

◆ Tanh()

double tesseract::Tanh ( double  x)
inline

Definition at line 43 of file functions.h.

43  {
44  if (x < 0.0) return -Tanh(-x);
45  x *= kScaleFactor;
46  unsigned index = static_cast<unsigned>(x);
47  if (index >= (kTableSize - 1)) return 1.0;
48  double tanh_i0 = TanhTable[index];
49  double tanh_i1 = TanhTable[index + 1];
50  // Linear interpolation.
51  return tanh_i0 + (tanh_i1 - tanh_i0) * (x - index);
52 }

◆ TEST_F() [1/34]

tesseract::TEST_F ( EquationFinderTest  ,
CheckSeedBlobsCount   
)

Definition at line 342 of file equationdetect_test.cc.

342  {
343  TBOX box(0, 950, 999, 999);
344  ColPartition* part1 =
345  ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
346  ColPartition* part2 =
347  ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
348  ColPartition* part3 =
349  ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
350  ColPartition* part4 =
351  ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
352 
353  // Part 1: 8 math, 0 digit, 20 total.
354  equation_det_->AddMathDigitBlobs(8, 0, 20, part1);
355  EXPECT_TRUE(equation_det_->RunCheckSeedBlobsCount(part1));
356 
357  // Part 2: 1 math, 8 digit, 20 total.
358  equation_det_->AddMathDigitBlobs(1, 8, 20, part2);
359  EXPECT_FALSE(equation_det_->RunCheckSeedBlobsCount(part2));
360 
361  // Part 3: 3 math, 8 digit, 20 total.
362  equation_det_->AddMathDigitBlobs(3, 8, 20, part3);
363  EXPECT_TRUE(equation_det_->RunCheckSeedBlobsCount(part3));
364 
365  // Part 4: 0 math, 0 digit, 8 total.
366  equation_det_->AddMathDigitBlobs(0, 0, 8, part4);
367  EXPECT_FALSE(equation_det_->RunCheckSeedBlobsCount(part4));
368 
369  // Release memory.
370  part1->DeleteBoxes();
371  delete (part1);
372  part2->DeleteBoxes();
373  delete (part2);
374  part3->DeleteBoxes();
375  delete (part3);
376  part4->DeleteBoxes();
377  delete (part4);
378 }

◆ TEST_F() [2/34]

tesseract::TEST_F ( EquationFinderTest  ,
ComputeCPsSuperBBox   
)

Definition at line 420 of file equationdetect_test.cc.

420  {
421  Pix* pix = pixCreate(1001, 1001, 1);
422  equation_det_->SetPixBinary(pix);
423  ColPartitionGrid part_grid(10, ICOORD(0, 0), ICOORD(1000, 1000));
424 
425  TBOX box1(0, 0, 999, 99);
426  ColPartition* part1 =
427  ColPartition::FakePartition(box1, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
428  TBOX box2(0, 100, 499, 199);
429  ColPartition* part2 =
430  ColPartition::FakePartition(box2, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
431  TBOX box3(500, 100, 999, 199);
432  ColPartition* part3 =
433  ColPartition::FakePartition(box3, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
434  TBOX box4(0, 200, 999, 299);
435  ColPartition* part4 =
436  ColPartition::FakePartition(box4, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
437  TBOX box5(0, 900, 999, 999);
438  ColPartition* part5 =
439  ColPartition::FakePartition(box5, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
440 
441  // Add part1->part3 into part_grid and test.
442  part_grid.InsertBBox(true, true, part1);
443  part_grid.InsertBBox(true, true, part2);
444  part_grid.InsertBBox(true, true, part3);
445  TBOX super_box(0, 0, 999, 199);
446  equation_det_->TestComputeCPsSuperBBox(super_box, &part_grid);
447 
448  // Add part4 and test.
449  part_grid.InsertBBox(true, true, part4);
450  TBOX super_box2(0, 0, 999, 299);
451  equation_det_->TestComputeCPsSuperBBox(super_box2, &part_grid);
452 
453  // Add part5 and test.
454  part_grid.InsertBBox(true, true, part5);
455  TBOX super_box3(0, 0, 999, 999);
456  equation_det_->TestComputeCPsSuperBBox(super_box3, &part_grid);
457 
458  // Release memory.
459  part1->DeleteBoxes();
460  delete (part1);
461  part2->DeleteBoxes();
462  delete (part2);
463  part3->DeleteBoxes();
464  delete (part3);
465  part4->DeleteBoxes();
466  delete (part4);
467  part5->DeleteBoxes();
468  delete (part5);
469 }

◆ TEST_F() [3/34]

tesseract::TEST_F ( EquationFinderTest  ,
ComputeForegroundDensity   
)

Definition at line 380 of file equationdetect_test.cc.

380  {
381  // Create the pix with top half foreground, bottom half background.
382  int width = 1024, height = 768;
383  Pix* pix = pixCreate(width, height, 1);
384  pixRasterop(pix, 0, 0, width, height / 2, PIX_SET, nullptr, 0, 0);
385  TBOX box1(100, 0, 140, 140), box2(100, height / 2 - 20, 140, height / 2 + 20),
386  box3(100, height - 40, 140, height);
387  equation_det_->SetPixBinary(pix);
388 
389  // Verify
390  EXPECT_NEAR(0.0, equation_det_->RunComputeForegroundDensity(box1), 0.0001f);
391  EXPECT_NEAR(0.5, equation_det_->RunComputeForegroundDensity(box2), 0.0001f);
392  EXPECT_NEAR(1.0, equation_det_->RunComputeForegroundDensity(box3), 0.0001f);
393 }

◆ TEST_F() [4/34]

tesseract::TEST_F ( EquationFinderTest  ,
CountAlignment   
)

Definition at line 395 of file equationdetect_test.cc.

395  {
396  GenericVector<int> vec;
397  vec.push_back(1);
398  vec.push_back(1);
399  vec.push_back(1);
400  vec.push_back(100);
401  vec.push_back(200);
402  vec.push_back(200);
403 
404  // Test the right point.
405  EXPECT_EQ(3, equation_det_->RunCountAlignment(vec, 1));
406  EXPECT_EQ(1, equation_det_->RunCountAlignment(vec, 100));
407  EXPECT_EQ(2, equation_det_->RunCountAlignment(vec, 200));
408 
409  // Test the near neighbors.
410  EXPECT_EQ(3, equation_det_->RunCountAlignment(vec, 3));
411  EXPECT_EQ(1, equation_det_->RunCountAlignment(vec, 99));
412  EXPECT_EQ(2, equation_det_->RunCountAlignment(vec, 202));
413 
414  // Test the far neighbors.
415  EXPECT_EQ(0, equation_det_->RunCountAlignment(vec, 150));
416  EXPECT_EQ(0, equation_det_->RunCountAlignment(vec, 50));
417  EXPECT_EQ(0, equation_det_->RunCountAlignment(vec, 250));
418 }

◆ TEST_F() [5/34]

tesseract::TEST_F ( EquationFinderTest  ,
EstimateTypeForUnichar   
)

Definition at line 233 of file equationdetect_test.cc.

233  {
234  // Test abc characters.
235  EXPECT_EQ(BSTT_NONE, equation_det_->RunEstimateTypeForUnichar("a"));
236  EXPECT_EQ(BSTT_NONE, equation_det_->RunEstimateTypeForUnichar("c"));
237 
238  // Test punctuation characters.
239  EXPECT_EQ(BSTT_NONE, equation_det_->RunEstimateTypeForUnichar("'"));
240  EXPECT_EQ(BSTT_NONE, equation_det_->RunEstimateTypeForUnichar(","));
241 
242  // Test digits.
243  EXPECT_EQ(BSTT_DIGIT, equation_det_->RunEstimateTypeForUnichar("1"));
244  EXPECT_EQ(BSTT_DIGIT, equation_det_->RunEstimateTypeForUnichar("4"));
245  EXPECT_EQ(BSTT_DIGIT, equation_det_->RunEstimateTypeForUnichar("|"));
246 
247  // Test math symbols.
248  EXPECT_EQ(BSTT_MATH, equation_det_->RunEstimateTypeForUnichar("("));
249  EXPECT_EQ(BSTT_MATH, equation_det_->RunEstimateTypeForUnichar("+"));
250 }

◆ TEST_F() [6/34]

tesseract::TEST_F ( EquationFinderTest  ,
IdentifySpecialText   
)

Definition at line 181 of file equationdetect_test.cc.

181  {
182 #if 1
183  GTEST_SKIP();
184 #else // TODO: missing equ_gt1.tif
185  // Load Image.
186  std::string imagefile = file::JoinPath(testdata_dir_, "equ_gt1.tif");
187  Pix* pix_binary = pixRead(imagefile.c_str());
188  CHECK(pix_binary != nullptr && pixGetDepth(pix_binary) == 1);
189 
190  // Get components.
191  BLOCK_LIST blocks;
192  TO_BLOCK_LIST to_blocks;
193  AddPageBlock(pix_binary, &blocks);
194  Textord* textord = tesseract_->mutable_textord();
195  textord->find_components(pix_binary, &blocks, &to_blocks);
196 
197  // Identify special texts from to_blocks.
198  TO_BLOCK_IT to_block_it(&to_blocks);
199  std::map<int, int> stt_count;
200  for (to_block_it.mark_cycle_pt(); !to_block_it.cycled_list();
201  to_block_it.forward()) {
202  TO_BLOCK* to_block = to_block_it.data();
203  BLOBNBOX_IT blob_it(&(to_block->blobs));
204  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
205  BLOBNBOX* blob = blob_it.data();
206  // blob->set_special_text_type(BSTT_NONE);
207  equation_det_->RunIdentifySpecialText(blob, 0);
208  tensorflow::gtl::InsertIfNotPresent(&stt_count, blob->special_text_type(), 0);
209  stt_count[blob->special_text_type()]++;
210  }
211  }
212 
213  // Verify the number, but allow a range of +/- kCountRange before squealing.
214  const int kCountRange = 3;
215  EXPECT_GE(39 + kCountRange, stt_count[BSTT_NONE]);
216  EXPECT_LE(39 - kCountRange, stt_count[BSTT_NONE]);
217 
218  // if you count all the subscripts etc, there are ~45 italic chars.
219  EXPECT_GE(45 + kCountRange, stt_count[BSTT_ITALIC]);
220  EXPECT_LE(45 - kCountRange, stt_count[BSTT_ITALIC]);
221  EXPECT_GE(41 + kCountRange, stt_count[BSTT_DIGIT]);
222  EXPECT_LE(41 - kCountRange, stt_count[BSTT_DIGIT]);
223  EXPECT_GE(50 + kCountRange, stt_count[BSTT_MATH]);
224  EXPECT_LE(50 - kCountRange, stt_count[BSTT_MATH]);
225  EXPECT_GE(10 + kCountRange, stt_count[BSTT_UNCLEAR]);
226  EXPECT_LE(10 - kCountRange, stt_count[BSTT_UNCLEAR]);
227 
228  // Release memory.
229  pixDestroy(&pix_binary);
230 #endif
231 }

◆ TEST_F() [7/34]

tesseract::TEST_F ( EquationFinderTest  ,
IsIndented   
)

Definition at line 252 of file equationdetect_test.cc.

252  {
253  ColPartitionGrid part_grid(10, ICOORD(0, 0), ICOORD(1000, 1000));
254 
255  // Create five ColPartitions:
256  // part 1: ************
257  // part 2: *********
258  // part 3: *******
259  // part 4: *****
260  //
261  // part 5: ********
262  TBOX box1(0, 950, 999, 999);
263  ColPartition* part1 =
264  ColPartition::FakePartition(box1, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
265  part_grid.InsertBBox(true, true, part1);
266  TBOX box2(300, 920, 900, 940);
267  ColPartition* part2 =
268  ColPartition::FakePartition(box2, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
269  part_grid.InsertBBox(true, true, part2);
270  TBOX box3(0, 900, 600, 910);
271  ColPartition* part3 =
272  ColPartition::FakePartition(box3, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
273  part_grid.InsertBBox(true, true, part3);
274  TBOX box4(300, 890, 600, 899);
275  ColPartition* part4 =
276  ColPartition::FakePartition(box4, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
277  part_grid.InsertBBox(true, true, part4);
278  TBOX box5(300, 500, 900, 510);
279  ColPartition* part5 =
280  ColPartition::FakePartition(box5, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
281  part_grid.InsertBBox(true, true, part5);
282 
283  // Test
284  // part1 should be no indent.
285  EXPECT_EQ(EquationDetect::NO_INDENT,
286  equation_det_->RunIsIndented(&part_grid, part1));
287  // part2 should be left indent in terms of part1.
288  EXPECT_EQ(EquationDetect::LEFT_INDENT,
289  equation_det_->RunIsIndented(&part_grid, part2));
290  // part3 should be right indent.
291  EXPECT_EQ(EquationDetect::RIGHT_INDENT,
292  equation_det_->RunIsIndented(&part_grid, part3));
293  // part4 should be both indented.
294  EXPECT_EQ(EquationDetect::BOTH_INDENT,
295  equation_det_->RunIsIndented(&part_grid, part4));
296  // part5 should be no indent because it is too far from part1.
297  EXPECT_EQ(EquationDetect::NO_INDENT,
298  equation_det_->RunIsIndented(&part_grid, part5));
299 
300  // Release memory.
301  part1->DeleteBoxes();
302  delete (part1);
303  part2->DeleteBoxes();
304  delete (part2);
305  part3->DeleteBoxes();
306  delete (part3);
307  part4->DeleteBoxes();
308  delete (part4);
309  part5->DeleteBoxes();
310  delete (part5);
311 }

◆ TEST_F() [8/34]

tesseract::TEST_F ( EquationFinderTest  ,
IsNearSmallNeighbor   
)

Definition at line 313 of file equationdetect_test.cc.

313  {
314  // Create four tboxes:
315  // part 1, part 2
316  // ***** *****
317  // part 3: *****
318  //
319  // part 4: *****************
320  TBOX box1(0, 950, 499, 999);
321  TBOX box2(500, 950, 999, 998);
322  TBOX box3(0, 900, 499, 949);
323  TBOX box4(0, 550, 499, 590);
324 
325  // Test
326  // box2 should be box1's near neighbor but not vice versa.
327  EXPECT_TRUE(equation_det_->RunIsNearSmallNeighbor(box1, box2));
328  EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box2, box1));
329  // box1 and box3 should be near neighbors of each other.
330  EXPECT_TRUE(equation_det_->RunIsNearSmallNeighbor(box1, box3));
331  EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box2, box3));
332  // box2 and box3 should not be near neighbors of each other.
333  EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box2, box3));
334  EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box3, box2));
335 
336  // box4 should not be the near neighbor of any one.
337  EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box1, box4));
338  EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box2, box4));
339  EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box3, box4));
340 }

◆ TEST_F() [9/34]

tesseract::TEST_F ( EquationFinderTest  ,
SplitCPHor   
)

Definition at line 506 of file equationdetect_test.cc.

506  {
507  TBOX box(0, 0, 999, 99);
508  ColPartition* part =
509  ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
510  part->DeleteBoxes();
511  part->set_median_width(10);
512  GenericVector<ColPartition*> parts_splitted;
513 
514  // Test an empty part.
515  equation_det_->RunSplitCPHor(part, &parts_splitted);
516  EXPECT_TRUE(parts_splitted.empty());
517  // Test with one blob.
518  AddBlobIntoPart(TBOX(0, 0, 10, 50), part);
519 
520  equation_det_->RunSplitCPHor(part, &parts_splitted);
521  EXPECT_EQ(1, parts_splitted.size());
522  EXPECT_TRUE(TBOX(0, 0, 10, 50) == parts_splitted[0]->bounding_box());
523 
524  // Add more blobs and test.
525  AddBlobIntoPart(TBOX(11, 0, 20, 60), part);
526  AddBlobIntoPart(TBOX(25, 0, 30, 55), part); // break point.
527  AddBlobIntoPart(TBOX(100, 0, 110, 15), part);
528  AddBlobIntoPart(TBOX(125, 0, 140, 45), part); // break point.
529  AddBlobIntoPart(TBOX(500, 0, 540, 35), part); // break point.
530  equation_det_->RunSplitCPHor(part, &parts_splitted);
531 
532  // Verify.
533  EXPECT_EQ(3, parts_splitted.size());
534  EXPECT_TRUE(TBOX(0, 0, 30, 60) == parts_splitted[0]->bounding_box());
535  EXPECT_TRUE(TBOX(100, 0, 140, 45) == parts_splitted[1]->bounding_box());
536  EXPECT_TRUE(TBOX(500, 0, 540, 35) == parts_splitted[2]->bounding_box());
537 
538  parts_splitted.delete_data_pointers();
539  part->DeleteBoxes();
540  delete (part);
541 }

◆ TEST_F() [10/34]

tesseract::TEST_F ( EquationFinderTest  ,
SplitCPHorLite   
)

Definition at line 471 of file equationdetect_test.cc.

471  {
472  TBOX box(0, 0, 999, 99);
473  ColPartition* part =
474  ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
475  part->DeleteBoxes();
476  part->set_median_width(10);
477  GenericVector<TBOX> splitted_boxes;
478 
479  // Test an empty part.
480  equation_det_->RunSplitCPHorLite(part, &splitted_boxes);
481  EXPECT_TRUE(splitted_boxes.empty());
482 
483  // Test with one blob.
484  AddBlobIntoPart(TBOX(0, 0, 10, 50), part);
485  equation_det_->RunSplitCPHorLite(part, &splitted_boxes);
486  EXPECT_EQ(1, splitted_boxes.size());
487  EXPECT_TRUE(TBOX(0, 0, 10, 50) == splitted_boxes[0]);
488 
489  // Add more blobs and test.
490  AddBlobIntoPart(TBOX(11, 0, 20, 60), part);
491  AddBlobIntoPart(TBOX(25, 0, 30, 55), part); // break point.
492  AddBlobIntoPart(TBOX(100, 0, 110, 15), part);
493  AddBlobIntoPart(TBOX(125, 0, 140, 45), part); // break point.
494  AddBlobIntoPart(TBOX(500, 0, 540, 35), part); // break point.
495  equation_det_->RunSplitCPHorLite(part, &splitted_boxes);
496  // Verify.
497  EXPECT_EQ(3, splitted_boxes.size());
498  EXPECT_TRUE(TBOX(0, 0, 30, 60) == splitted_boxes[0]);
499  EXPECT_TRUE(TBOX(100, 0, 140, 45) == splitted_boxes[1]);
500  EXPECT_TRUE(TBOX(500, 0, 540, 35) == splitted_boxes[2]);
501 
502  part->DeleteBoxes();
503  delete (part);
504 }

◆ TEST_F() [11/34]

tesseract::TEST_F ( HeapTest  ,
DoublePtrTest   
)

Definition at line 187 of file heap_test.cc.

187  {
188  DoublePtr ptr1;
189  DoublePtr ptr2;
190  ptr1.Connect(&ptr2);
191  // Check that the correct copy constructor is used.
192  DoublePtr ptr3(ptr1);
193  EXPECT_EQ(&ptr3, ptr3.OtherEnd()->OtherEnd());
194  EXPECT_TRUE(ptr1.OtherEnd() == nullptr);
195  // Check that the correct operator= is used.
196  ptr1 = ptr3;
197  EXPECT_EQ(&ptr1, ptr1.OtherEnd()->OtherEnd());
198  EXPECT_TRUE(ptr3.OtherEnd() == nullptr);
199 }

◆ TEST_F() [12/34]

tesseract::TEST_F ( HeapTest  ,
MixedTest   
)

Definition at line 95 of file heap_test.cc.

95  {
96  GenericHeap<IntKDPair> heap;
97  KDVector v;
98  // Push the test data onto both the heap and the KDVector.
99  PushTestData(&heap, &v);
100  // Sort the vector and remove the first 5 values from both heap and v.
101  v.sort();
102  for (int i = 0; i < 5; ++i) {
103  heap.Pop(nullptr);
104  v.remove(0);
105  }
106  // Push the test data onto both the heap and the KDVector.
107  PushTestData(&heap, &v);
108  // Heap and vector should still match!
109  VerifyHeapVectorMatch(&heap, &v);
110 }

◆ TEST_F() [13/34]

tesseract::TEST_F ( HeapTest  ,
PopWorstTest   
)

Definition at line 114 of file heap_test.cc.

114  {
115  GenericHeap<IntKDPair> heap;
116  KDVector v;
117  // Push the test data onto both the heap and the KDVector.
118  PushTestData(&heap, &v);
119  // Get the worst element off the heap.
120  IntKDPair pair;
121  heap.PopWorst(&pair);
122  EXPECT_EQ(pair.key, 65536);
123  EXPECT_EQ(pair.data, 6);
124  // Sort and remove the worst element from the vector.
125  v.sort();
126  v.truncate(v.size() - 1);
127  // After that they should still match!
128  VerifyHeapVectorMatch(&heap, &v);
129 }

◆ TEST_F() [14/34]

tesseract::TEST_F ( HeapTest  ,
RevalueTest   
)

Definition at line 133 of file heap_test.cc.

133  {
134  // Here the data element of the pair is a DoublePtr, which links the entries
135  // in the vector and heap, and we test a MAX heap.
136  typedef KDPairDec<int, DoublePtr> PtrPair;
137  GenericHeap<PtrPair> heap;
139  // Push the test data onto both the heap and the vector.
140  for (size_t i = 0; i < ARRAYSIZE(test_data); ++i) {
141  PtrPair h_pair;
142  h_pair.key = test_data[i];
143  PtrPair v_pair;
144  v_pair.key = test_data[i];
145  h_pair.data.Connect(&v_pair.data);
146  heap.Push(&h_pair);
147  v.push_back(v_pair);
148  }
149  // Test changes both ways. Index 0 is 8, so change it to -1.
150  v[0].key = -1;
151  // v[0].data.OtherEnd() is a pointer to the data element in the appropriate
152  // heap entry, wherever it may be. We can change its value via that pointer.
153  // Without Reshuffle, that would be a terribly bad thing to do, as it violates
154  // the heap invariant, making the heap corrupt.
155  PtrPair* pair_ptr = PtrPair::RecastDataPointer(v[0].data.OtherEnd());
156  pair_ptr->key = v[0].key;
157  heap.Reshuffle(pair_ptr);
158  // Index 1 is 1. Change to 32767.
159  v[1].key = 32767;
160  pair_ptr = PtrPair::RecastDataPointer(v[1].data.OtherEnd());
161  pair_ptr->key = v[1].key;
162  heap.Reshuffle(pair_ptr);
163  // After the changes, popping the heap should still match the sorted order
164  // of the vector.
165  v.sort();
166  EXPECT_GT(v[0].key, v.back().key);
167  for (int i = 0; i < v.size(); ++i) {
168  EXPECT_EQ(v[i].key, heap.PeekTop().key);
169  EXPECT_FALSE(heap.empty());
170  heap.Pop(nullptr);
171  }
172  EXPECT_TRUE(heap.empty());
173 }

◆ TEST_F() [15/34]

tesseract::TEST_F ( HeapTest  ,
SortTest   
)

Definition at line 82 of file heap_test.cc.

82  {
83  GenericHeap<IntKDPair> heap;
84  EXPECT_TRUE(heap.empty());
85  KDVector v;
86  EXPECT_EQ(heap.size(), v.size());
87  // Push the test data onto both the heap and the KDVector.
88  PushTestData(&heap, &v);
89  VerifyHeapVectorMatch(&heap, &v);
90 }

◆ TEST_F() [16/34]

tesseract::TEST_F ( LSTMTrainerTest  ,
BasicTest   
)

Definition at line 29 of file lstm_test.cc.

29  {
30  // A Convolver sliding window classifier without LSTM.
31  SetupTrainer(
32  "[1,32,0,1 Ct5,5,16 Mp4,4 Ct1,1,16 Ct3,3,128 Mp4,1 Ct1,1,64 S2,1 "
33  "Ct1,1,64O1c1]",
34  "no-lstm", "eng/eng.unicharset", "eng.Arial.exp0.lstmf", false, false,
35  2e-4, false, "eng");
36  double non_lstm_err = TrainIterations(kTrainerIterations * 4);
37  EXPECT_LT(non_lstm_err, 98);
38  LOG(INFO) << "********** Expected < 98 ************\n" ;
39 
40  // A basic single-layer, single direction LSTM.
41  SetupTrainerEng("[1,1,0,32 Lfx100 O1c1]", "1D-lstm", false, false);
42  double lstm_uni_err = TrainIterations(kTrainerIterations * 2);
43  EXPECT_LT(lstm_uni_err, 86);
44  LOG(INFO) << "********** Expected < 86 ************\n" ;
45  // Beats the convolver. (Although it does have a lot more weights, it still
46  // iterates faster.)
47  EXPECT_LT(lstm_uni_err, non_lstm_err);
48 }

◆ TEST_F() [17/34]

tesseract::TEST_F ( LSTMTrainerTest  ,
BidiTest   
)

Definition at line 61 of file lstm_test.cc.

61  {
62  // A basic single-layer, bi-di 1d LSTM.
63  SetupTrainerEng("[1,1,0,32 Lbx100 O1c1]", "bidi-lstm", false, false);
64  double lstm_bi_err = TrainIterations(kTrainerIterations);
65  EXPECT_LT(lstm_bi_err, 75);
66  LOG(INFO) << "********** Expected < 75 ************\n" ;
67  // Int mode training is dead, so convert the trained network to int and check
68  // that its error rate is close to the float version.
69  TestIntMode(kTrainerIterations);
70 }

◆ TEST_F() [18/34]

tesseract::TEST_F ( LSTMTrainerTest  ,
ColorTest   
)

Definition at line 51 of file lstm_test.cc.

51  {
52  // A 2-layer color-input LSTM with a 2-D feature-extracting LSTM on the bottom.
53  SetupTrainerEng("[1,32,0,3 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]",
54  "2D-color-lstm", true, true);
55  double lstm_uni_err = TrainIterations(kTrainerIterations);
56  EXPECT_LT(lstm_uni_err, 85);
57 // EXPECT_GT(lstm_uni_err, 66);
58  LOG(INFO) << "********** Expected < 85 ************\n" ;
59 }

◆ TEST_F() [19/34]

tesseract::TEST_F ( LSTMTrainerTest  ,
DeterminismTest   
)

Definition at line 111 of file lstm_test.cc.

111  {
112  SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]",
113  "2-D-2-layer-lstm", false, false);
114  double lstm_2d_err_a = TrainIterations(kTrainerIterations);
115  double act_error_a = trainer_->ActivationError();
116  double char_error_a = trainer_->CharError();
117  GenericVector<char> trainer_a_data;
118  EXPECT_TRUE(trainer_->SaveTrainingDump(NO_BEST_TRAINER, trainer_.get(),
119  &trainer_a_data));
120  SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]",
121  "2-D-2-layer-lstm", false, false);
122  double lstm_2d_err_b = TrainIterations(kTrainerIterations);
123  double act_error_b = trainer_->ActivationError();
124  double char_error_b = trainer_->CharError();
125  EXPECT_FLOAT_EQ(lstm_2d_err_a, lstm_2d_err_b);
126  EXPECT_FLOAT_EQ(act_error_a, act_error_b);
127  EXPECT_FLOAT_EQ(char_error_a, char_error_b);
128  // Now train some more iterations.
129  lstm_2d_err_b = TrainIterations(kTrainerIterations / 3);
130  act_error_b = trainer_->ActivationError();
131  char_error_b = trainer_->CharError();
132  // Unpack into a new trainer and train that some more too.
133  SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]",
134  "2-D-2-layer-lstm", false, false);
135  EXPECT_TRUE(trainer_->ReadTrainingDump(trainer_a_data, trainer_.get()));
136  lstm_2d_err_a = TrainIterations(kTrainerIterations / 3);
137  act_error_a = trainer_->ActivationError();
138  char_error_a = trainer_->CharError();
139  EXPECT_FLOAT_EQ(lstm_2d_err_a, lstm_2d_err_b);
140  EXPECT_FLOAT_EQ(act_error_a, act_error_b);
141  EXPECT_FLOAT_EQ(char_error_a, char_error_b);
142  LOG(INFO) << "********** *** ************\n" ;
143 }

◆ TEST_F() [20/34]

tesseract::TEST_F ( LSTMTrainerTest  ,
EncodeDecodeBothTestKor   
)

Definition at line 41 of file lstm_recode_test.cc.

41  {
42  TestEncodeDecodeBoth("kor", "한국어 위키백과에 오신 것을 환영합니다!");
43 }

◆ TEST_F() [21/34]

tesseract::TEST_F ( LSTMTrainerTest  ,
EncodedSoftmaxTest   
)

Definition at line 178 of file lstm_test.cc.

178  {
179  // LSTM with a built-in encoded softmax can beat the external softmax.
180  SetupTrainerEng("[1,1,0,32 LE96]", "Lstm-+-softmax", false, true);
181  double lstm_sm_err = TrainIterations(kTrainerIterations * 2);
182  EXPECT_LT(lstm_sm_err, 62.0);
183  LOG(INFO) << "********** Expected < 62 ************\n" ;
184  // Check that it works in int mode too.
185  TestIntMode(kTrainerIterations);
186 }

◆ TEST_F() [22/34]

tesseract::TEST_F ( LSTMTrainerTest  ,
RecodeTestKor   
)

Definition at line 29 of file lstm_recode_test.cc.

29  {
30  // A basic single-layer, bi-di 1d LSTM on Korean.
31  SetupTrainer("[1,1,0,32 Lbx96 O1c1]", "kor-recode", "kor/kor.unicharset",
32  "kor.Arial_Unicode_MS.exp0.lstmf", true, true, 5e-4, false, "kor");
33  double kor_recode_err = TrainIterations(kTrainerIterations);
34  EXPECT_LT(kor_recode_err, 60);
35  LOG(INFO) << "********** Expected < 60 ************\n" ;
36 }

◆ TEST_F() [23/34]

tesseract::TEST_F ( LSTMTrainerTest  ,
RecodeTestKorBase   
)

Definition at line 19 of file lstm_recode_test.cc.

19  {
20  // A basic single-layer, bi-di 1d LSTM on Korean.
21  SetupTrainer("[1,1,0,32 Lbx96 O1c1]", "kor-full", "kor/kor.unicharset",
22  "kor.Arial_Unicode_MS.exp0.lstmf", false, true, 5e-4, false, "kor");
23  double kor_full_err = TrainIterations(kTrainerIterations * 2);
24  EXPECT_LT(kor_full_err, 88);
25 // EXPECT_GT(kor_full_err, 85);
26  LOG(INFO) << "********** Expected < 88 ************\n" ;
27 }

◆ TEST_F() [24/34]

tesseract::TEST_F ( LSTMTrainerTest  ,
SoftmaxBaselineTest   
)

Definition at line 146 of file lstm_test.cc.

146  {
147  // A basic single-layer, single direction LSTM.
148  SetupTrainerEng("[1,1,0,32 Lfx96 O1c1]", "1D-lstm", false, true);
149  double lstm_uni_err = TrainIterations(kTrainerIterations * 2);
150  EXPECT_LT(lstm_uni_err, 60);
151 // EXPECT_GT(lstm_uni_err, 48);
152  LOG(INFO) << "********** Expected < 60 ************\n" ;
153  // Check that it works in int mode too.
154  TestIntMode(kTrainerIterations);
155  // If we run TestIntMode again, it tests that int_mode networks can
156  // serialize and deserialize correctly.
157  double delta = TestIntMode(kTrainerIterations);
158  // The two tests (both of int mode this time) should be almost identical.
159  LOG(INFO) << "Delta in Int mode error rates = " << delta << "\n";
160  EXPECT_LT(delta, 0.01);
161 }

◆ TEST_F() [25/34]

tesseract::TEST_F ( LSTMTrainerTest  ,
SoftmaxTest   
)

Definition at line 166 of file lstm_test.cc.

166  {
167  // LSTM with a built-in softmax can beat the external softmax.
168  SetupTrainerEng("[1,1,0,32 LS96]", "Lstm-+-softmax", false, true);
169  double lstm_sm_err = TrainIterations(kTrainerIterations * 2);
170  EXPECT_LT(lstm_sm_err, 49.0);
171  LOG(INFO) << "********** Expected < 49 ************\n" ;
172  // Check that it works in int mode too.
173  TestIntMode(kTrainerIterations);
174 }

◆ TEST_F() [26/34]

tesseract::TEST_F ( LSTMTrainerTest  ,
SpeedTest   
)

Definition at line 100 of file lstm_test.cc.

100  {
101  SetupTrainerEng(
102  "[1,30,0,1 Ct5,5,16 Mp2,2 L2xy24 Ct1,1,48 Mp5,1 Ct1,1,32 S3,1 Lbx64 "
103  "O1c1]",
104  "2-D-2-layer-lstm", false, true);
105  TrainIterations(kTrainerIterations);
106  LOG(INFO) << "********** *** ************\n" ;
107 }

◆ TEST_F() [27/34]

tesseract::TEST_F ( LSTMTrainerTest  ,
Test2D   
)

Definition at line 74 of file lstm_test.cc.

74  {
75  // A 2-layer LSTM with a 2-D feature-extracting LSTM on the bottom.
76  SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]",
77  "2-D-2-layer-lstm", false, false);
78  double lstm_2d_err = TrainIterations(kTrainerIterations * 3 / 2 );
79  EXPECT_LT(lstm_2d_err, 98);
80 // EXPECT_GT(lstm_2d_err, 90);
81  LOG(INFO) << "********** Expected < 98 ************\n" ;
82  // Int mode training is dead, so convert the trained network to int and check
83  // that its error rate is close to the float version.
84  TestIntMode(kTrainerIterations);
85 }

◆ TEST_F() [28/34]

tesseract::TEST_F ( LSTMTrainerTest  ,
TestAdam   
)

Definition at line 89 of file lstm_test.cc.

89  {
90  // A 2-layer LSTM with a 2-D feature-extracting LSTM on the bottom.
91  SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]",
92  "2-D-2-layer-lstm", false, true);
93  double lstm_2d_err = TrainIterations(kTrainerIterations);
94  EXPECT_LT(lstm_2d_err, 70);
95  LOG(INFO) << "********** Expected < 70 ************\n" ;
96  TestIntMode(kTrainerIterations);
97 }

◆ TEST_F() [29/34]

tesseract::TEST_F ( LSTMTrainerTest  ,
TestLayerAccess   
)

Definition at line 189 of file lstm_test.cc.

189  {
190  // A 2-layer LSTM with a Squashed feature-extracting LSTM on the bottom.
191  SetupTrainerEng("[1,32,0,1 Ct5,5,16 Mp2,2 Lfys32 Lbx128 O1c1]", "SQU-lstm",
192  false, false);
193  // Number of layers.
194  const int kNumLayers = 8;
195  // Expected layer names.
196  const char* kLayerIds[kNumLayers] = {":0", ":1:0", ":1:1", ":2",
197  ":3:0", ":4:0", ":4:1:0", ":5"};
198  const char* kLayerNames[kNumLayers] = {"Input", "Convolve", "ConvNL",
199  "Maxpool", "Lfys32", "Lbx128LTR",
200  "Lbx128", "Output"};
201  // Expected number of weights.
202  const int kNumWeights[kNumLayers] = {0,
203  0,
204  16 * (25 + 1),
205  0,
206  32 * (4 * (32 + 16 + 1)),
207  128 * (4 * (128 + 32 + 1)),
208  128 * (4 * (128 + 32 + 1)),
209  112 * (2 * 128 + 1)};
210 
211  GenericVector<STRING> layers = trainer_->EnumerateLayers();
212  EXPECT_EQ(kNumLayers, layers.size());
213  for (int i = 0; i < kNumLayers && i < layers.size(); ++i) {
214  EXPECT_STREQ(kLayerIds[i], layers[i].c_str());
215  EXPECT_STREQ(kLayerNames[i],
216  trainer_->GetLayer(layers[i])->name().c_str());
217  EXPECT_EQ(kNumWeights[i], trainer_->GetLayer(layers[i])->num_weights());
218  }
219 }

◆ TEST_F() [30/34]

tesseract::TEST_F ( LSTMTrainerTest  ,
TestSquashed   
)

Definition at line 18 of file lstm_squashed_test.cc.

18  {
19  // A 2-layer LSTM with a Squashed feature-extracting LSTM on the bottom, and
20  // a small convolution/maxpool below that.
21  // Match training conditions to those typically used with this spec:
22  // recoding on, adam on.
23  SetupTrainerEng("[1,32,0,1 Ct3,3,16 Mp3,3 Lfys48 Lbx96 O1c1]",
24  "SQU-2-layer-lstm", /*recode*/ true, /*adam*/ true);
25  double lstm_2d_err = TrainIterations(kTrainerIterations * 3 / 2);
26  EXPECT_LT(lstm_2d_err, 80);
27  LOG(INFO) << "********** < 80 ************\n" ;
28  TestIntMode(kTrainerIterations);
29 }

◆ TEST_F() [31/34]

tesseract::TEST_F ( NthItemTest  ,
BoringTest   
)

Definition at line 69 of file nthitem_test.cc.

69  {
70  KDVector v;
71  // Push the test data onto the KDVector.
72  int test_data[] = {8, 8, 8, 8, 8, 7, 7, 7, 7};
73  for (size_t i = 0; i < ARRAYSIZE(test_data); ++i) {
74  IntKDPair pair(test_data[i], i);
75  v.push_back(pair);
76  }
77  // The 3rd item is 7 but the 4th is 8.
78  int index = v.choose_nth_item(3);
79  // The result is 7.
80  EXPECT_EQ(7, v[index].key);
81  index = v.choose_nth_item(4);
82  // The result is 8.
83  EXPECT_EQ(8, v[index].key);
84  // Get the min item.
85  index = v.choose_nth_item(0);
86  // The result is 7.
87  EXPECT_EQ(7, v[index].key);
88  // Get the max item.
89  index = v.choose_nth_item(v.size() - 1);
90  // The result is 8.
91  EXPECT_EQ(8, v[index].key);
92 }

◆ TEST_F() [32/34]

tesseract::TEST_F ( NthItemTest  ,
EqualTest   
)

Definition at line 107 of file nthitem_test.cc.

107  {
108  KDVector v;
109  // Push the test data onto the KDVector.
110  PushTestData(&v);
111  // Add an extra 8. This makes the median 7.
112  IntKDPair pair(8, 13);
113  v.push_back(pair);
114  // Get the median item.
115  int index = v.choose_nth_item(v.size() / 2);
116  // The result is 7, it started out at index 4 or 12.
117  EXPECT_EQ(7, v[index].key);
118  EXPECT_TRUE(v[index].data == 4 || v[index].data == 12);
119 }

◆ TEST_F() [33/34]

tesseract::TEST_F ( NthItemTest  ,
GeneralTest   
)

Definition at line 45 of file nthitem_test.cc.

45  {
46  KDVector v;
47  // Push the test data onto the KDVector.
48  PushTestData(&v);
49  // Get the min item.
50  int index = v.choose_nth_item(0);
51  // The result is -32767.
52  EXPECT_EQ(-32767, v[index].key);
53  // Get the max item.
54  index = v.choose_nth_item(v.size() - 1);
55  // The result is 65536.
56  EXPECT_EQ(65536, v[index].key);
57  // Invalid items are silently truncated to valid.
58  // Get the min item.
59  index = v.choose_nth_item(-1);
60  // The result is -32767.
61  EXPECT_EQ(-32767, v[index].key);
62  // Get the max item.
63  index = v.choose_nth_item(v.size());
64  // The result is 65536.
65  EXPECT_EQ(65536, v[index].key);
66 }

◆ TEST_F() [34/34]

tesseract::TEST_F ( NthItemTest  ,
UniqueTest   
)

Definition at line 95 of file nthitem_test.cc.

95  {
96  KDVector v;
97  // Push the test data onto the KDVector.
98  PushTestData(&v);
99  // Get the median item.
100  int index = v.choose_nth_item(v.size() / 2);
101  // The result is 6, it started out at index 11.
102  EXPECT_EQ(6, v[index].key);
103  EXPECT_EQ(11, v[index].data);
104 }

◆ TraceBlockOnReducedPix()

Pix * tesseract::TraceBlockOnReducedPix ( BLOCK *  block,
int  gridsize,
ICOORD  bleft,
int *  left,
int *  bottom 
)

Definition at line 254 of file bbgrid.cpp.

255  {
256  const TBOX& box = block->pdblk.bounding_box();
257  Pix* pix = GridReducedPix(box, gridsize, bleft, left, bottom);
258  int wpl = pixGetWpl(pix);
259  l_uint32* data = pixGetData(pix);
260  ICOORDELT_IT it(block->pdblk.poly_block()->points());
261  for (it.mark_cycle_pt(); !it.cycled_list();) {
262  ICOORD pos = *it.data();
263  it.forward();
264  ICOORD next_pos = *it.data();
265  ICOORD line_vector = next_pos - pos;
266  int major, minor;
267  ICOORD major_step, minor_step;
268  line_vector.setup_render(&major_step, &minor_step, &major, &minor);
269  int accumulator = major / 2;
270  while (pos != next_pos) {
271  int grid_x = (pos.x() - bleft.x()) / gridsize - *left;
272  int grid_y = (pos.y() - bleft.y()) / gridsize - *bottom;
273  SET_DATA_BIT(data + grid_y * wpl, grid_x);
274  pos += major_step;
275  accumulator += minor;
276  if (accumulator >= major) {
277  accumulator -= major;
278  pos += minor_step;
279  }
280  }
281  }
282  return pix;
283 }

◆ TraceOutlineOnReducedPix()

Pix * tesseract::TraceOutlineOnReducedPix ( C_OUTLINE *  outline,
int  gridsize,
ICOORD  bleft,
int *  left,
int *  bottom 
)

Definition at line 228 of file bbgrid.cpp.

229  {
230  const TBOX& box = outline->bounding_box();
231  Pix* pix = GridReducedPix(box, gridsize, bleft, left, bottom);
232  int wpl = pixGetWpl(pix);
233  l_uint32* data = pixGetData(pix);
234  int length = outline->pathlength();
235  ICOORD pos = outline->start_pos();
236  for (int i = 0; i < length; ++i) {
237  int grid_x = (pos.x() - bleft.x()) / gridsize - *left;
238  int grid_y = (pos.y() - bleft.y()) / gridsize - *bottom;
239  SET_DATA_BIT(data + grid_y * wpl, grid_x);
240  pos += outline->step(i);
241  }
242  return pix;
243 }

◆ UnicodeFor()

int tesseract::UnicodeFor ( const UNICHARSET u,
const WERD_CHOICE werd,
int  pos 
)

Definition at line 303 of file paragraphs.cpp.

310  :
311  const UNICHARSET *u_;

◆ ValidBodyLine()

bool tesseract::ValidBodyLine ( const GenericVector< RowScratchRegisters > *  rows,
int  row,
const ParagraphModel model 
)

Definition at line 1303 of file paragraphs.cpp.

1307  {
1308  return NearlyEqual(row_a.rindent_ + row_a.rmargin_,
1309  row_b.rindent_ + row_b.rmargin_,
1310  Epsilon(row_a.ri_->average_interword_space));
1311  }
1312  return NearlyEqual(row_a.lindent_ + row_a.lmargin_,

◆ ValidFirstLine()

bool tesseract::ValidFirstLine ( const GenericVector< RowScratchRegisters > *  rows,
int  row,
const ParagraphModel model 
)

Definition at line 1292 of file paragraphs.cpp.

1300  {
1301  if (model != kCrownRight && model != kCrownLeft) {

◆ write_info()

bool tesseract::write_info ( FILE *  f,
const FontInfo fi 
)

Definition at line 166 of file fontinfo.cpp.

166  {
167  int32_t size = strlen(fi.name);
168  return tesseract::Serialize(f, &size) &&
169  tesseract::Serialize(f, &fi.name[0], size) &&
170  tesseract::Serialize(f, &fi.properties);
171 }

◆ write_set()

bool tesseract::write_set ( FILE *  f,
const FontSet fs 
)

Definition at line 235 of file fontinfo.cpp.

235  {
236  return tesseract::Serialize(f, &fs.size) &&
237  tesseract::Serialize(f, &fs.configs[0], fs.size);
238 }

◆ write_spacing_info()

bool tesseract::write_spacing_info ( FILE *  f,
const FontInfo fi 
)

Definition at line 201 of file fontinfo.cpp.

201  {
202  int32_t vec_size = (fi.spacing_vec == nullptr) ? 0 : fi.spacing_vec->size();
203  if (!tesseract::Serialize(f, &vec_size)) return false;
204  int16_t x_gap_invalid = -1;
205  for (int i = 0; i < vec_size; ++i) {
206  FontSpacingInfo *fs = fi.spacing_vec->get(i);
207  int32_t kern_size = (fs == nullptr) ? -1 : fs->kerned_x_gaps.size();
208  if (fs == nullptr) {
209  // Writing two invalid x-gaps.
210  if (!tesseract::Serialize(f, &x_gap_invalid, 2) ||
211  !tesseract::Serialize(f, &kern_size)) {
212  return false;
213  }
214  } else {
215  if (!tesseract::Serialize(f, &fs->x_gap_before) ||
216  !tesseract::Serialize(f, &fs->x_gap_after) ||
217  !tesseract::Serialize(f, &kern_size)) {
218  return false;
219  }
220  }
221  if (kern_size > 0 && (!fs->kerned_unichar_ids.Serialize(f) ||
222  !fs->kerned_x_gaps.Serialize(f))) {
223  return false;
224  }
225  }
226  return true;
227 }

◆ WriteFile()

bool tesseract::WriteFile ( const std::string output_dir,
const std::string lang,
const std::string suffix,
const GenericVector< char > &  data,
FileWriter  writer 
)

Definition at line 36 of file lang_model_helpers.cpp.

38  {
39  if (lang.empty()) return true;
40  std::string dirname = output_dir + "/" + lang;
41  // Attempt to make the directory, but ignore errors, as it may not be a
42  // standard filesystem, and the writer will complain if not successful.
43 #if defined(_WIN32)
44  _mkdir(dirname.c_str());
45 #else
46  mkdir(dirname.c_str(), S_IRWXU | S_IRWXG);
47 #endif
48  std::string filename = dirname + "/" + lang + suffix;
49  if (writer == nullptr)
50  return SaveDataToFile(data, filename.c_str());
51  else
52  return (*writer)(data, filename.c_str());
53 }

◆ WriteRecoder()

bool tesseract::WriteRecoder ( const UNICHARSET unicharset,
bool  pass_through,
const std::string output_dir,
const std::string lang,
FileWriter  writer,
STRING radical_table_data,
TessdataManager *  traineddata 
)

Definition at line 85 of file lang_model_helpers.cpp.

88  {
89  UnicharCompress recoder;
90  // Where the unicharset is carefully setup already to contain a good
91  // compact encoding, use a pass-through recoder that does nothing.
92  // For scripts that have a large number of unicodes (Han, Hangul) we want
93  // to use the recoder to compress the symbol space by re-encoding each
94  // unicode as multiple codes from a smaller 'alphabet' that are related to the
95  // shapes in the character. Hangul Jamo is a perfect example of this.
96  // See the Hangul Syllables section, sub-section "Equivalence" in:
97  // http://www.unicode.org/versions/Unicode10.0.0/ch18.pdf
98  if (pass_through) {
99  recoder.SetupPassThrough(unicharset);
100  } else {
101  int null_char =
102  unicharset.has_special_codes() ? UNICHAR_BROKEN : unicharset.size();
103  tprintf("Null char=%d\n", null_char);
104  if (!recoder.ComputeEncoding(unicharset, null_char, radical_table_data)) {
105  tprintf("Creation of encoded unicharset failed!!\n");
106  return false;
107  }
108  }
109  TFile fp;
110  GenericVector<char> recoder_data;
111  fp.OpenWrite(&recoder_data);
112  if (!recoder.Serialize(&fp)) return false;
113  traineddata->OverwriteEntry(TESSDATA_LSTM_RECODER, &recoder_data[0],
114  recoder_data.size());
115  STRING encoding = recoder.GetEncodingAsString(unicharset);
116  recoder_data.init_to_size(encoding.length(), 0);
117  memcpy(&recoder_data[0], &encoding[0], encoding.length());
118  STRING suffix;
119  suffix.add_str_int(".charset_size=", recoder.code_range());
120  suffix += ".txt";
121  return WriteFile(output_dir, lang, suffix.c_str(), recoder_data, writer);
122 }

◆ WriteShapeTable()

void tesseract::WriteShapeTable ( const STRING file_prefix,
const ShapeTable shape_table 
)

Definition at line 179 of file commontraining.cpp.

179  {
180  STRING shape_table_file = file_prefix;
181  shape_table_file += kShapeTableFileSuffix;
182  FILE* fp = fopen(shape_table_file.c_str(), "wb");
183  if (fp != nullptr) {
184  if (!shape_table.Serialize(fp)) {
185  fprintf(stderr, "Error writing shape table: %s\n",
186  shape_table_file.c_str());
187  }
188  fclose(fp);
189  } else {
190  fprintf(stderr, "Error creating shape table: %s\n",
191  shape_table_file.c_str());
192  }
193 }

◆ WriteUnicharset()

bool tesseract::WriteUnicharset ( const UNICHARSET unicharset,
const std::string output_dir,
const std::string lang,
FileWriter  writer,
TessdataManager *  traineddata 
)

Definition at line 71 of file lang_model_helpers.cpp.

73  {
74  GenericVector<char> unicharset_data;
75  TFile fp;
76  fp.OpenWrite(&unicharset_data);
77  if (!unicharset.save_to_file(&fp)) return false;
78  traineddata->OverwriteEntry(TESSDATA_LSTM_UNICHARSET, &unicharset_data[0],
79  unicharset_data.size());
80  return WriteFile(output_dir, lang, ".unicharset", unicharset_data, writer);
81 }

◆ ZeroVector()

template<typename T >
void tesseract::ZeroVector ( int  n,
T *  vec 
)
inline

Definition at line 202 of file functions.h.

202  {
203  memset(vec, 0, n * sizeof(*vec));
204 }

Variable Documentation

◆ _TFNetworkModel_default_instance_

TFNetworkModelDefaultTypeInternal tesseract::_TFNetworkModel_default_instance_

Definition at line 51 of file tfnetwork.pb.h.

◆ case_state_table

const int tesseract::case_state_table[6][4]
Initial value:
= {
{
0, 1, 5, 4},
{
0, 3, 2, 4},
{
0, -1, 2, -1},
{
0, 3, -1, 4},
{
0, -1, -1, 4},
{
5, -1, 2, -1},
}

Definition at line 44 of file context.cpp.

◆ DotProduct

DotProductFunction tesseract::DotProduct

Definition at line 50 of file simddetect.cpp.

◆ kAdamCorrectionIterations

const int tesseract::kAdamCorrectionIterations = 200000

Definition at line 35 of file weightmatrix.cpp.

◆ kAdamEpsilon

const double tesseract::kAdamEpsilon = 1e-8

Definition at line 37 of file weightmatrix.cpp.

◆ kAdamFlag

const int tesseract::kAdamFlag = 4

Definition at line 165 of file weightmatrix.cpp.

◆ kAdjacentLeaderSearchPadding

const int tesseract::kAdjacentLeaderSearchPadding = 2

Definition at line 116 of file tablefind.cpp.

◆ kAlignedFraction

const double tesseract::kAlignedFraction = 0.03125

Definition at line 38 of file alignedblob.cpp.

◆ kAlignedGapFraction

const double tesseract::kAlignedGapFraction = 0.75

Definition at line 42 of file alignedblob.cpp.

◆ kAllowBlobArea

const double tesseract::kAllowBlobArea = 0.05

Definition at line 57 of file tablefind.cpp.

◆ kAllowBlobHeight

const double tesseract::kAllowBlobHeight = 0.3

Definition at line 55 of file tablefind.cpp.

◆ kAllowBlobWidth

const double tesseract::kAllowBlobWidth = 0.4

Definition at line 56 of file tablefind.cpp.

◆ kAllowTextArea

const double tesseract::kAllowTextArea = 0.8

Definition at line 50 of file tablefind.cpp.

◆ kAllowTextHeight

const double tesseract::kAllowTextHeight = 0.5

Definition at line 48 of file tablefind.cpp.

◆ kAllowTextWidth

const double tesseract::kAllowTextWidth = 0.6

Definition at line 49 of file tablefind.cpp.

◆ kBatchIterations

const int tesseract::kBatchIterations = 100

Definition at line 37 of file lstm_test.h.

◆ kBestCheckpointFraction

const double tesseract::kBestCheckpointFraction = 31.0 / 32.0

Definition at line 69 of file lstmtrainer.cpp.

◆ kBigPartSizeRatio

const double tesseract::kBigPartSizeRatio = 1.75

Definition at line 46 of file colpartitiongrid.cpp.

◆ kBoxClipTolerance

const int tesseract::kBoxClipTolerance = 2

Definition at line 31 of file boxword.cpp.

◆ kBrokenCJKIterationFraction

const double tesseract::kBrokenCJKIterationFraction = 0.125

Definition at line 67 of file strokewidth.cpp.

◆ kBytesPer64BitNumber

const int tesseract::kBytesPer64BitNumber = 20

Max bytes in the decimal representation of int64_t.

Definition at line 1501 of file baseapi.cpp.

◆ kBytesPerBoxFileLine

const int tesseract::kBytesPerBoxFileLine = (kBytesPerNumber + 1) * kNumbersPerBlob + 1

Multiplier for max expected textlength assumes (kBytesPerNumber + space) * kNumbersPerBlob plus the newline. Add to this the original UTF8 characters, and one kMaxBytesPerLine for safety.

Definition at line 1499 of file baseapi.cpp.

◆ kBytesPerNumber

const int tesseract::kBytesPerNumber = 5

The number of bytes taken by each number. Since we use int16_t for ICOORD, assume only 5 digits max.

Definition at line 1493 of file baseapi.cpp.

◆ kCellSplitColumnThreshold

const int tesseract::kCellSplitColumnThreshold = 0

Definition at line 42 of file tablerecog.cpp.

◆ kCellSplitRowThreshold

const int tesseract::kCellSplitRowThreshold = 0

Definition at line 41 of file tablerecog.cpp.

◆ kCertaintyScale

const float tesseract::kCertaintyScale = 7.0f

Definition at line 35 of file linerec.cpp.

◆ kCertOffset

const double tesseract::kCertOffset = -0.085

Definition at line 50 of file lstmrecognizer.cpp.

◆ kCJKAspectRatio

const double tesseract::kCJKAspectRatio = 1.25

Definition at line 61 of file strokewidth.cpp.

◆ kCJKAspectRatioIncrease

const double tesseract::kCJKAspectRatioIncrease = 1.0625

Definition at line 63 of file strokewidth.cpp.

◆ kCJKBrokenDistanceFraction

const double tesseract::kCJKBrokenDistanceFraction = 0.25

Definition at line 57 of file strokewidth.cpp.

◆ kCJKMaxComponents

const int tesseract::kCJKMaxComponents = 8

Definition at line 59 of file strokewidth.cpp.

◆ kCJKRadius

const int tesseract::kCJKRadius = 2

Definition at line 55 of file strokewidth.cpp.

◆ kColumnWidthFactor

const int tesseract::kColumnWidthFactor = 20

Pixel resolution of column width estimates.

Definition at line 41 of file tabfind.h.

◆ kCosMaxSkewAngle

const double tesseract::kCosMaxSkewAngle = 0.866025

Definition at line 60 of file tabfind.cpp.

◆ kCrackSpacing

const int tesseract::kCrackSpacing = 100

Spacing of cracks across the page to break up tall vertical lines.

Definition at line 45 of file linefind.cpp.

◆ kCrownLeft

const ParagraphModel * tesseract::kCrownLeft = reinterpret_cast<ParagraphModel *>(static_cast<uintptr_t>(0xDEAD111F))

Definition at line 69 of file paragraphs.cpp.

◆ kCrownRight

const ParagraphModel * tesseract::kCrownRight = reinterpret_cast<ParagraphModel *>(static_cast<uintptr_t>(0xDEAD888F))

Definition at line 71 of file paragraphs.cpp.

◆ kDefaultResolution

const int tesseract::kDefaultResolution = 300

Definition at line 69 of file pango_font_info.cpp.

◆ kDiacriticXPadRatio

const double tesseract::kDiacriticXPadRatio = 7.0

Definition at line 70 of file strokewidth.cpp.

◆ kDiacriticYPadRatio

const double tesseract::kDiacriticYPadRatio = 1.75

Definition at line 73 of file strokewidth.cpp.

◆ kDictRatio

const double tesseract::kDictRatio = 2.25

Definition at line 48 of file lstmrecognizer.cpp.

◆ kDoNotReverse

const char tesseract::kDoNotReverse[] = "RRP_DO_NO_REVERSE"

Definition at line 49 of file trie.cpp.

◆ kDoubleFlag

const int tesseract::kDoubleFlag = 128

Definition at line 169 of file weightmatrix.cpp.

◆ kErrClip

const double tesseract::kErrClip = 1.0f

Definition at line 71 of file lstm.cpp.

◆ kErrorGraphInterval

const int tesseract::kErrorGraphInterval = 1000

Definition at line 57 of file lstmtrainer.cpp.

◆ kExposureFactor

const int tesseract::kExposureFactor = 16

Definition at line 75 of file degradeimage.cpp.

◆ kFeaturePadding

const int tesseract::kFeaturePadding = 2

Definition at line 36 of file imagedata.h.

◆ kFontMergeDistance

const float tesseract::kFontMergeDistance = 0.025

Definition at line 48 of file mastertrainer.cpp.

◆ kForceReverse

const char tesseract::kForceReverse[] = "RRP_FORCE_REVERSE"

Definition at line 51 of file trie.cpp.

◆ kGoodRowNumberOfColumnsLarge

const double tesseract::kGoodRowNumberOfColumnsLarge = 0.7

Definition at line 60 of file tablerecog.cpp.

◆ kGoodRowNumberOfColumnsSmall

const double tesseract::kGoodRowNumberOfColumnsSmall[] = { 2, 2, 2, 2, 2, 3, 3 }

Definition at line 56 of file tablerecog.cpp.

◆ kGoodRowNumberOfColumnsSmallSize

const int tesseract::kGoodRowNumberOfColumnsSmallSize
Initial value:
=
sizeof(kGoodRowNumberOfColumnsSmall) / sizeof(double) - 1

Definition at line 57 of file tablerecog.cpp.

◆ kGutterMultiple

const int tesseract::kGutterMultiple = 4

Definition at line 35 of file tabvector.cpp.

◆ kGutterToNeighbourRatio

const int tesseract::kGutterToNeighbourRatio = 3

Definition at line 37 of file tabvector.cpp.

◆ kHighConfidence

const double tesseract::kHighConfidence = 0.9375

Definition at line 65 of file lstmtrainer.cpp.

◆ kHistogramBuckets

const int tesseract::kHistogramBuckets = 16

Definition at line 367 of file weightmatrix.cpp.

◆ kHistogramSize

const int tesseract::kHistogramSize = 256

Definition at line 27 of file otsuthr.h.

◆ kHorizontalGapMergeFraction

const double tesseract::kHorizontalGapMergeFraction = 0.5

Definition at line 49 of file colfind.cpp.

◆ kHorizontalSpacing

const double tesseract::kHorizontalSpacing = 0.30

Definition at line 35 of file tablerecog.cpp.

◆ kHorzStrongTextlineAspect

const int tesseract::kHorzStrongTextlineAspect = 5

Definition at line 67 of file colpartition.cpp.

◆ kHorzStrongTextlineCount

const int tesseract::kHorzStrongTextlineCount = 8

Definition at line 63 of file colpartition.cpp.

◆ kHorzStrongTextlineHeight

const int tesseract::kHorzStrongTextlineHeight = 10

Definition at line 65 of file colpartition.cpp.

◆ kImagePadding

const int tesseract::kImagePadding = 4

Definition at line 38 of file imagedata.h.

◆ kImprovementFraction

const double tesseract::kImprovementFraction = 15.0 / 16.0

Definition at line 67 of file lstmtrainer.cpp.

◆ kInfiniteDist

const float tesseract::kInfiniteDist = 999.0f

Definition at line 905 of file mastertrainer.cpp.

◆ kInt8Flag

const int tesseract::kInt8Flag = 1

Definition at line 163 of file weightmatrix.cpp.

◆ kLargeTableProjectionThreshold

const double tesseract::kLargeTableProjectionThreshold = 0.45

Definition at line 106 of file tablefind.cpp.

◆ kLargeTableRowCount

const int tesseract::kLargeTableRowCount = 6

Definition at line 108 of file tablefind.cpp.

◆ kLatinChs

const int tesseract::kLatinChs[]
Initial value:
= {
0x00a2, 0x0022, 0x0022, 0x0027, 0x0027, 0x00b7, 0x002d, 0
}

Latin chars corresponding to the unicode chars above.

Definition at line 1562 of file baseapi.cpp.

◆ kLearningRateDecay

const double tesseract::kLearningRateDecay = M_SQRT1_2

Definition at line 53 of file lstmtrainer.cpp.

◆ kLeftIndentAlignmentCountTh

const int tesseract::kLeftIndentAlignmentCountTh = 1

Definition at line 85 of file equationdetect.cpp.

◆ kLineCountReciprocal

const double tesseract::kLineCountReciprocal = 4.0

Definition at line 48 of file tabvector.cpp.

◆ kLinedTableMinHorizontalLines

const int tesseract::kLinedTableMinHorizontalLines = 3

Definition at line 45 of file tablerecog.cpp.

◆ kLinedTableMinVerticalLines

const int tesseract::kLinedTableMinVerticalLines = 3

Definition at line 44 of file tablerecog.cpp.

◆ kLineFindGridSize

const int tesseract::kLineFindGridSize = 50

Grid size used by line finder. Not very critical.

Definition at line 47 of file linefind.cpp.

◆ kLineFragmentAspectRatio

const double tesseract::kLineFragmentAspectRatio = 10.0

Definition at line 54 of file tabfind.cpp.

◆ kLineResidueAspectRatio

const double tesseract::kLineResidueAspectRatio = 8.0

Definition at line 94 of file strokewidth.cpp.

◆ kLineResiduePadRatio

const int tesseract::kLineResiduePadRatio = 3

Definition at line 96 of file strokewidth.cpp.

◆ kLineResidueSizeRatio

const double tesseract::kLineResidueSizeRatio = 1.75

Definition at line 98 of file strokewidth.cpp.

◆ kLineTrapLongest

const int tesseract::kLineTrapLongest = 4

Definition at line 87 of file strokewidth.cpp.

◆ kLineTrapShortest

const int tesseract::kLineTrapShortest = 2

Definition at line 89 of file strokewidth.cpp.

◆ kLRM

const char *const tesseract::kLRM = "\u200E"

Left-to-Right Mark.

Definition at line 38 of file unicodes.cpp.

◆ kMarginFactor

const double tesseract::kMarginFactor = 1.1

Definition at line 50 of file tablerecog.cpp.

◆ kMarginOverlapFraction

const double tesseract::kMarginOverlapFraction = 0.25

Definition at line 44 of file colpartitiongrid.cpp.

◆ kMathDigitDensityTh1

const float tesseract::kMathDigitDensityTh1 = 0.25

Definition at line 80 of file equationdetect.cpp.

◆ kMathDigitDensityTh2

const float tesseract::kMathDigitDensityTh2 = 0.1

Definition at line 81 of file equationdetect.cpp.

◆ kMathItalicDensityTh

const float tesseract::kMathItalicDensityTh = 0.5

Definition at line 82 of file equationdetect.cpp.

◆ kMaxAmbigStringSize

const int tesseract::kMaxAmbigStringSize = UNICHAR_LEN * (MAX_AMBIG_SIZE + 1)

Definition at line 41 of file ambigs.cpp.

◆ kMaxBaselineError

const double tesseract::kMaxBaselineError = 0.4375

Definition at line 70 of file colpartition.cpp.

◆ kMaxBlobOverlapFactor

const double tesseract::kMaxBlobOverlapFactor = 4.0

Definition at line 76 of file tablefind.cpp.

◆ kMaxBlobWidth

const int tesseract::kMaxBlobWidth = 500

Definition at line 39 of file tablefind.cpp.

◆ kMaxBoxEdgeDiff

const int16_t tesseract::kMaxBoxEdgeDiff = 2

Definition at line 32 of file recogtraining.cpp.

◆ kMaxBoxesInDataPartition

const int tesseract::kMaxBoxesInDataPartition = 20

Definition at line 65 of file tablefind.cpp.

◆ kMaxBytesPerLine

const int tesseract::kMaxBytesPerLine
Initial value:
= kNumbersPerBlob * (kBytesPer64BitNumber + 1) + 1 +
UNICHAR_LEN

A maximal single box could occupy kNumbersPerBlob numbers at kBytesPer64BitNumber digits (if someone sneaks in a 64 bit value) and a space plus the newline and the maximum length of a UNICHAR. Test against this on each iteration for safety.

Definition at line 1508 of file baseapi.cpp.

◆ kMaxCaptionLines

const int tesseract::kMaxCaptionLines = 7

Definition at line 38 of file colpartitiongrid.cpp.

◆ kMaxCharTopRange

const int tesseract::kMaxCharTopRange = 48

Definition at line 82 of file fixxht.cpp.

◆ kMaxCircleErosions

const int tesseract::kMaxCircleErosions = 8

Definition at line 66 of file pagesegmain.cpp.

◆ kMaxCJKSizeRatio

const int tesseract::kMaxCJKSizeRatio = 5

Definition at line 65 of file strokewidth.cpp.

◆ kMaxColorDistance

const int tesseract::kMaxColorDistance = 900

Definition at line 77 of file colpartition.cpp.

◆ kMaxColumnHeaderDistance

const int tesseract::kMaxColumnHeaderDistance = 4

Definition at line 84 of file tablefind.cpp.

◆ kMaxDiacriticDistanceRatio

const double tesseract::kMaxDiacriticDistanceRatio = 1.25

Definition at line 79 of file strokewidth.cpp.

◆ kMaxDiacriticGapToBaseCharHeight

const double tesseract::kMaxDiacriticGapToBaseCharHeight = 1.0

Definition at line 82 of file strokewidth.cpp.

◆ kMaxDistToPartSizeRatio

const double tesseract::kMaxDistToPartSizeRatio = 1.5

Definition at line 54 of file colfind.cpp.

◆ kMaxFillinMultiple

const int tesseract::kMaxFillinMultiple = 11

Definition at line 44 of file tabvector.cpp.

◆ kMaxGapInTextPartition

const double tesseract::kMaxGapInTextPartition = 4.0

Definition at line 68 of file tablefind.cpp.

◆ kMaxGutterWidthAbsolute

const double tesseract::kMaxGutterWidthAbsolute = 2.00

Definition at line 49 of file tabfind.cpp.

◆ kMaxIncompatibleColumnCount

const int tesseract::kMaxIncompatibleColumnCount = 2

Definition at line 46 of file colfind.cpp.

◆ kMaxInputHeight

const int tesseract::kMaxInputHeight = 48

Definition at line 28 of file input.cpp.

◆ kMaxIntSize

const int tesseract::kMaxIntSize = 22

Max string length of an int.

Definition at line 121 of file baseapi.cpp.

◆ kMaxLargeOverlapsWithMedium

const int tesseract::kMaxLargeOverlapsWithMedium = 12

Definition at line 43 of file ccnontextdetect.cpp.

◆ kMaxLargeOverlapsWithSmall

const int tesseract::kMaxLargeOverlapsWithSmall = 3

Definition at line 34 of file ccnontextdetect.cpp.

◆ kMaxLeaderGapFractionOfMax

const double tesseract::kMaxLeaderGapFractionOfMax = 0.25

Definition at line 53 of file colpartition.cpp.

◆ kMaxLeaderGapFractionOfMin

const double tesseract::kMaxLeaderGapFractionOfMin = 0.5

Definition at line 55 of file colpartition.cpp.

◆ kMaxLigature

const int tesseract::kMaxLigature = 0xfb17

Definition at line 64 of file ligature_table.cpp.

◆ kMaxLineLength

const int tesseract::kMaxLineLength = 1024

Definition at line 318 of file boxchar.cpp.

◆ kMaxLineResidue

const int tesseract::kMaxLineResidue = 6

Definition at line 53 of file linefind.cpp.

◆ kMaxMediumOverlapsWithSmall

const int tesseract::kMaxMediumOverlapsWithSmall = 12

Definition at line 39 of file ccnontextdetect.cpp.

◆ kMaxNeighbourDistFactor

const int tesseract::kMaxNeighbourDistFactor = 4

Definition at line 36 of file colpartitiongrid.cpp.

◆ kMaxNonLineDensity

const double tesseract::kMaxNonLineDensity = 0.25

Definition at line 58 of file linefind.cpp.

◆ kMaxOffsetDist

const int tesseract::kMaxOffsetDist = 32

Definition at line 32 of file intfeaturemap.cpp.

◆ kMaxOutputRegisters

constexpr int tesseract::kMaxOutputRegisters = 8
constexpr

Definition at line 35 of file intsimdmatrixavx2.cpp.

◆ kMaxPadFactor

const int tesseract::kMaxPadFactor = 6

Definition at line 33 of file colpartitiongrid.cpp.

◆ kMaxParagraphEndingLeftSpaceMultiple

const double tesseract::kMaxParagraphEndingLeftSpaceMultiple = 3.0

Definition at line 125 of file tablefind.cpp.

◆ kMaxPartitionSpacing

const double tesseract::kMaxPartitionSpacing = 1.75

Definition at line 61 of file colpartitiongrid.cpp.

◆ kMaxRaggedSearch

const int tesseract::kMaxRaggedSearch = 25

Definition at line 39 of file tabfind.cpp.

◆ kMaxRealDistance

const int tesseract::kMaxRealDistance = 2.0

Definition at line 39 of file detlinefit.cpp.

◆ kMaxRectangularFraction

const double tesseract::kMaxRectangularFraction = 0.75

Definition at line 42 of file imagefind.cpp.

◆ kMaxRectangularGradient

const double tesseract::kMaxRectangularGradient = 0.1

Definition at line 45 of file imagefind.cpp.

◆ kMaxRMSColorNoise

const int tesseract::kMaxRMSColorNoise = 128

Definition at line 74 of file colpartition.cpp.

◆ kMaxRowSize

const double tesseract::kMaxRowSize = 2.5

Definition at line 53 of file tablerecog.cpp.

◆ kMaxSameBlockLineSpacing

const double tesseract::kMaxSameBlockLineSpacing = 3

Definition at line 49 of file colpartition.cpp.

◆ kMaxSizeRatio

const double tesseract::kMaxSizeRatio = 1.5

Definition at line 51 of file colpartition.cpp.

◆ kMaxSkewFactor

const int tesseract::kMaxSkewFactor = 15

Definition at line 64 of file alignedblob.cpp.

◆ kMaxSmallNeighboursPerPix

const double tesseract::kMaxSmallNeighboursPerPix = 1.0 / 32

Definition at line 31 of file ccnontextdetect.cpp.

◆ kMaxSpacingDrift

const double tesseract::kMaxSpacingDrift = 1.0 / 72

Definition at line 43 of file colpartition.cpp.

◆ kMaxStaveHeight

const double tesseract::kMaxStaveHeight = 1.0

Definition at line 60 of file linefind.cpp.

◆ kMaxTableCellXheight

const double tesseract::kMaxTableCellXheight = 2.0

Definition at line 80 of file tablefind.cpp.

◆ kMaxTopSpacingFraction

const double tesseract::kMaxTopSpacingFraction = 0.25

Definition at line 46 of file colpartition.cpp.

◆ kMaxUnicharsPerCluster

const int tesseract::kMaxUnicharsPerCluster = 2000

Definition at line 46 of file mastertrainer.cpp.

◆ kMaxVerticalSearch

const int tesseract::kMaxVerticalSearch = 12

Definition at line 38 of file tabfind.cpp.

◆ kMaxVerticalSpacing

const int tesseract::kMaxVerticalSpacing = 500

Definition at line 37 of file tablefind.cpp.

◆ kMaxWinSize

const int tesseract::kMaxWinSize = 2000

Definition at line 50 of file network.cpp.

◆ kMaxXProjectionGapFactor

const double tesseract::kMaxXProjectionGapFactor = 2.0

Definition at line 135 of file tablefind.cpp.

◆ kMinAlignedGutter

const double tesseract::kMinAlignedGutter = 0.25

Definition at line 50 of file tabvector.cpp.

◆ kMinAlignedTabs

const int tesseract::kMinAlignedTabs = 4

Definition at line 54 of file alignedblob.cpp.

◆ kMinBaselineCoverage

const double tesseract::kMinBaselineCoverage = 0.5

Definition at line 72 of file colpartition.cpp.

◆ kMinBoxesInTextPartition

const int tesseract::kMinBoxesInTextPartition = 10

Definition at line 62 of file tablefind.cpp.

◆ kMinCaptionGapHeightRatio

const double tesseract::kMinCaptionGapHeightRatio = 0.5

Definition at line 42 of file colpartitiongrid.cpp.

◆ kMinCaptionGapRatio

const double tesseract::kMinCaptionGapRatio = 2.0

Definition at line 40 of file colpartitiongrid.cpp.

◆ kMinCertainty

const float tesseract::kMinCertainty = -20.0f

Definition at line 30 of file networkio.cpp.

◆ kMinChainTextValue

const int tesseract::kMinChainTextValue = 3

Definition at line 61 of file colpartition.cpp.

◆ kMinClusteredShapes

const int tesseract::kMinClusteredShapes = 1

Definition at line 44 of file mastertrainer.cpp.

◆ kMinColorDifference

const int tesseract::kMinColorDifference = 16

Definition at line 51 of file imagefind.cpp.

◆ kMinColumnWidth

const int tesseract::kMinColumnWidth = 2.0 / 3

Definition at line 31 of file colpartitionset.cpp.

◆ kMinDiacriticSizeRatio

const double tesseract::kMinDiacriticSizeRatio = 1.0625

Definition at line 76 of file strokewidth.cpp.

◆ kMinDivergenceRate

const double tesseract::kMinDivergenceRate = 50.0

Definition at line 46 of file lstmtrainer.cpp.

◆ kMinEvaluatedTabs

const int tesseract::kMinEvaluatedTabs = 3

Definition at line 56 of file tabfind.cpp.

◆ kMinFilledArea

const double tesseract::kMinFilledArea = 0.35

Definition at line 63 of file tablerecog.cpp.

◆ kMinFractionalLinesInColumn

const double tesseract::kMinFractionalLinesInColumn = 0.125

Definition at line 45 of file tabfind.cpp.

◆ kMinGoodTextPARatio

const double tesseract::kMinGoodTextPARatio = 1.5

Definition at line 59 of file ccnontextdetect.cpp.

◆ kMinGutterFraction

const double tesseract::kMinGutterFraction = 0.5

Definition at line 46 of file tabvector.cpp.

◆ kMinGutterWidthGrid

const double tesseract::kMinGutterWidthGrid = 0.5

Definition at line 51 of file colfind.cpp.

◆ kMinImageFindSize

const int tesseract::kMinImageFindSize = 100

Definition at line 47 of file imagefind.cpp.

◆ kMinLeaderCount

const int tesseract::kMinLeaderCount = 5

Definition at line 57 of file colpartition.cpp.

◆ kMinLigature

const int tesseract::kMinLigature = 0xfb00

Definition at line 63 of file ligature_table.cpp.

◆ kMinLineLengthFraction

const int tesseract::kMinLineLengthFraction = 4

Denominator by which the resolution is divided to give the minimum length, in pixels, demanded of a line.

Definition at line 43 of file linefind.cpp.

◆ kMinLinesInColumn

const int tesseract::kMinLinesInColumn = 10

Definition at line 41 of file tabfind.cpp.

◆ kMinMaxGapInTextPartition

const double tesseract::kMinMaxGapInTextPartition = 0.5

Definition at line 72 of file tablefind.cpp.

◆ kMinMusicPixelFraction

const double tesseract::kMinMusicPixelFraction = 0.75

Definition at line 62 of file linefind.cpp.

◆ kMinOverlapWithTable

const double tesseract::kMinOverlapWithTable = 0.6

Definition at line 96 of file tablefind.cpp.

◆ kMinParagraphEndingTextToWhitespaceRatio

const double tesseract::kMinParagraphEndingTextToWhitespaceRatio = 3.0

Definition at line 131 of file tablefind.cpp.

◆ kMinPointsForErrorCount

const int tesseract::kMinPointsForErrorCount = 16

Definition at line 36 of file detlinefit.cpp.

◆ kMinProb

const float tesseract::kMinProb = exp(kMinCertainty)

Definition at line 32 of file networkio.cpp.

◆ kMinRaggedGutter

const double tesseract::kMinRaggedGutter = 1.5

Definition at line 52 of file tabvector.cpp.

◆ kMinRaggedTabs

const int tesseract::kMinRaggedTabs = 5

Definition at line 52 of file alignedblob.cpp.

◆ kMinRampSize

const int tesseract::kMinRampSize = 1000

Definition at line 79 of file degradeimage.cpp.

◆ kMinRectangularFraction

const double tesseract::kMinRectangularFraction = 0.125

Definition at line 40 of file imagefind.cpp.

◆ kMinRectSize

const int tesseract::kMinRectSize = 10

Minimum sensible image size to be worth running tesseract.

Definition at line 104 of file baseapi.cpp.

◆ kMinRowsInTable

const int tesseract::kMinRowsInTable = 3

Definition at line 111 of file tablefind.cpp.

◆ kMinStallIterations

const int tesseract::kMinStallIterations = 10000

Definition at line 48 of file lstmtrainer.cpp.

◆ kMinStartedErrorRate

const int tesseract::kMinStartedErrorRate = 75

Definition at line 61 of file lstmtrainer.cpp.

◆ kMinStrongTextValue

const int tesseract::kMinStrongTextValue = 6

Definition at line 59 of file colpartition.cpp.

◆ kMinTabGradient

const double tesseract::kMinTabGradient = 4.0

Definition at line 60 of file alignedblob.cpp.

◆ kMinThickLineWidth

const int tesseract::kMinThickLineWidth = 12

Definition at line 49 of file linefind.cpp.

◆ kMinVerticalSearch

const int tesseract::kMinVerticalSearch = 3

Definition at line 37 of file tabfind.cpp.

◆ kMinWinSize

const int tesseract::kMinWinSize = 500

Definition at line 49 of file network.cpp.

◆ kMostlyOneDirRatio

const int tesseract::kMostlyOneDirRatio = 3

Definition at line 92 of file strokewidth.cpp.

◆ kNeighbourSearchFactor

const double tesseract::kNeighbourSearchFactor = 2.5

Definition at line 102 of file strokewidth.cpp.

◆ kNoiseOverlapAreaFactor

const double tesseract::kNoiseOverlapAreaFactor = 1.0 / 512

Definition at line 107 of file strokewidth.cpp.

◆ kNoiseOverlapGrowthFactor

const double tesseract::kNoiseOverlapGrowthFactor = 4.0

Definition at line 104 of file strokewidth.cpp.

◆ kNoisePadding

const int tesseract::kNoisePadding = 4

Definition at line 50 of file ccnontextdetect.cpp.

◆ kNumAdjustmentIterations

const int tesseract::kNumAdjustmentIterations = 100

Definition at line 55 of file lstmtrainer.cpp.

◆ kNumbersPerBlob

const int tesseract::kNumbersPerBlob = 5

The 5 numbers output for each box (the usual 4 and a page number).

Definition at line 1488 of file baseapi.cpp.

◆ kNumEndPoints

const int tesseract::kNumEndPoints = 3

Definition at line 30 of file detlinefit.cpp.

◆ kNumInputGroups

constexpr int tesseract::kNumInputGroups = kNumInputsPerRegister / kNumInputsPerGroup
constexpr

Definition at line 41 of file intsimdmatrixavx2.cpp.

◆ kNumInputsPerGroup

constexpr int tesseract::kNumInputsPerGroup = 4
constexpr

Definition at line 39 of file intsimdmatrixavx2.cpp.

◆ kNumInputsPerRegister

constexpr int tesseract::kNumInputsPerRegister = 32
constexpr

Definition at line 37 of file intsimdmatrixavx2.cpp.

◆ kNumOutputsPerRegister

constexpr int tesseract::kNumOutputsPerRegister = 8
constexpr

Definition at line 33 of file intsimdmatrixavx2.cpp.

◆ kNumPagesPerBatch

const int tesseract::kNumPagesPerBatch = 100

Definition at line 59 of file lstmtrainer.cpp.

◆ kOriginalNoiseMultiple

const int tesseract::kOriginalNoiseMultiple = 8

Definition at line 46 of file ccnontextdetect.cpp.

◆ kParagraphEndingPreviousLineRatio

const double tesseract::kParagraphEndingPreviousLineRatio = 1.3

Definition at line 121 of file tablefind.cpp.

◆ kPDF

const char *const tesseract::kPDF = "\u202C"

Pop Directional Formatting.

Definition at line 41 of file unicodes.cpp.

◆ kPhotoOffsetFraction

const double tesseract::kPhotoOffsetFraction = 0.375

Definition at line 53 of file ccnontextdetect.cpp.

◆ kPrime1

const int tesseract::kPrime1 = 17

Definition at line 36 of file trainingsampleset.cpp.

◆ kPrime2

const int tesseract::kPrime2 = 13

Definition at line 37 of file trainingsampleset.cpp.

◆ kRadicalRadix

const int tesseract::kRadicalRadix = 29

Definition at line 31 of file unicharcompress.cpp.

◆ kRaggedFraction

const double tesseract::kRaggedFraction = 2.5

Definition at line 40 of file alignedblob.cpp.

◆ kRaggedGapFraction

const double tesseract::kRaggedGapFraction = 1.0

Definition at line 44 of file alignedblob.cpp.

◆ kRaggedGutterMultiple

const int tesseract::kRaggedGutterMultiple = 5

Definition at line 51 of file tabfind.cpp.

◆ kRandomizingCenter

const int tesseract::kRandomizingCenter = 128

Definition at line 36 of file trainingsample.cpp.

◆ kRatingEpsilon

const double tesseract::kRatingEpsilon = 1.0 / 32

Definition at line 31 of file errorcounter.cpp.

◆ kRequiredColumns

const double tesseract::kRequiredColumns = 0.7

Definition at line 48 of file tablerecog.cpp.

◆ kReverseIfHasRTL

const char tesseract::kReverseIfHasRTL[] = "RRP_REVERSE_IF_HAS_RTL"

Definition at line 50 of file trie.cpp.

◆ kRGBRMSColors

const int tesseract::kRGBRMSColors = 4

Definition at line 36 of file colpartition.h.

◆ kRLE

const char *const tesseract::kRLE = "\u202A"

Right-to-Left Embedding.

Definition at line 40 of file unicodes.cpp.

◆ kRLM

const char *const tesseract::kRLM = "\u200F"

Right-to-Left Mark.

Definition at line 39 of file unicodes.cpp.

◆ kRMSFitScaling

const double tesseract::kRMSFitScaling = 8.0

Definition at line 49 of file imagefind.cpp.

◆ kRotationRange

const float tesseract::kRotationRange = 0.02f

Definition at line 73 of file degradeimage.cpp.

◆ kSaltnPepper

const int tesseract::kSaltnPepper = 5

Definition at line 77 of file degradeimage.cpp.

◆ kScaleFactor

constexpr double tesseract::kScaleFactor = 256.0
constexpr

Definition at line 36 of file functions.h.

◆ kSeedBlobsCountTh

const int tesseract::kSeedBlobsCountTh = 10

Definition at line 84 of file equationdetect.cpp.

◆ kSideSpaceMargin

const int tesseract::kSideSpaceMargin = 10

Definition at line 101 of file tablefind.cpp.

◆ kSimilarRaggedDist

const int tesseract::kSimilarRaggedDist = 50

Definition at line 42 of file tabvector.cpp.

◆ kSimilarVectorDist

const int tesseract::kSimilarVectorDist = 10

Definition at line 39 of file tabvector.cpp.

◆ ksizeofUniversalAmbigsFile

const int tesseract::ksizeofUniversalAmbigsFile = sizeof(kUniversalAmbigsFile)

Definition at line 19036 of file universalambigs.cpp.

◆ kSizeRatioToReject

const float tesseract::kSizeRatioToReject = 2.0

Definition at line 100 of file strokewidth.cpp.

◆ kSmallTableProjectionThreshold

const double tesseract::kSmallTableProjectionThreshold = 0.35

Definition at line 105 of file tablefind.cpp.

◆ kSmoothDecisionMargin

const int tesseract::kSmoothDecisionMargin = 4

Definition at line 64 of file colpartitiongrid.cpp.

◆ kSplitPartitionSize

const double tesseract::kSplitPartitionSize = 2.0

Definition at line 43 of file tablefind.cpp.

◆ kSquareLimit

const int tesseract::kSquareLimit = 25

Definition at line 34 of file trainingsampleset.cpp.

◆ kStageTransitionThreshold

const double tesseract::kStageTransitionThreshold = 10.0

Definition at line 63 of file lstmtrainer.cpp.

◆ kStateClip

const double tesseract::kStateClip = 100.0

Definition at line 69 of file lstm.cpp.

◆ kStrokeWidthCJK

const double tesseract::kStrokeWidthCJK = 2.0

Definition at line 52 of file strokewidth.cpp.

◆ kStrokeWidthConstantTolerance

const double tesseract::kStrokeWidthConstantTolerance = 2.0

Definition at line 140 of file tablefind.cpp.

◆ kStrokeWidthFractionalTolerance

const double tesseract::kStrokeWidthFractionalTolerance = 0.25

Definition at line 139 of file tablefind.cpp.

◆ kStrokeWidthFractionCJK

const double tesseract::kStrokeWidthFractionCJK = 0.25

Definition at line 51 of file strokewidth.cpp.

◆ kStrokeWidthFractionTolerance

const double tesseract::kStrokeWidthFractionTolerance = 0.125

Allowed proportional change in stroke width to be the same font.

Definition at line 44 of file strokewidth.cpp.

◆ kStrokeWidthTolerance

const double tesseract::kStrokeWidthTolerance = 1.5

Allowed constant change in stroke width to be the same font. Really 1.5 pixels.

Definition at line 49 of file strokewidth.cpp.

◆ kSubTrainerMarginFraction

const double tesseract::kSubTrainerMarginFraction = 3.0 / 128

Definition at line 51 of file lstmtrainer.cpp.

◆ kTableColumnThreshold

const double tesseract::kTableColumnThreshold = 3.0

Definition at line 88 of file tablefind.cpp.

◆ kTableSize

constexpr int tesseract::kTableSize = 4096
constexpr

Definition at line 34 of file functions.h.

◆ kTabRadiusFactor

const int tesseract::kTabRadiusFactor = 5

Definition at line 35 of file tabfind.cpp.

◆ kTargetXScale

const int tesseract::kTargetXScale = 5

Definition at line 71 of file lstmtrainer.cpp.

◆ kTargetYScale

const int tesseract::kTargetYScale = 100

Definition at line 72 of file lstmtrainer.cpp.

◆ kTesseractReject

const char tesseract::kTesseractReject = '~'

Character returned when Tesseract couldn't recognize the input as anything.

Definition at line 106 of file baseapi.cpp.

◆ kTestChar

const int tesseract::kTestChar = -1

Definition at line 32 of file trainingsampleset.cpp.

◆ kThickLengthMultiple

const double tesseract::kThickLengthMultiple = 0.75

Definition at line 56 of file linefind.cpp.

◆ kThinLineFraction

const int tesseract::kThinLineFraction = 20

Denominator applied to the resolution to obtain the maximum pixel width allowed for thin lines.

Definition at line 41 of file linefind.cpp.

◆ kTinyEnoughTextlineOverlapFraction

const double tesseract::kTinyEnoughTextlineOverlapFraction = 0.25

Definition at line 48 of file colpartitiongrid.cpp.

◆ kTrainerIterations

const int tesseract::kTrainerIterations = 600

Definition at line 35 of file lstm_test.h.

◆ kUnclearDensityTh

const float tesseract::kUnclearDensityTh = 0.25

Definition at line 83 of file equationdetect.cpp.

◆ kUniChs

const int tesseract::kUniChs[]
Initial value:
= {
0x20ac, 0x201c, 0x201d, 0x2018, 0x2019, 0x2022, 0x2014, 0
}

Conversion table for non-Latin characters. Maps characters out of the Latin set into the Latin set. TODO(rays): incorporate this translation into unicharset.

Definition at line 1558 of file baseapi.cpp.

◆ kUniversalAmbigsFile

const char tesseract::kUniversalAmbigsFile[]

Definition at line 24 of file universalambigs.cpp.

◆ kUNLVReject

const char tesseract::kUNLVReject = '~'

Character used by UNLV error counter as a reject.

Definition at line 108 of file baseapi.cpp.

◆ kUNLVSuspect

const char tesseract::kUNLVSuspect = '^'

Character used by UNLV as a suspect marker.

Definition at line 110 of file baseapi.cpp.

◆ kVerticalSpacing

const double tesseract::kVerticalSpacing = -0.2

Definition at line 38 of file tablerecog.cpp.

◆ kVLineAlignment

const int tesseract::kVLineAlignment = 3

Definition at line 46 of file alignedblob.cpp.

◆ kVLineGutter

const int tesseract::kVLineGutter = 1

Definition at line 48 of file alignedblob.cpp.

◆ kVLineMinLength

const int tesseract::kVLineMinLength = 500

Definition at line 56 of file alignedblob.cpp.

◆ kVLineSearchSize

const int tesseract::kVLineSearchSize = 150

Definition at line 50 of file alignedblob.cpp.

◆ kWorstDictCertainty

const float tesseract::kWorstDictCertainty = -25.0f

Definition at line 37 of file linerec.cpp.

◆ kXWinFrameSize

const int tesseract::kXWinFrameSize = 30

Definition at line 52 of file network.cpp.

◆ kYWinFrameSize

const int tesseract::kYWinFrameSize = 80

Definition at line 53 of file network.cpp.

◆ LogisticTable

const double tesseract::LogisticTable[]

Definition at line 4102 of file functions.cpp.

◆ RTLReversePolicyNames

const char* const tesseract::RTLReversePolicyNames[]
Initial value:
= {
kDoNotReverse,
kReverseIfHasRTL,
kForceReverse
}

Definition at line 53 of file trie.cpp.

◆ TanhTable

const double tesseract::TanhTable[]

Definition at line 4 of file functions.cpp.

◆ test_data

int tesseract::test_data[] = {8, 1, 2, -4, 7, 9, 65536, 4, 9, 0}

Definition at line 24 of file heap_test.cc.

◆ textord_tabvector_vertical_box_ratio

double tesseract::textord_tabvector_vertical_box_ratio = 0.5

"Fraction of box matches required to declare a line vertical"

Definition at line 58 of file tabvector.cpp.

◆ textord_tabvector_vertical_gap_fraction

double tesseract::textord_tabvector_vertical_gap_fraction = 0.5

"Max fraction of mean blob width allowed for vertical gaps in vertical text"

Definition at line 55 of file tabvector.cpp.

tesseract::OCRNorm::kNone
ScrollView::GREY
Definition: scrollview.h:133
UNICHARSET::load_from_file
bool load_from_file(const char *const filename, bool skip_fragments)
Definition: unicharset.h:378
tesseract::ET_COUNT
Definition: lstmtrainer.h:43
TBOX
Definition: cleanapi_test.cc:19
file::JoinPath
static std::string JoinPath(const std::string &s1, const std::string &s2)
Definition: include_gunit.h:43
tesseract::NC_NO_DUP
Definition: recodebeam.h:78
string
std::string string
Definition: equationdetect_test.cc:21
tesseract::TS_ENABLED
Definition: network.h:95
GenericVector::delete_data_pointers
void delete_data_pointers()
Definition: genericvector.h:872
tesseract::FirstWordWouldHaveFit
bool FirstWordWouldHaveFit(const RowScratchRegisters &before, const RowScratchRegisters &after)
Definition: paragraphs.cpp:1671
ClipToRange
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:106
PSM_OSD_ONLY
Definition: capi.h:84
tesseract::TA_LEFT_RAGGED
Definition: tabvector.h:46
tesseract::ShapeTable::Serialize
bool Serialize(FILE *fp) const
Definition: shapetable.cpp:241
tesseract::WriteUnicharset
bool WriteUnicharset(const UNICHARSET &unicharset, const std::string &output_dir, const std::string &lang, FileWriter writer, TessdataManager *traineddata)
Definition: lang_model_helpers.cpp:71
tesseract::NormalizeUTF8String
bool NormalizeUTF8String(UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNorm grapheme_normalize, const char *str8, std::string *normalized)
Definition: normstrngs.cpp:163
tesseract::NT_PARALLEL
Definition: network.h:49
tesseract::NPT_WEAK_HTEXT
Definition: colpartitiongrid.cpp:1504
tesseract::STR_NONE
Definition: lstmtrainer.h:64
tesseract::NT_POSCLIP
Definition: network.h:63
tesseract::STR_REPLACED
Definition: lstmtrainer.h:66
tesseract::GetXheightString
std::string GetXheightString(const std::string &script_dir, const UNICHARSET &unicharset)
Definition: unicharset_training_utils.cpp:164
INFO
Definition: log.h:29
tesseract::NT_PAR_2D_LSTM
Definition: network.h:53
tesseract::CST_FLOWING
Definition: colpartition.h:49
UNICHARSET::set_isupper
void set_isupper(UNICHAR_ID unichar_id, bool value)
Definition: unicharset.h:431
tesseract::IsWhitespace
bool IsWhitespace(const char32 ch)
Definition: normstrngs.cpp:239
tesseract::CT_OK_BROKEN
Definition: errorcounter.h:80
tesseract::DAWG_TYPE_PUNCTUATION
Definition: dawg.h:67
tesseract::LT_CTC
Definition: static_shape.h:31
tesseract::TN_TOP2
Definition: recodebeam.h:86
tesseract::CST_PULLOUT
Definition: colpartition.h:51
tesseract::TA_SEPARATOR
Definition: tabvector.h:50
tesseract::PrintStringVectorWithUnicodes
std::string PrintStringVectorWithUnicodes(const std::vector< std::string > &glyphs)
Definition: normstrngs_test.h:39
tesseract::PTRAIN_FREQ_MED
Definition: params_training_featdef.h:58
BTFT_NONE
Definition: blobbox.h:114
JUSTIFICATION_UNKNOWN
Definition: capi.h:132
tesseract::NT_XYTRANSPOSE
Definition: network.h:58
STRING::add_str_int
void add_str_int(const char *str, int number)
Definition: strngs.cpp:370
tesseract::AMBIG_TYPE_COUNT
Definition: ambigs.h:44
InitFeatureDefs
void InitFeatureDefs(FEATURE_DEFS_STRUCT *featuredefs)
Definition: featdefs.cpp:111
tesseract::LogisticTable
const double LogisticTable[]
Definition: functions.cpp:4102
tesseract::TESSDATA_CUBE_SYSTEM_DAWG
Definition: tessdatamanager.h:69
tesseract::CT_SIZE
Definition: errorcounter.h:89
tesseract::XH_SUBNORMAL
Definition: dict.h:78
tesseract::PTRAIN_RATING_PER_CHAR
Definition: params_training_featdef.h:68
tesseract::FN_INCOLOR
Definition: degradeimage.cpp:76
tesseract::kTrainerIterations
const int kTrainerIterations
Definition: lstm_test.h:35
PDBLK::bounding_box
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:58
WRITING_DIRECTION_TOP_TO_BOTTOM
Definition: capi.h:140
TPOINT
Definition: blobs.h:49
tesseract::FN_Y1
Definition: degradeimage.cpp:78
tesseract::PTRAIN_NUM_MED
Definition: params_training_featdef.h:46
PSM_AUTO_ONLY
Definition: capi.h:86
tesseract::CASE_AMBIG
Definition: ambigs.h:42
tesseract::COL_TEXT
Definition: tablefind.h:31
tesseract::NT_SOFTMAX_NO_CTC
Definition: network.h:69
tesseract::kReverseIfHasRTL
const char kReverseIfHasRTL[]
Definition: trie.cpp:50
tesseract::NT_PAR_RL_LSTM
Definition: network.h:51
UNICHARSET::set_islower
void set_islower(UNICHAR_ID unichar_id, bool value)
Definition: unicharset.h:426
tesseract::TESSDATA_SYSTEM_DAWG
Definition: tessdatamanager.h:64
WERD_CHOICE::unichar_id
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:303
TO_BLOCK::noise_blobs
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:773
tesseract::WriteRecoder
bool WriteRecoder(const UNICHARSET &unicharset, bool pass_through, const std::string &output_dir, const std::string &lang, FileWriter writer, STRING *radical_table_data, TessdataManager *traineddata)
Definition: lang_model_helpers.cpp:85
tesseract::NM_BASELINE
Definition: normalis.h:42
tesseract::TESSDATA_BIGRAM_DAWG
Definition: tessdatamanager.h:71
tesseract::test_data
int test_data[]
Definition: heap_test.cc:24
WRITING_DIRECTION_RIGHT_TO_LEFT
Definition: capi.h:139
WERD_CHOICE
Definition: ratngs.h:261
tesseract::FN_X1
Definition: degradeimage.cpp:82
tesseract::DEFINITE_AMBIG
Definition: ambigs.h:40
tesseract::NT_COUNT
Definition: network.h:80
tesseract::PTRAIN_DIGITS_SHORT
Definition: params_training_featdef.h:41
tesseract::CS_ROUND_ROBIN
Definition: imagedata.h:53
RIL_WORD
Definition: capi.h:104
ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:87
tesseract::LoadShapeTable
ShapeTable * LoadShapeTable(const STRING &file_prefix)
Definition: commontraining.cpp:154
NearlyEqual
bool NearlyEqual(T x, T y, T tolerance)
Definition: host.h:36
tesseract::CST_IMPROPER
Definition: classify.h:99
UNICHARSET::PropertiesIncomplete
bool PropertiesIncomplete(UNICHAR_ID unichar_id) const
Definition: unicharset.h:636
tesseract::TESSDATA_PARAMS_MODEL
Definition: tessdatamanager.h:73
TBLOB::denorm
const DENORM & denorm() const
Definition: blobs.h:361
INT_DESCENDER
#define INT_DESCENDER
Definition: intproto.cpp:49
baseline
Definition: mfoutline.h:62
tesseract::LoadDataFromFile
bool LoadDataFromFile(const char *filename, GenericVector< char > *data)
Definition: genericvector.h:341
tesseract::DAWG_TYPE_NUMBER
Definition: dawg.h:69
tesseract::FN_Y2
Definition: degradeimage.cpp:79
tesseract::ShapeTable::NumShapes
int NumShapes() const
Definition: shapetable.h:274
tesseract::SetupBasicProperties
void SetupBasicProperties(bool report_errors, UNICHARSET *unicharset)
Definition: unicharset_training_utils.h:38
ICOORD
integer coordinate
Definition: points.h:30
BLOBNBOX::set_vert_stroke_width
void set_vert_stroke_width(float width)
Definition: blobbox.h:345
tesseract::NormalizeCleanAndSegmentUTF8
bool NormalizeCleanAndSegmentUTF8(UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNormMode g_mode, bool report_errors, const char *str8, std::vector< std::string > *graphemes)
Definition: normstrngs.cpp:188
UNICHARSET::set_normed
void set_normed(UNICHAR_ID unichar_id, const char *normed)
Definition: unicharset.h:472
UNICHARSET::set_direction
void set_direction(UNICHAR_ID unichar_id, UNICHARSET::Direction value)
Definition: unicharset.h:462
tesseract::COL_COUNT
Definition: tablefind.h:34
tesseract::TN_ALSO_RAN
Definition: recodebeam.h:88
tesseract::IsValidCodepoint
bool IsValidCodepoint(const char32 ch)
Definition: normstrngs.cpp:234
tesseract::ParagraphJustification
ParagraphJustification
Definition: publictypes.h:248
tesseract::PTRAIN_NUM_BAD_FONT
Definition: params_training_featdef.h:67
tesseract::LR_LEFT
Definition: strokewidth.h:40
TBOX::top
int16_t top() const
Definition: rect.h:57
tesseract::CT_ACCEPTED_JUNK
Definition: errorcounter.h:87
STRING
Definition: strngs.h:45
UNICHARSET::get_script_from_script_id
const char * get_script_from_script_id(int id) const
Definition: unicharset.h:844
tesseract::SetupBasicProperties
void SetupBasicProperties(bool report_errors, bool decompose, UNICHARSET *unicharset)
Definition: unicharset_training_utils.cpp:40
ScrollView::Clear
void Clear()
Definition: scrollview.cpp:588
TO_BLOCK::blobs
BLOBNBOX_LIST blobs
Definition: blobbox.h:771
TO_BLOCK
Definition: blobbox.h:691
STRING::truncate_at
void truncate_at(int32_t index)
Definition: strngs.cpp:258
tesseract::ViramaScript::kTelugu
tesseract::CanonicalizeDetectionResults
void CanonicalizeDetectionResults(GenericVector< PARA * > *row_owners, PARA_LIST *paragraphs)
Definition: paragraphs.cpp:2252
PSM_AUTO_OSD
Definition: capi.h:85
GenericVector::Serialize
bool Serialize(FILE *fp) const
Definition: genericvector.h:929
ARRAYSIZE
#define ARRAYSIZE(arr)
Definition: include_gunit.h:53
PSM_RAW_LINE
Definition: capi.h:97
tesseract::TA_RIGHT_ALIGNED
Definition: tabvector.h:48
tesseract::XH_GOOD
Definition: dict.h:78
ScrollView::Pen
void Pen(Color color)
Definition: scrollview.cpp:717
ScrollView::DrawTo
void DrawTo(int x, int y)
Definition: scrollview.cpp:524
tesseract::ViramaScript::kGujarati
tesseract::IsOCREquivalent
bool IsOCREquivalent(char32 ch1, char32 ch2)
Definition: normstrngs.cpp:230
tesseract::CT_REJECT
Definition: errorcounter.h:81
tesseract::FN_X0
Definition: degradeimage.cpp:81
tesseract::CST_NGRAM
Definition: classify.h:100
tesseract::SaveDataToFile
bool SaveDataToFile(const GenericVector< char > &data, const char *filename)
Definition: genericvector.h:362
tesseract::CT_RANK
Definition: errorcounter.h:85
PSM_SINGLE_BLOCK
Definition: capi.h:90
tesseract::CST_HEADING
Definition: colpartition.h:50
PSM_AUTO
Definition: capi.h:87
tesseract::ViramaScript::kNonVirama
TEXTLINE_ORDER_TOP_TO_BOTTOM
Definition: capi.h:145
tesseract::TESSDATA_CUBE_UNICHARSET
Definition: tessdatamanager.h:68
tesseract::DetectParagraphs
void DetectParagraphs(int debug_level, bool after_text_recognition, const MutableIterator *block_start, GenericVector< ParagraphModel * > *models)
Definition: paragraphs.cpp:2527
JUSTIFICATION_LEFT
Definition: capi.h:133
tesseract::NT_REPLICATED
Definition: network.h:50
INT_XRADIUS
#define INT_XRADIUS
Definition: intproto.cpp:56
tesseract::CS_SEQUENTIAL
Definition: imagedata.h:48
ICOORD::x
int16_t x() const
access function
Definition: points.h:51
tesseract::kForceReverse
const char kForceReverse[]
Definition: trie.cpp:51
tesseract::PTRAIN_SHAPE_COST_PER_CHAR
Definition: params_training_featdef.h:60
tesseract::NOT_AMBIG
Definition: ambigs.h:38
BSTT_MATH
Definition: blobbox.h:99
BLOBNBOX
Definition: blobbox.h:142
tesseract::NT_LSTM
Definition: network.h:60
tesseract::RightWordAttributes
void RightWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, const STRING &utf8, bool *is_list, bool *starts_idea, bool *ends_idea)
Definition: paragraphs.cpp:470
tesseract::NT_SYMCLIP
Definition: network.h:64
tesseract::OCRNormalize
char32 OCRNormalize(char32 ch)
Definition: normstrngs.cpp:220
tesseract::TESSDATA_LSTM_SYSTEM_DAWG
Definition: tessdatamanager.h:76
OEM_TESSERACT_LSTM_COMBINED
Definition: capi.h:80
tesseract::PTRAIN_NUM_BAD_CASE
Definition: params_training_featdef.h:63
tesseract::DAWG_TYPE_WORD
Definition: dawg.h:68
C_BLOB
Definition: stepblob.h:36
tesseract::CT_UNICHAR_TOPN_ERR
Definition: errorcounter.h:76
RIL_SYMBOL
Definition: capi.h:105
tesseract::ShapeTable::DeSerialize
bool DeSerialize(TFile *fp)
Definition: shapetable.cpp:246
ASSERT_HOST_MSG
#define ASSERT_HOST_MSG(x,...)
Definition: errcode.h:91
tesseract::PTRAIN_FREQ_LONG
Definition: params_training_featdef.h:59
tesseract::NPT_COUNT
Definition: colpartitiongrid.cpp:1509
InitIntegerFX
void InitIntegerFX()
Definition: intfx.cpp:48
tesseract::IntFeatureSpace::Init
void Init(uint8_t xbuckets, uint8_t ybuckets, uint8_t thetabuckets)
Definition: intfeaturespace.cpp:30
GenericVector::back
T & back() const
Definition: genericvector.h:728
UNICHARSET::save_to_file
bool save_to_file(const char *const filename) const
Definition: unicharset.h:350
tesseract::GraphemeNormMode
GraphemeNormMode
Definition: validator.h:48
tesseract::TESSDATA_PUNC_DAWG
Definition: tessdatamanager.h:63
tesseract::ET_WORD_RECERR
Definition: lstmtrainer.h:40
BSTT_DIGIT
Definition: blobbox.h:98
TBOX::height
int16_t height() const
Definition: rect.h:107
UNICHARSET::set_isalpha
void set_isalpha(UNICHAR_ID unichar_id, bool value)
Definition: unicharset.h:421
tesseract::SetBlobStrokeWidth
void SetBlobStrokeWidth(Pix *pix, BLOBNBOX *blob)
Definition: tordmain.cpp:67
UNICHARSET::kCustomLigatures
static const TESS_API char * kCustomLigatures[][2]
Definition: unicharset.h:150
UNICHARSET::get_ispunctuation
bool get_ispunctuation(UNICHAR_ID unichar_id) const
Definition: unicharset.h:509
PSM_SINGLE_COLUMN
Definition: capi.h:88
tesseract::PERFECT
Definition: lstmtrainer.h:49
tesseract::TESSDATA_SHAPE_TABLE
Definition: tessdatamanager.h:70
tesseract::TESSDATA_UNICHARSET
Definition: tessdatamanager.h:58
tesseract::ViramaScript::kTamil
PSM_SINGLE_LINE
Definition: capi.h:91
tesseract::PTRAIN_NUM_FEATURE_TYPES
Definition: params_training_featdef.h:70
tesseract::kSaltnPepper
const int kSaltnPepper
Definition: degradeimage.cpp:77
kBoostDirBuckets
const int kBoostDirBuckets
Definition: intfeaturespace.h:30
tesseract::SP_SUBSCRIPT
Definition: ratngs.h:252
tesseract::LT_NONE
Definition: static_shape.h:30
tesseract::TESSDATA_INTTEMP
Definition: tessdatamanager.h:60
tesseract::PTRAIN_DICT_SHORT
Definition: params_training_featdef.h:53
BSTT_UNCLEAR
Definition: blobbox.h:100
BLOBNBOX::special_text_type
BlobSpecialTextType special_text_type() const
Definition: blobbox.h:288
UNICHARSET::SetPropertiesFromOther
void SetPropertiesFromOther(const UNICHARSET &src)
Definition: unicharset.h:535
tesseract::CT_OK_MULTI_FONT
Definition: errorcounter.h:83
ICOORD::setup_render
void setup_render(ICOORD *major_step, ICOORD *minor_step, int *major, int *minor) const
Definition: points.cpp:82
tesseract::FULL
Definition: lstmtrainer.h:59
kBoostXYBuckets
const int kBoostXYBuckets
Definition: intfeaturespace.h:29
STRING::size
int32_t size() const
Definition: strngs.h:68
tesseract::TN_TOPN
Definition: recodebeam.h:87
tesseract::COL_TABLE
Definition: tablefind.h:32
UNICHAR_BROKEN
Definition: unicharset.h:36
GenericVector::push_back
int push_back(T object)
Definition: genericvector.h:799
BLOCK
Definition: ocrblock.h:28
BLOCK::pdblk
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:189
tesseract::PTRAIN_DICT_LONG
Definition: params_training_featdef.h:55
CHECK
#define CHECK(test)
Definition: include_gunit.h:57
tesseract::PTRAIN_NUM_LONG
Definition: params_training_featdef.h:47
PT_EQUATION
Definition: capi.h:112
PSM_SINGLE_BLOCK_VERT_TEXT
Definition: capi.h:89
tesseract::ViramaScript::kBengali
tesseract::NT_SERIES
Definition: network.h:54
tesseract::NF_ADAM
Definition: network.h:88
tesseract::TESSDATA_VERSION
Definition: tessdatamanager.h:80
STRING::c_str
const char * c_str() const
Definition: strngs.cpp:192
tesseract::LM_PPTYPE_NUM
Definition: lm_pain_points.h:46
INT_MAX_Y
#define INT_MAX_Y
Definition: intproto.cpp:61
TPOINT::x
int16_t x
Definition: blobs.h:91
tesseract::NT_PAR_UD_LSTM
Definition: network.h:52
INT_CAPHEIGHT
#define INT_CAPHEIGHT
Definition: intproto.cpp:52
tesseract::PTRAIN_DOC_SHORT
Definition: params_training_featdef.h:49
DENORM::DenormTransform
void DenormTransform(const DENORM *last_denorm, const TPOINT &pt, TPOINT *original) const
Definition: normalis.cpp:389
tesseract::LR_RIGHT
Definition: strokewidth.h:41
UNICHARSET::null_sid
int null_sid() const
Definition: unicharset.h:874
PDBLK::poly_block
POLY_BLOCK * poly_block() const
Definition: pdblock.h:54
tesseract::ViramaScript::kOriya
tesseract::CT_OK_MULTI_UNICHAR
Definition: errorcounter.h:78
tesseract::FD_WIDTH
Definition: stridemap.h:35
INT_MAX_X
#define INT_MAX_X
Definition: intproto.cpp:60
tesseract::kScaleFactor
constexpr double kScaleFactor
Definition: functions.h:36
tesseract::OtsuStats
int OtsuStats(const int *histogram, int *H_out, int *omega0_out)
Definition: otsuthr.cpp:187
BRT_TEXT
Definition: blobbox.h:79
tesseract::NT_YREVERSED
Definition: network.h:57
tesseract::TA_LEFT_ALIGNED
Definition: tabvector.h:45
tesseract::CT_UNICHAR_TOP_OK
Definition: errorcounter.h:70
UNICHARSET::unichar_to_id
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:209
tesseract::TESSDATA_LSTM_NUMBER_DAWG
Definition: tessdatamanager.h:77
tesseract::PFR_OK
Definition: strokewidth.h:47
tesseract::LM_PPTYPE_PATH
Definition: lm_pain_points.h:43
tesseract::TESSDATA_LSTM_RECODER
Definition: tessdatamanager.h:79
tesseract::FN_Y0
Definition: degradeimage.cpp:77
tesseract::TN_COUNT
Definition: recodebeam.h:89
tesseract::FN_Y3
Definition: degradeimage.cpp:80
tesseract::NT_TANH
Definition: network.h:65
TPOINT::y
int16_t y
Definition: blobs.h:92
tesseract::XH_INCONSISTENT
Definition: dict.h:78
tesseract::FullwidthToHalfwidth
char32 FullwidthToHalfwidth(const char32 ch)
Definition: normstrngs.cpp:298
ORIENTATION_PAGE_RIGHT
Definition: capi.h:127
GenericVector::resize_no_init
void resize_no_init(int size)
Definition: genericvector.h:65
tesseract::NC_ONLY_DUP
Definition: recodebeam.h:75
tesseract::ViramaScript::kDevanagari
tesseract::STR_UPDATED
Definition: lstmtrainer.h:65
tesseract::NPT_WEAK_VTEXT
Definition: colpartitiongrid.cpp:1506
GenericVector::empty
bool empty() const
Definition: genericvector.h:86
TBOX::width
int16_t width() const
Definition: rect.h:114
UNICHARSET
Definition: unicharset.h:145
tesseract::kHistogramSize
const int kHistogramSize
Definition: otsuthr.h:27
tesseract::PTRAIN_XHEIGHT_CONSISTENCY
Definition: params_training_featdef.h:64
tesseract::ViramaScript::kKannada
UNICHARSET::set_mirror
void set_mirror(UNICHAR_ID unichar_id, UNICHAR_ID mirror)
Definition: unicharset.h:467
tesseract::NPT_HTEXT
Definition: colpartitiongrid.cpp:1502
TBOX::bottom
int16_t bottom() const
Definition: rect.h:64
tesseract::Tanh
double Tanh(double x)
Definition: functions.h:43
tesseract::Logistic
double Logistic(double x)
Definition: functions.h:54
tesseract::PTRAIN_DOC_MED
Definition: params_training_featdef.h:50
tesseract::kBytesPer64BitNumber
const int kBytesPer64BitNumber
Definition: baseapi.cpp:1501
TEXTLINE_ORDER_RIGHT_TO_LEFT
Definition: capi.h:144
tesseract::PrepareDistortedPix
Pix * PrepareDistortedPix(const Pix *pix, bool perspective, bool invert, bool white_noise, bool smooth_noise, bool blur, int box_reduction, TRand *randomizer, GenericVector< TBOX > *boxes)
Definition: degradeimage.cpp:196
tesseract::NC_COUNT
Definition: recodebeam.h:81
tesseract::NPT_IMAGE
Definition: colpartitiongrid.cpp:1508
tesseract::PTRAIN_NUM_BAD_PUNC
Definition: params_training_featdef.h:62
feature_defs
FEATURE_DEFS_STRUCT feature_defs
Definition: commontraining.cpp:89
tesseract::PrintString32WithUnicodes
std::string PrintString32WithUnicodes(const std::string &str)
Definition: normstrngs_test.h:34
tesseract::UnicodeNormMode::kNFKC
tesseract::AsciiLikelyListItem
bool AsciiLikelyListItem(const STRING &word)
Definition: paragraphs.cpp:296
GetNextFilename
const char * GetNextFilename(int argc, const char *const *argv)
Definition: commontraining.cpp:323
tesseract::PFR_NOISE
Definition: strokewidth.h:49
UNICHARSET::set_script
void set_script(UNICHAR_ID unichar_id, const char *value)
Definition: unicharset.h:452
tesseract::SP_NORMAL
Definition: ratngs.h:251
tesseract::NT_CONVOLVE
Definition: network.h:47
tesseract::TESSDATA_PFFMTABLE
Definition: tessdatamanager.h:61
tesseract::LM_PPTYPE_BLAMER
Definition: lm_pain_points.h:41
tesseract::CT_REJECTED_JUNK
Definition: errorcounter.h:86
tesseract::TS_RE_ENABLE
Definition: network.h:99
tesseract::COL_MIXED
Definition: tablefind.h:33
tesseract::TESSDATA_AMBIGS
Definition: tessdatamanager.h:59
SPECIAL_UNICHAR_CODES_COUNT
Definition: unicharset.h:38
UNICHARSET::set_ispunctuation
void set_ispunctuation(UNICHAR_ID unichar_id, bool value)
Definition: unicharset.h:441
INT_FX_RESULT_STRUCT::NumCN
int16_t NumCN
Definition: intfx.h:38
tesseract::OEM_COUNT
Definition: publictypes.h:278
tesseract::kDoNotReverse
const char kDoNotReverse[]
Definition: trie.cpp:49
tesseract::TF_INT_MODE
Definition: lstmrecognizer.h:47
tesseract::PTRAIN_DOC_LONG
Definition: params_training_featdef.h:51
PSM_SPARSE_TEXT_OSD
Definition: capi.h:96
RIL_TEXTLINE
Definition: capi.h:103
tesseract::NT_INPUT
Definition: network.h:45
tesseract::kCrownLeft
const ParagraphModel * kCrownLeft
Definition: paragraphs.cpp:69
STATS
Definition: statistc.h:30
tesseract::NT_TENSORFLOW
Definition: network.h:78
tesseract::LM_PPTYPE_SHAPE
Definition: lm_pain_points.h:44
BLOBNBOX::bounding_box
const TBOX & bounding_box() const
Definition: blobbox.h:229
UNICHARSET::has_special_codes
bool has_special_codes() const
Definition: unicharset.h:712
ORIENTATION_PAGE_DOWN
Definition: capi.h:128
tesseract::HistogramRect
void HistogramRect(Pix *src_pix, int channel, int left, int top, int width, int height, int *histogram)
Definition: otsuthr.cpp:166
tesseract::ET_CHAR_ERROR
Definition: lstmtrainer.h:41
UNICHARSET::post_load_setup
void post_load_setup()
Definition: unicharset.cpp:886
tesseract::SP_DROPCAP
Definition: ratngs.h:254
tesseract::NT_XREVERSED
Definition: network.h:56
INT_BASELINE
#define INT_BASELINE
Definition: intproto.cpp:50
tesseract::LT_UNKNOWN
Definition: paragraphs_internal.h:52
UNICHAR_ID
int UNICHAR_ID
Definition: unichar.h:36
JUSTIFICATION_CENTER
Definition: capi.h:134
tesseract::COL_UNKNOWN
Definition: tablefind.h:30
sample
Definition: cluster.h:31
TBLOB::bounding_box
TBOX bounding_box() const
Definition: blobs.cpp:466
GenericVector
Definition: baseapi.h:40
UNICHARSET::common_sid
int common_sid() const
Definition: unicharset.h:875
tesseract::IntFeatureSpace
Definition: intfeaturespace.h:38
UNICHARSET::get_script_table_size
int get_script_table_size() const
Definition: unicharset.h:839
tesseract::NM_CHAR_ANISOTROPIC
Definition: normalis.h:44
tesseract::GeneratePerspectiveDistortion
void GeneratePerspectiveDistortion(int width, int height, TRand *randomizer, Pix **pix, GenericVector< TBOX > *boxes)
Definition: degradeimage.cpp:237
GenericVector::reserve
void reserve(int size)
Definition: genericvector.h:679
tesseract::TESSDATA_LANG_CONFIG
Definition: tessdatamanager.h:57
POLY_BLOCK::points
ICOORDELT_LIST * points()
Definition: polyblk.h:52
tesseract::NPT_VTEXT
Definition: colpartitiongrid.cpp:1503
tesseract::TS_DISABLED
Definition: network.h:94
tesseract::NT_LSTM_SOFTMAX_ENCODED
Definition: network.h:76
INT_YCENTER
#define INT_YCENTER
Definition: intproto.cpp:55
tesseract::LT_SOFTMAX
Definition: static_shape.h:32
JUSTIFICATION_RIGHT
Definition: capi.h:135
tesseract::SetScriptProperties
void SetScriptProperties(const std::string &script_dir, UNICHARSET *unicharset)
Definition: unicharset_training_utils.cpp:143
tesseract::kTableSize
constexpr int kTableSize
Definition: functions.h:34
tesseract::TESSDATA_UNAMBIG_DAWG
Definition: tessdatamanager.h:72
tesseract::TESSDATA_LSTM_PUNC_DAWG
Definition: tessdatamanager.h:75
tesseract::HI_PRECISION_ERR
Definition: lstmtrainer.h:51
tesseract::CST_COUNT
Definition: colpartition.h:52
tesseract::CT_OK_JOINED
Definition: errorcounter.h:79
tesseract::CodepointList
std::string CodepointList(const std::vector< char32 > &str32)
Definition: normstrngs_test.h:24
tesseract::ET_RMS
Definition: lstmtrainer.h:38
tesseract::ReadFile
STRING ReadFile(const std::string &filename, FileReader reader)
Definition: lang_model_helpers.cpp:57
UNICHAR_LEN
#define UNICHAR_LEN
Definition: unichar.h:32
STRING::length
int32_t length() const
Definition: strngs.cpp:187
tesseract::kRotationRange
const float kRotationRange
Definition: degradeimage.cpp:73
tesseract::FD_DIMSIZE
Definition: stridemap.h:36
INT_XCENTER
#define INT_XCENTER
Definition: intproto.cpp:54
WERD_CHOICE::length
int length() const
Definition: ratngs.h:291
OEM_LSTM_ONLY
Definition: capi.h:79
tesseract::WriteFile
bool WriteFile(const std::string &output_dir, const std::string &lang, const std::string &suffix, const GenericVector< char > &data, FileWriter writer)
Definition: lang_model_helpers.cpp:36
tesseract::REPLACE_AMBIG
Definition: ambigs.h:39
PTIsTextType
bool PTIsTextType(PolyBlockType type)
Definition: publictypes.h:81
tesseract::TrainingSample
Definition: trainingsample.h:53
tesseract::FD_HEIGHT
Definition: stridemap.h:34
tesseract::TESSDATA_NUM_ENTRIES
Definition: tessdatamanager.h:82
TBOX::left
int16_t left() const
Definition: rect.h:71
INT_YRADIUS
#define INT_YRADIUS
Definition: intproto.cpp:57
tesseract::NT_RELU
Definition: network.h:66
STRING::assign
void assign(const char *cstr, int len)
Definition: strngs.cpp:413
tesseract::SP_SUPERSCRIPT
Definition: ratngs.h:253
BLOBNBOX::set_horz_stroke_width
void set_horz_stroke_width(float width)
Definition: blobbox.h:339
tesseract::PTRAIN_NGRAM_COST_PER_CHAR
Definition: params_training_featdef.h:61
tesseract::PTRAIN_DIGITS_LONG
Definition: params_training_featdef.h:43
PT_FLOWING_TEXT
Definition: capi.h:109
BLOCK::blob_list
C_BLOB_LIST * blob_list()
get blobs
Definition: ocrblock.h:127
tesseract::ProjectiveCoeffs
int ProjectiveCoeffs(int width, int height, TRand *randomizer, float **im_coeffs, float **box_coeffs)
Definition: degradeimage.cpp:283
C_OUTLINE::bounding_box
const TBOX & bounding_box() const
Definition: coutln.h:112
tesseract::IntKDPair
KDPairInc< int, int > IntKDPair
Definition: kdpair.h:179
tesstrain_utils.dest
dest
Definition: tesstrain_utils.py:139
tesseract::CT_NUM_RESULTS
Definition: errorcounter.h:84
tesseract::NT_NONE
Definition: network.h:44
tesseract::TS_TEMP_DISABLE
Definition: network.h:97
tesseract::PTRAIN_DIGITS_MED
Definition: params_training_featdef.h:42
tesseract::DAWG_TYPE_COUNT
Definition: dawg.h:72
TBOX::right
int16_t right() const
Definition: rect.h:78
tesseract::PTRAIN_DICT_MED
Definition: params_training_featdef.h:54
tesseract::NT_LSTM_SOFTMAX
Definition: network.h:75
GenericVector::init_to_size
void init_to_size(int size, const T &t)
Definition: genericvector.h:706
tesseract::LT_BODY
Definition: paragraphs_internal.h:51
tesseract::PTRAIN_NUM_BAD_SPACING
Definition: params_training_featdef.h:66
tesseract::NT_LSTM_SUMMARY
Definition: network.h:61
char32
signed int char32
Definition: pango_font_info.h:33
tesseract::PTRAIN_NUM_BAD_CHAR_TYPE
Definition: params_training_featdef.h:65
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
tesseract::ShapeTable
Definition: shapetable.h:261
BSTT_ITALIC
Definition: blobbox.h:97
ORIENTATION_PAGE_LEFT
Definition: capi.h:129
tesseract::DAWG_TYPE_PATTERN
Definition: dawg.h:70
tesseract::IcuErrorCode
Definition: icuerrorcode.h:77
PSM_COUNT
Definition: capi.h:98
tesstrain_utils.type
type
Definition: tesstrain_utils.py:141
INT_XHEIGHT
#define INT_XHEIGHT
Definition: intproto.cpp:51
tesseract::ET_SKIP_RATIO
Definition: lstmtrainer.h:42
tesseract::LIGHT
Definition: lstmtrainer.h:57
WRITING_DIRECTION_LEFT_TO_RIGHT
Definition: capi.h:138
tesseract::NF_LAYER_SPECIFIC_LR
Definition: network.h:87
PSM_CIRCLE_WORD
Definition: capi.h:93
RIL_PARA
Definition: capi.h:102
ScrollView::SetCursor
void SetCursor(int x, int y)
Definition: scrollview.cpp:518
C_OUTLINE::pathlength
int32_t pathlength() const
Definition: coutln.h:134
tesseract::CT_FONT_ATTR_ERR
Definition: errorcounter.h:82
tesseract::LM_PPTYPE_AMBIG
Definition: lm_pain_points.h:42
tesseract::kGoodRowNumberOfColumnsSmall
const double kGoodRowNumberOfColumnsSmall[]
Definition: tablerecog.cpp:56
tesseract::NT_LINEAR
Definition: network.h:67
tesseract::TA_CENTER_JUSTIFIED
Definition: tabvector.h:47
tesseract::CST_WHOLE
Definition: classify.h:98
tesseract::NT_LOGISTIC
Definition: network.h:62
LOG
Definition: cleanapi_test.cc:19
UNICHARSET::id_to_unichar
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:290
tesseract::TA_COUNT
Definition: tabvector.h:51
UNICHARSET::get_other_case
UNICHAR_ID get_other_case(UNICHAR_ID unichar_id) const
Definition: unicharset.h:673
tesseract::CT_UNICHAR_TOPTOP_ERR
Definition: errorcounter.h:77
C_OUTLINE::step
ICOORD step(int index) const
Definition: coutln.h:143
tesseract::kNumbersPerBlob
const int kNumbersPerBlob
Definition: baseapi.cpp:1488
tesseract::LT_START
Definition: paragraphs_internal.h:50
tesseract::TF_COMPRESS_UNICHARSET
Definition: lstmrecognizer.h:48
tesseract::FD_BATCH
Definition: stridemap.h:33
tesseract::UNENCODABLE
Definition: lstmtrainer.h:50
tesseract::TA_RIGHT_RAGGED
Definition: tabvector.h:49
PSM_SINGLE_CHAR
Definition: capi.h:94
RIL_BLOCK
Definition: capi.h:101
tesseract::FN_NUM_FACTORS
Definition: degradeimage.cpp:86
GenericVector::sort
void sort()
Definition: genericvector.h:1102
tesseract::TESSDATA_LSTM
Definition: tessdatamanager.h:74
tesseract::LT_MULTIPLE
Definition: paragraphs_internal.h:53
ScrollView::Rectangle
void Rectangle(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:599
ORIENTATION_PAGE_UP
Definition: capi.h:126
UNICHARSET::set_isdigit
void set_isdigit(UNICHAR_ID unichar_id, bool value)
Definition: unicharset.h:436
GenericVector::size
int size() const
Definition: genericvector.h:71
tesseract::Serialize
bool Serialize(FILE *fp, const char *data, size_t n=1)
Definition: serialis.cpp:73
tesseract::SetOfModels
GenericVectorEqEq< const ParagraphModel * > SetOfModels
Definition: paragraphs_internal.h:98
tesseract::NT_MAXPOOL
Definition: network.h:48
tesseract::TanhTable
const double TanhTable[]
Definition: functions.cpp:4
BSTT_NONE
Definition: blobbox.h:96
tesseract::kExposureFactor
const int kExposureFactor
Definition: degradeimage.cpp:75
tesseract::PTRAIN_NUM_SHORT
Definition: params_training_featdef.h:45
tesseract::NO_BEST_TRAINER
Definition: lstmtrainer.h:58
TEXTLINE_ORDER_LEFT_TO_RIGHT
Definition: capi.h:143
tesseract::InterwordSpace
int InterwordSpace(const GenericVector< RowScratchRegisters > &rows, int row_start, int row_end)
Definition: paragraphs.cpp:1623
tesseract::NM_CHAR_ISOTROPIC
Definition: normalis.h:43
tesseract::ViramaScript::kGurmukhi
tesseract::TRAINABLE
Definition: lstmtrainer.h:48
tesseract::PFR_SKEW
Definition: strokewidth.h:48
tesseract::CST_FRAGMENT
Definition: classify.h:97
tesseract::TESSDATA_LSTM_UNICHARSET
Definition: tessdatamanager.h:78
PSM_SINGLE_WORD
Definition: capi.h:92
UNICHARSET::set_other_case
void set_other_case(UNICHAR_ID unichar_id, UNICHAR_ID other_case)
Definition: unicharset.h:457
BLOCK::reject_blobs
C_BLOB_LIST * reject_blobs()
Definition: ocrblock.h:130
tesseract::CT_UNICHAR_TOP1_ERR
Definition: errorcounter.h:74
tesseract::ET_DELTA
Definition: lstmtrainer.h:39
OEM_TESSERACT_ONLY
Definition: capi.h:78
tesseract::LT_LOGISTIC
Definition: static_shape.h:33
tesseract::TESSDATA_NUMBER_DAWG
Definition: tessdatamanager.h:65
tesseract::TESSDATA_FIXED_LENGTH_DAWGS
Definition: tessdatamanager.h:67
UNICHARSET::size
int size() const
Definition: unicharset.h:341
tesseract::NC_ANYTHING
Definition: recodebeam.h:74
C_OUTLINE::start_pos
const ICOORD & start_pos() const
Definition: coutln.h:147
tesseract::NT_RECONFIG
Definition: network.h:55
tesseract::OCRNorm::kNormalize
tesseract::NT_SOFTMAX
Definition: network.h:68
tesseract::UnicodeNormMode::kNFKD
OEM_DEFAULT
Definition: capi.h:81
tesseract::SIMILAR_AMBIG
Definition: ambigs.h:41
tesseract::kCrownRight
const ParagraphModel * kCrownRight
Definition: paragraphs.cpp:71
tesseract::TESSDATA_NORMPROTO
Definition: tessdatamanager.h:62
ICOORD::y
int16_t y() const
access_function
Definition: points.h:55
tesseract::TESSDATA_FREQ_DAWG
Definition: tessdatamanager.h:66
tesseract::CST_NOISE
Definition: colpartition.h:48
tesseract::kMinRampSize
const int kMinRampSize
Definition: degradeimage.cpp:79
tesseract::CT_UNICHAR_TOP2_ERR
Definition: errorcounter.h:75
TBOX
Definition: rect.h:33
tesseract::NOT_BOXED
Definition: lstmtrainer.h:52
PSM_SPARSE_TEXT
Definition: capi.h:95
tesseract::PTRAIN_FREQ_SHORT
Definition: params_training_featdef.h:57