Classes
class	AlignedBlob

struct	AlignedBlobParams

class	AmbigSpec

struct	AssociateStats

class	AssociateUtils

class	BaselineBlock

class	BaselineDetect

class	BaselineRow

class	BBGrid

struct	BestChoiceBundle
	Bundle together all the things pertaining to the best choice/state. More...

class	BitVector

struct	BlobData

class	BlobGrid

struct	BlockGroup

class	BoolParam

class	BoxChar

struct	BoxCharPtrSort

class	BoxWord

class	CCNonTextDetect

class	CCStruct

class	CCUtil

class	ChoiceIterator

class	Classify

class	ClassPruner

struct	ClipFFunc

struct	ClipFPrime

struct	ClipGFunc

struct	ClipGPrime

struct	Cluster

class	ColPartition

class	ColPartitionGrid

class	ColPartitionSet

class	ColSegment

class	ColumnFinder

class	Convolve

class	CTC

class	Dawg

struct	DawgArgs

class	DawgCache

struct	DawgLoader

struct	DawgPosition

class	DawgPositionVector

class	DebugPixa

class	DetLineFit

class	Dict

class	DocumentCache

class	DocumentData

class	DoubleParam

class	DoublePtr

class	DPPoint

class	EquationDetect

class	EquationDetectBase

class	EquationFinderTest

class	ErrorCounter

struct	FFunc

class	File

struct	FloatWordFeature

struct	FontInfo

class	FontInfoTable

struct	FontSet

struct	FontSpacingInfo

class	FontUtils

struct	FPrime

class	FRAGMENT

class	FullyConnected

class	GenericHeap

struct	GeometricClassifierState

struct	GFunc

struct	GPrime

struct	greater_than

class	GridBase

class	GridSearch

class	HeapTest

struct	HFunc

struct	HPrime

class	IcuErrorCode

struct	IdentityFunc

class	ImageData

class	ImageFind

class	ImageThresholder

class	IndexMap

class	IndexMapBiDi

class	Input

class	InputBuffer

struct	Interval

class	IntFeatureDist

class	IntFeatureMap

class	IntFeatureSpace

class	IntGrid

class	IntParam

struct	IntSimdMatrix

struct	KDPair

struct	KDPairDec

struct	KDPairInc

class	KDPtrPair

struct	KDPtrPairDec

struct	KDPtrPairInc

class	KDVector

class	LanguageModel

struct	LanguageModelDawgInfo

struct	LanguageModelNgramInfo

struct	LanguageModelState
	Struct to store information maintained by various language model components. More...

class	LigatureTable

class	LineFinder

struct	LineHypothesis

struct	LMConsistencyInfo

class	LMPainPoints

class	LSTM

class	LSTMRecognizer

class	LSTMTester

class	LSTMTrainer

class	LSTMTrainerTest

class	LTRResultIterator

class	MasterTrainer

class	Maxpool

class	MockClassifier

class	MutableIterator

class	Network

class	NetworkBuilder

class	NetworkIO

class	NetworkScratch

struct	NodeChild

class	NthItemTest

class	ObjectCache

class	OutputBuffer

class	PageIterator

class	PangoFontInfo

class	ParagraphModelSmearer

class	ParagraphTheory

class	Parallel

class	Param

class	ParamsModel

class	ParamsTrainingBundle

struct	ParamsTrainingHypothesis

struct	ParamsVectors

class	ParamUtils

class	PixelHistogram

class	Plumbing

class	PointerVector

struct	PtrHash

class	RecodeBeamSearch

class	RecodedCharID

struct	RecodeNode

class	Reconfig

struct	Relu

struct	ReluPrime

class	ResultIterator

class	Reversed

class	RowInfo

class	RowScratchRegisters

class	SampleIterator

struct	ScoredFont

class	SegSearchPending

class	Series

class	Shape

class	ShapeClassifier

struct	ShapeDist

struct	ShapeQueueEntry

struct	ShapeRating

class	ShapeTable

class	ShiroRekhaSplitter

class	SIMDDetect

class	SimpleClusterer

struct	SpacingProperties

class	SquishedDawg

class	StaticShape

class	StrideMap

class	StringParam

class	StringRenderer

class	StrokeWidth

class	StructuredTable

class	TabConstraint

class	TabEventHandler

class	TabFind

class	TableFinder

class	TableRecognizer

class	TabVector

struct	TESS_CHAR

class	TessAltoRenderer

class	TessBaseAPI

class	TessBoxTextRenderer

class	TessClassifier

class	TessdataManager

class	Tesseract

struct	TesseractStats

class	TessHOcrRenderer

class	TessLSTMBoxRenderer

class	TessOsdRenderer

class	TessPDFRenderer

class	TessResultRenderer

class	TessTextRenderer

class	TessTsvRenderer

class	TessUnlvRenderer

class	TessWordStrBoxRenderer

class	TestableEquationDetect

class	TextlineProjection

class	Textord

class	TFile

class	TFNetworkModel

class	TFNetworkModelDefaultTypeInternal

class	TrainingSample

class	TrainingSampleSet

class	TRand

class	TransposedArray

class	Trie

class	UNICHAR

class	UnicharAmbigs

struct	UnicharAndFonts

class	UnicharCompress

class	UnicharIdArrayUtils

struct	UnicharRating

class	UnicodeSpanSkipper

struct	UnityFunc

class	ValidateGrapheme

class	ValidateIndic

class	ValidateJavanese

class	ValidateKhmer

class	ValidateMyanmar

class	Validator

struct	ViterbiStateEntry

class	WeightMatrix

struct	WordData

class	WordFeature

class	Wordrec

class	WordWithBox

class	WorkingPartSet

Typedefs
using	DictFunc = int(Dict::*)(void *, const UNICHARSET &, UNICHAR_ID, bool) const

using	ProbabilityInContextFunc = double(Dict::*)(const char *, const char *, int, const char *, int)

using	ParamsModelClassifyFunc = float(Dict::*)(const char *, void *)

using	FillLatticeFunc = void(Wordrec::*)(const MATRIX &, const WERD_CHOICE_LIST &, const UNICHARSET &, BlamerBundle *)

using	TruthCallback = std::function< void(const UNICHARSET &, int, PageIterator *, Pix *)>

using	FileReader = bool(*)(const char *filename, GenericVector< char > *data)

using	FileWriter = bool(*)(const GenericVector< char > &data, const char *filename)

using	char32 = signed int

using	DotProductFunction = double(*)(const double *, const double *, int)

using	SetOfModels = GenericVectorEqEq< const ParagraphModel * >

using	WordRecognizer = void(Tesseract::*)(const WordData &, WERD_RES **, PointerVector< WERD_RES > *)

using	ParamsTrainingHypothesisList = GenericVector< ParamsTrainingHypothesis >

using	UnicharIdVector = GenericVector< UNICHAR_ID >

using	UnicharAmbigsVector = GenericVector< AmbigSpec_LIST * >

using	IntKDPair = KDPairInc< int, int >

using	RSMap = std::unordered_map< int, std::unique_ptr< std::vector< int > >>

using	RSCounts = std::unordered_map< int, int >

using	ShapeQueue = GenericHeap< ShapeQueueEntry >

using	NodeChildVector = GenericVector< NodeChild >

using	SuccessorList = GenericVector< int >

using	SuccessorListsVector = GenericVector< SuccessorList * >

using	DawgVector = GenericVector< Dawg * >

using	RecodePair = KDPairInc< double, RecodeNode >

using	RecodeHeap = GenericHeap< RecodePair >

using	BlobGridSearch = GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >

using	ColPartitionGridSearch = GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT >

using	PartSetVector = GenericVector< ColPartitionSet * >

using	WidthCallback = std::function< bool(int)>

using	ColSegmentGrid = BBGrid< ColSegment, ColSegment_CLIST, ColSegment_C_IT >

using	ColSegmentGridSearch = GridSearch< ColSegment, ColSegment_CLIST, ColSegment_C_IT >

using	WordGrid = BBGrid< WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT >

using	WordSearch = GridSearch< WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT >

using	LigHash = std::unordered_map< std::string, std::string, StringHash >

using	TestCallback = std::function< STRING(int, const double *, const TessdataManager &, int)>

using	PainPointHeap = GenericHeap< MatrixCoordPair >

using	LanguageModelFlagsType = unsigned char
	Used for expressing various language model flags. More...

Enumerations
enum	Orientation { ORIENTATION_PAGE_UP = 0, ORIENTATION_PAGE_RIGHT = 1, ORIENTATION_PAGE_DOWN = 2, ORIENTATION_PAGE_LEFT = 3 }

enum	WritingDirection { WRITING_DIRECTION_LEFT_TO_RIGHT = 0, WRITING_DIRECTION_RIGHT_TO_LEFT = 1, WRITING_DIRECTION_TOP_TO_BOTTOM = 2 }

enum	TextlineOrder { TEXTLINE_ORDER_LEFT_TO_RIGHT = 0, TEXTLINE_ORDER_RIGHT_TO_LEFT = 1, TEXTLINE_ORDER_TOP_TO_BOTTOM = 2 }

enum	PageSegMode { PSM_OSD_ONLY = 0, PSM_AUTO_OSD = 1, PSM_AUTO_ONLY = 2, PSM_AUTO = 3, PSM_SINGLE_COLUMN = 4, PSM_SINGLE_BLOCK_VERT_TEXT = 5, PSM_SINGLE_BLOCK = 6, PSM_SINGLE_LINE = 7, PSM_SINGLE_WORD = 8, PSM_CIRCLE_WORD = 9, PSM_SINGLE_CHAR = 10, PSM_SPARSE_TEXT, PSM_SPARSE_TEXT_OSD = 12, PSM_RAW_LINE = 13, PSM_COUNT }

enum	PageIteratorLevel { RIL_BLOCK, RIL_PARA, RIL_TEXTLINE, RIL_WORD, RIL_SYMBOL }

enum	ParagraphJustification { JUSTIFICATION_UNKNOWN, JUSTIFICATION_LEFT, JUSTIFICATION_CENTER, JUSTIFICATION_RIGHT }

enum	OcrEngineMode { OEM_TESSERACT_ONLY, OEM_LSTM_ONLY, OEM_TESSERACT_LSTM_COMBINED, OEM_DEFAULT, OEM_COUNT }

enum	LineType { LT_START = 'S', LT_BODY = 'C', LT_UNKNOWN = 'U', LT_MULTIPLE = 'M' }

enum	CMD_EVENTS { ACTION_1_CMD_EVENT, RECOG_WERDS, RECOG_PSEUDO, ACTION_2_CMD_EVENT }

enum	CachingStrategy { CS_SEQUENTIAL, CS_ROUND_ROBIN }

enum	NormalizationMode { NM_BASELINE = -3, NM_CHAR_ISOTROPIC = -2, NM_CHAR_ANISOTROPIC = -1 }

enum	kParamsTrainingFeatureType { PTRAIN_DIGITS_SHORT, PTRAIN_DIGITS_MED, PTRAIN_DIGITS_LONG, PTRAIN_NUM_SHORT, PTRAIN_NUM_MED, PTRAIN_NUM_LONG, PTRAIN_DOC_SHORT, PTRAIN_DOC_MED, PTRAIN_DOC_LONG, PTRAIN_DICT_SHORT, PTRAIN_DICT_MED, PTRAIN_DICT_LONG, PTRAIN_FREQ_SHORT, PTRAIN_FREQ_MED, PTRAIN_FREQ_LONG, PTRAIN_SHAPE_COST_PER_CHAR, PTRAIN_NGRAM_COST_PER_CHAR, PTRAIN_NUM_BAD_PUNC, PTRAIN_NUM_BAD_CASE, PTRAIN_XHEIGHT_CONSISTENCY, PTRAIN_NUM_BAD_CHAR_TYPE, PTRAIN_NUM_BAD_SPACING, PTRAIN_NUM_BAD_FONT, PTRAIN_RATING_PER_CHAR, PTRAIN_NUM_FEATURE_TYPES }

enum	ScriptPos { SP_NORMAL, SP_SUBSCRIPT, SP_SUPERSCRIPT, SP_DROPCAP }

enum	AmbigType { NOT_AMBIG, REPLACE_AMBIG, DEFINITE_AMBIG, SIMILAR_AMBIG, CASE_AMBIG, AMBIG_TYPE_COUNT }

enum	SetParamConstraint { SET_PARAM_CONSTRAINT_NONE, SET_PARAM_CONSTRAINT_DEBUG_ONLY, SET_PARAM_CONSTRAINT_NON_DEBUG_ONLY, SET_PARAM_CONSTRAINT_NON_INIT_ONLY }

enum	TessdataType { TESSDATA_LANG_CONFIG, TESSDATA_UNICHARSET, TESSDATA_AMBIGS, TESSDATA_INTTEMP, TESSDATA_PFFMTABLE, TESSDATA_NORMPROTO, TESSDATA_PUNC_DAWG, TESSDATA_SYSTEM_DAWG, TESSDATA_NUMBER_DAWG, TESSDATA_FREQ_DAWG, TESSDATA_FIXED_LENGTH_DAWGS, TESSDATA_CUBE_UNICHARSET, TESSDATA_CUBE_SYSTEM_DAWG, TESSDATA_SHAPE_TABLE, TESSDATA_BIGRAM_DAWG, TESSDATA_UNAMBIG_DAWG, TESSDATA_PARAMS_MODEL, TESSDATA_LSTM, TESSDATA_LSTM_PUNC_DAWG, TESSDATA_LSTM_SYSTEM_DAWG, TESSDATA_LSTM_NUMBER_DAWG, TESSDATA_LSTM_UNICHARSET, TESSDATA_LSTM_RECODER, TESSDATA_VERSION, TESSDATA_NUM_ENTRIES }

enum	CharSegmentationType { CST_FRAGMENT, CST_WHOLE, CST_IMPROPER, CST_NGRAM }

enum	DawgType { DAWG_TYPE_PUNCTUATION, DAWG_TYPE_WORD, DAWG_TYPE_NUMBER, DAWG_TYPE_PATTERN, DAWG_TYPE_COUNT }

enum	XHeightConsistencyEnum { XH_GOOD, XH_SUBNORMAL, XH_INCONSISTENT }

enum	TrainingFlags { TF_INT_MODE = 1, TF_COMPRESS_UNICHARSET = 64 }

enum	NetworkType { NT_NONE, NT_INPUT, NT_CONVOLVE, NT_MAXPOOL, NT_PARALLEL, NT_REPLICATED, NT_PAR_RL_LSTM, NT_PAR_UD_LSTM, NT_PAR_2D_LSTM, NT_SERIES, NT_RECONFIG, NT_XREVERSED, NT_YREVERSED, NT_XYTRANSPOSE, NT_LSTM, NT_LSTM_SUMMARY, NT_LOGISTIC, NT_POSCLIP, NT_SYMCLIP, NT_TANH, NT_RELU, NT_LINEAR, NT_SOFTMAX, NT_SOFTMAX_NO_CTC, NT_LSTM_SOFTMAX, NT_LSTM_SOFTMAX_ENCODED, NT_TENSORFLOW, NT_COUNT }

enum	NetworkFlags { NF_LAYER_SPECIFIC_LR = 64, NF_ADAM = 128 }

enum	TrainingState { TS_DISABLED, TS_ENABLED, TS_TEMP_DISABLE, TS_RE_ENABLE }

enum	NodeContinuation { NC_ANYTHING, NC_ONLY_DUP, NC_NO_DUP, NC_COUNT }

enum	TopNState { TN_TOP2, TN_TOPN, TN_ALSO_RAN, TN_COUNT }

enum	LossType { LT_NONE, LT_CTC, LT_SOFTMAX, LT_LOGISTIC }

enum	FlexDimensions { FD_BATCH, FD_HEIGHT, FD_WIDTH, FD_DIMSIZE }

enum	ColumnSpanningType { CST_NOISE, CST_FLOWING, CST_HEADING, CST_PULLOUT, CST_COUNT }

enum	NeighbourPartitionType { NPT_HTEXT, NPT_VTEXT, NPT_WEAK_HTEXT, NPT_WEAK_VTEXT, NPT_IMAGE, NPT_COUNT }

enum	LeftOrRight { LR_LEFT, LR_RIGHT }

enum	PartitionFindResult { PFR_OK, PFR_SKEW, PFR_NOISE }

enum	ColSegType { COL_UNKNOWN, COL_TEXT, COL_TABLE, COL_MIXED, COL_COUNT }

enum	TabAlignment { TA_LEFT_ALIGNED, TA_LEFT_RAGGED, TA_CENTER_JUSTIFIED, TA_RIGHT_ALIGNED, TA_RIGHT_RAGGED, TA_SEPARATOR, TA_COUNT }

enum	FactorNames { FN_INCOLOR, FN_Y0, FN_Y1, FN_Y2, FN_Y3, FN_X0, FN_X1, FN_SHEAR, FN_NUM_FACTORS }

enum	CountTypes { CT_UNICHAR_TOP_OK, CT_UNICHAR_TOP1_ERR, CT_UNICHAR_TOP2_ERR, CT_UNICHAR_TOPN_ERR, CT_UNICHAR_TOPTOP_ERR, CT_OK_MULTI_UNICHAR, CT_OK_JOINED, CT_OK_BROKEN, CT_REJECT, CT_FONT_ATTR_ERR, CT_OK_MULTI_FONT, CT_NUM_RESULTS, CT_RANK, CT_REJECTED_JUNK, CT_ACCEPTED_JUNK, CT_SIZE }

enum	ErrorTypes { ET_RMS, ET_DELTA, ET_WORD_RECERR, ET_CHAR_ERROR, ET_SKIP_RATIO, ET_COUNT }

enum	Trainability { TRAINABLE, PERFECT, UNENCODABLE, HI_PRECISION_ERR, NOT_BOXED }

enum	SerializeAmount { LIGHT, NO_BEST_TRAINER, FULL }

enum	SubTrainerResult { STR_NONE, STR_UPDATED, STR_REPLACED }

enum	UnicodeNormMode { UnicodeNormMode::kNFD, UnicodeNormMode::kNFC, UnicodeNormMode::kNFKD, UnicodeNormMode::kNFKC }

enum	OCRNorm { OCRNorm::kNone, OCRNorm::kNormalize }

enum	GraphemeNorm { GraphemeNorm::kNone, GraphemeNorm::kNormalize }

enum	GraphemeNormMode { GraphemeNormMode::kSingleString, GraphemeNormMode::kCombined, GraphemeNormMode::kGlyphSplit, GraphemeNormMode::kIndividualUnicodes }

enum	ViramaScript : char32 { ViramaScript::kNonVirama = 0, ViramaScript::kDevanagari = 0x900, ViramaScript::kBengali = 0x980, ViramaScript::kGurmukhi = 0xa00, ViramaScript::kGujarati = 0xa80, ViramaScript::kOriya = 0xb00, ViramaScript::kTamil = 0xb80, ViramaScript::kTelugu = 0xc00, ViramaScript::kKannada = 0xc80, ViramaScript::kMalayalam = 0xd00, ViramaScript::kSinhala = 0xd80, ViramaScript::kMyanmar = 0x1000, ViramaScript::kKhmer = 0x1780, ViramaScript::kJavanese = 0xa980 }

enum	LMPainPointsType { LM_PPTYPE_BLAMER, LM_PPTYPE_AMBIG, LM_PPTYPE_PATH, LM_PPTYPE_SHAPE, LM_PPTYPE_NUM }

Functions
STRING	HOcrEscape (const char *text)

bool	LoadDataFromFile (const char *filename, GenericVector< char > *data)

bool	SaveDataToFile (const GenericVector< char > &data, const char *filename)

template<typename T >
bool	cmp_eq (T const &t1, T const &t2)

template<typename T >
int	sort_cmp (const void *t1, const void *t2)

template<typename T >
int	sort_ptr_cmp (const void *t1, const void *t2)

bool	PSM_OSD_ENABLED (int pageseg_mode)

bool	PSM_ORIENTATION_ENABLED (int pageseg_mode)

bool	PSM_COL_FIND_ENABLED (int pageseg_mode)

bool	PSM_SPARSE (int pageseg_mode)

bool	PSM_BLOCK_FIND_ENABLED (int pageseg_mode)

bool	PSM_LINE_FIND_ENABLED (int pageseg_mode)

bool	PSM_WORD_FIND_ENABLED (int pageseg_mode)

template<typename T , size_t N>
constexpr size_t	countof (T const (&)[N]) noexcept

bool	DeSerialize (FILE *fp, char *data, size_t n=1)

bool	DeSerialize (FILE *fp, float *data, size_t n=1)

bool	DeSerialize (FILE *fp, int8_t *data, size_t n=1)

bool	DeSerialize (FILE *fp, int16_t *data, size_t n=1)

bool	DeSerialize (FILE *fp, int32_t *data, size_t n=1)

bool	DeSerialize (FILE *fp, uint8_t *data, size_t n=1)

bool	DeSerialize (FILE *fp, uint16_t *data, size_t n=1)

bool	DeSerialize (FILE *fp, uint32_t *data, size_t n=1)

bool	Serialize (FILE *fp, const char *data, size_t n=1)

bool	Serialize (FILE *fp, const float *data, size_t n=1)

bool	Serialize (FILE *fp, const int8_t *data, size_t n=1)

bool	Serialize (FILE *fp, const int16_t *data, size_t n=1)

bool	Serialize (FILE *fp, const int32_t *data, size_t n=1)

bool	Serialize (FILE *fp, const uint8_t *data, size_t n=1)

bool	Serialize (FILE *fp, const uint16_t *data, size_t n=1)

bool	Serialize (FILE *fp, const uint32_t *data, size_t n=1)

double	DotProductNative (const double *u, const double *v, int n)

double	DotProductAVX (const double *u, const double *v, int n)

double	DotProductFMA (const double *u, const double *v, int n)

double	DotProductSSE (const double *u, const double *v, int n)

bool	IsTextOrEquationType (PolyBlockType type)

bool	IsLeftIndented (const EquationDetect::IndentType type)

bool	IsRightIndented (const EquationDetect::IndentType type)

bool	AsciiLikelyListItem (const STRING &word)

int	UnicodeFor (const UNICHARSET *u, const WERD_CHOICE *werd, int pos)

void	LeftWordAttributes (const UNICHARSET *unicharset, const WERD_CHOICE *werd, const STRING &utf8, bool *is_list, bool *starts_idea, bool *ends_idea)

void	RightWordAttributes (const UNICHARSET *unicharset, const WERD_CHOICE *werd, const STRING &utf8, bool *is_list, bool *starts_idea, bool *ends_idea)

bool	ValidFirstLine (const GenericVector< RowScratchRegisters > *rows, int row, const ParagraphModel *model)

bool	ValidBodyLine (const GenericVector< RowScratchRegisters > *rows, int row, const ParagraphModel *model)

bool	CrownCompatible (const GenericVector< RowScratchRegisters > *rows, int a, int b, const ParagraphModel *model)

void	RecomputeMarginsAndClearHypotheses (GenericVector< RowScratchRegisters > *rows, int start, int end, int percentile)

int	InterwordSpace (const GenericVector< RowScratchRegisters > &rows, int row_start, int row_end)

bool	FirstWordWouldHaveFit (const RowScratchRegisters &before, const RowScratchRegisters &after, tesseract::ParagraphJustification justification)

bool	FirstWordWouldHaveFit (const RowScratchRegisters &before, const RowScratchRegisters &after)

bool	RowsFitModel (const GenericVector< RowScratchRegisters > *rows, int start, int end, const ParagraphModel *model)

void	CanonicalizeDetectionResults (GenericVector< PARA * > *row_owners, PARA_LIST *paragraphs)

void	DetectParagraphs (int debug_level, GenericVector< RowInfo > *row_infos, GenericVector< PARA * > *row_owners, PARA_LIST *paragraphs, GenericVector< ParagraphModel * > *models)

void	DetectParagraphs (int debug_level, bool after_text_recognition, const MutableIterator *block_start, GenericVector< ParagraphModel * > *models)

bool	StrongModel (const ParagraphModel *model)

bool	CompareFontInfo (const FontInfo &fi1, const FontInfo &fi2)

bool	CompareFontSet (const FontSet &fs1, const FontSet &fs2)

void	FontInfoDeleteCallback (FontInfo f)

void	FontSetDeleteCallback (FontSet fs)

bool	read_info (TFile *f, FontInfo *fi)

bool	write_info (FILE *f, const FontInfo &fi)

bool	read_spacing_info (TFile *f, FontInfo *fi)

bool	write_spacing_info (FILE *f, const FontInfo &fi)

bool	read_set (TFile *f, FontSet *fs)

bool	write_set (FILE *f, const FontSet &fs)

int	OtsuThreshold (Pix *src_pix, int left, int top, int width, int height, int **thresholds, int **hi_values)

void	HistogramRect (Pix *src_pix, int channel, int left, int top, int width, int height, int *histogram)

int	OtsuStats (const int *histogram, int *H_out, int *omega0_out)

int	ParamsTrainingFeatureByName (const char *name)

const char *	ScriptPosToString (enum ScriptPos script_pos)

void	ExtractFontName (const STRING &filename, STRING *fontname)

TrainingSample *	BlobToTrainingSample (const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)

void	ClearFeatureSpaceWindow (NORM_METHOD norm_method, ScrollView *window)

double	Tanh (double x)

double	Logistic (double x)

template<class Func >
void	FuncInplace (int n, double *inout)

template<class Func >
void	FuncMultiply (const double *u, const double *v, int n, double *out)

template<typename T >
void	SoftmaxInPlace (int n, T *inout)

void	CopyVector (int n, const double *src, double *dest)

void	AccumulateVector (int n, const double *src, double *dest)

void	MultiplyVectorsInPlace (int n, const double *src, double *inout)

void	MultiplyAccumulate (int n, const double *u, const double *v, double *out)

void	SumVectors (int n, const double *v1, const double *v2, const double *v3, const double *v4, const double *v5, double *sum)

template<typename T >
void	ZeroVector (int n, T *vec)

template<typename T >
void	ClipVector (int n, T lower, T upper, T *vec)

void	CodeInBinary (int n, int nf, double *vec)

Pix *	TraceOutlineOnReducedPix (C_OUTLINE *outline, int gridsize, ICOORD bleft, int *left, int *bottom)

Pix *	TraceBlockOnReducedPix (BLOCK *block, int gridsize, ICOORD bleft, int *left, int *bottom)

template<class BBC >
int	SortByBoxLeft (const void *void1, const void *void2)

template<class BBC >
int	SortRightToLeft (const void *void1, const void *void2)

template<class BBC >
int	SortByBoxBottom (const void *void1, const void *void2)

template<typename T >
void	DeleteObject (T *object)

void	SetBlobStrokeWidth (Pix *pix, BLOBNBOX *blob)

void	assign_blobs_to_blocks2 (Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)

void	ParseCommandLineFlags (const char *usage, int *argc, char ***argv, const bool remove_flags)

ShapeTable *	LoadShapeTable (const STRING &file_prefix)

void	WriteShapeTable (const STRING &file_prefix, const ShapeTable &shape_table)

MasterTrainer *	LoadTrainingData (int argc, const char *const *argv, bool replication, ShapeTable **shape_table, STRING *file_prefix)

Pix *	DegradeImage (Pix *input, int exposure, TRand *randomizer, float *rotation)

Pix *	PrepareDistortedPix (const Pix *pix, bool perspective, bool invert, bool white_noise, bool smooth_noise, bool blur, int box_reduction, TRand *randomizer, GenericVector< TBOX > *boxes)

void	GeneratePerspectiveDistortion (int width, int height, TRand *randomizer, Pix **pix, GenericVector< TBOX > *boxes)

int	ProjectiveCoeffs (int width, int height, TRand *randomizer, float **im_coeffs, float **box_coeffs)

bool	LoadFileLinesToStrings (const char *filename, GenericVector< STRING > *lines)

bool	WriteFile (const std::string &output_dir, const std::string &lang, const std::string &suffix, const GenericVector< char > &data, FileWriter writer)

STRING	ReadFile (const std::string &filename, FileReader reader)

bool	WriteUnicharset (const UNICHARSET &unicharset, const std::string &output_dir, const std::string &lang, FileWriter writer, TessdataManager *traineddata)

bool	WriteRecoder (const UNICHARSET &unicharset, bool pass_through, const std::string &output_dir, const std::string &lang, FileWriter writer, STRING *radical_table_data, TessdataManager *traineddata)

int	CombineLangModel (const UNICHARSET &unicharset, const std::string &script_dir, const std::string &version_str, const std::string &output_dir, const std::string &lang, bool pass_through_recoder, const GenericVector< STRING > &words, const GenericVector< STRING > &puncs, const GenericVector< STRING > &numbers, bool lang_is_rtl, FileReader reader, FileWriter writer)

bool	NormalizeUTF8String (UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNorm grapheme_normalize, const char *str8, std::string *normalized)

bool	NormalizeCleanAndSegmentUTF8 (UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNormMode g_mode, bool report_errors, const char *str8, std::vector< std::string > *graphemes)

char32	OCRNormalize (char32 ch)

bool	IsOCREquivalent (char32 ch1, char32 ch2)

bool	IsValidCodepoint (const char32 ch)

bool	IsWhitespace (const char32 ch)

bool	IsUTF8Whitespace (const char *text)

unsigned int	SpanUTF8Whitespace (const char *text)

unsigned int	SpanUTF8NotWhitespace (const char *text)

bool	IsInterchangeValid (const char32 ch)

bool	IsInterchangeValid7BitAscii (const char32 ch)

char32	FullwidthToHalfwidth (const char32 ch)

void	SetupBasicProperties (bool report_errors, bool decompose, UNICHARSET *unicharset)

void	SetScriptProperties (const std::string &script_dir, UNICHARSET *unicharset)

std::string	GetXheightString (const std::string &script_dir, const UNICHARSET &unicharset)

void	SetPropertiesForInputFile (const std::string &script_dir, const std::string &input_unicharset_file, const std::string &output_unicharset_file, const std::string &output_xheights_file)

void	SetupBasicProperties (bool report_errors, UNICHARSET *unicharset)

template<class BLOB_CHOICE >
int	SortByUnicharID (const void *void1, const void *void2)

template<class BLOB_CHOICE >
int	SortByRating (const void *void1, const void *void2)

	TEST_F (EquationFinderTest, IdentifySpecialText)

	TEST_F (EquationFinderTest, EstimateTypeForUnichar)

	TEST_F (EquationFinderTest, IsIndented)

	TEST_F (EquationFinderTest, IsNearSmallNeighbor)

	TEST_F (EquationFinderTest, CheckSeedBlobsCount)

	TEST_F (EquationFinderTest, ComputeForegroundDensity)

	TEST_F (EquationFinderTest, CountAlignment)

	TEST_F (EquationFinderTest, ComputeCPsSuperBBox)

	TEST_F (EquationFinderTest, SplitCPHorLite)

	TEST_F (EquationFinderTest, SplitCPHor)

	TEST_F (HeapTest, SortTest)

	TEST_F (HeapTest, MixedTest)

	TEST_F (HeapTest, PopWorstTest)

	TEST_F (HeapTest, RevalueTest)

	TEST_F (HeapTest, DoublePtrTest)

	TEST_F (LSTMTrainerTest, RecodeTestKorBase)

	TEST_F (LSTMTrainerTest, RecodeTestKor)

	TEST_F (LSTMTrainerTest, EncodeDecodeBothTestKor)

	TEST_F (LSTMTrainerTest, TestSquashed)

	TEST_F (LSTMTrainerTest, BasicTest)

	TEST_F (LSTMTrainerTest, ColorTest)

	TEST_F (LSTMTrainerTest, BidiTest)

	TEST_F (LSTMTrainerTest, Test2D)

	TEST_F (LSTMTrainerTest, TestAdam)

	TEST_F (LSTMTrainerTest, SpeedTest)

	TEST_F (LSTMTrainerTest, DeterminismTest)

	TEST_F (LSTMTrainerTest, SoftmaxBaselineTest)

	TEST_F (LSTMTrainerTest, SoftmaxTest)

	TEST_F (LSTMTrainerTest, EncodedSoftmaxTest)

	TEST_F (LSTMTrainerTest, TestLayerAccess)

std::string	CodepointList (const std::vector< char32 > &str32)

std::string	PrintString32WithUnicodes (const std::string &str)

std::string	PrintStringVectorWithUnicodes (const std::vector< std::string > &glyphs)

void	ExpectGraphemeModeResults (const std::string &str, UnicodeNormMode u_mode, int unicode_count, int glyph_count, int grapheme_count, const std::string &target_str)

	TEST_F (NthItemTest, GeneralTest)

	TEST_F (NthItemTest, BoringTest)

	TEST_F (NthItemTest, UniqueTest)

	TEST_F (NthItemTest, EqualTest)

Variables
const int	kMinRectSize = 10

const char	kTesseractReject = '~'

const char	kUNLVReject = '~'

const char	kUNLVSuspect = '^'

const int	kMaxIntSize = 22

const int	kNumbersPerBlob = 5

const int	kBytesPerNumber = 5

const int	kBytesPerBoxFileLine = (kBytesPerNumber + 1) * kNumbersPerBlob + 1

const int	kBytesPer64BitNumber = 20

const int	kMaxBytesPerLine

const int	kUniChs []

const int	kLatinChs []

constexpr int	kNumOutputsPerRegister = 8

constexpr int	kMaxOutputRegisters = 8

constexpr int	kNumInputsPerRegister = 32

constexpr int	kNumInputsPerGroup = 4

constexpr int	kNumInputGroups = kNumInputsPerRegister / kNumInputsPerGroup

DotProductFunction	DotProduct

const float	kMathDigitDensityTh1 = 0.25

const float	kMathDigitDensityTh2 = 0.1

const float	kMathItalicDensityTh = 0.5

const float	kUnclearDensityTh = 0.25

const int	kSeedBlobsCountTh = 10

const int	kLeftIndentAlignmentCountTh = 1

const int	kMaxCharTopRange = 48

const float	kCertaintyScale = 7.0f

const float	kWorstDictCertainty = -25.0f

const int	kMaxCircleErosions = 8

const ParagraphModel *	kCrownLeft = reinterpret_cast<ParagraphModel *>(static_cast<uintptr_t>(0xDEAD111F))

const ParagraphModel *	kCrownRight = reinterpret_cast<ParagraphModel *>(static_cast<uintptr_t>(0xDEAD888F))

const int16_t	kMaxBoxEdgeDiff = 2

const int	kBoxClipTolerance = 2

const int	kNumEndPoints = 3

const int	kMinPointsForErrorCount = 16

const int	kMaxRealDistance = 2.0

const int	kFeaturePadding = 2

const int	kImagePadding = 4

const int	kHistogramSize = 256

const int	kMaxAmbigStringSize = UNICHAR_LEN * (MAX_AMBIG_SIZE + 1)

const int	kRadicalRadix = 29

const char *const	kLRM = "\u200E"
	Left-to-Right Mark. More...

const char *const	kRLM = "\u200F"
	Right-to-Left Mark. More...

const char *const	kRLE = "\u202A"
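	Right-to-Left Embedding. (NOTE: the listed value U+202A is the Unicode LEFT-TO-RIGHT EMBEDDING code point; RIGHT-TO-LEFT EMBEDDING is U+202B — verify which one is intended.) More...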

const char *const	kPDF = "\u202C"
	Pop Directional Formatting. More...

const char	kUniversalAmbigsFile []

const int	ksizeofUniversalAmbigsFile = sizeof(kUniversalAmbigsFile)

const int	kRandomizingCenter = 128

const int	case_state_table [6][4]

const char	kDoNotReverse [] = "RRP_DO_NO_REVERSE"

const char	kReverseIfHasRTL [] = "RRP_REVERSE_IF_HAS_RTL"

const char	kForceReverse [] = "RRP_FORCE_REVERSE"

const char *const	RTLReversePolicyNames []

const double	TanhTable []

const double	LogisticTable []

constexpr int	kTableSize = 4096

constexpr double	kScaleFactor = 256.0

const int	kMaxInputHeight = 48

const double	kStateClip = 100.0

const double	kErrClip = 1.0f

const double	kDictRatio = 2.25

const double	kCertOffset = -0.085

const int	kMinWinSize = 500

const int	kMaxWinSize = 2000

const int	kXWinFrameSize = 30

const int	kYWinFrameSize = 80

const float	kMinCertainty = -20.0f

const float	kMinProb = exp(kMinCertainty)

class tesseract::TFNetworkModelDefaultTypeInternal	_TFNetworkModel_default_instance_

const int	kAdamCorrectionIterations = 200000

const double	kAdamEpsilon = 1e-8

const int	kInt8Flag = 1

const int	kAdamFlag = 4

const int	kDoubleFlag = 128

const int	kHistogramBuckets = 16

const double	kAlignedFraction = 0.03125

const double	kRaggedFraction = 2.5

const double	kAlignedGapFraction = 0.75

const double	kRaggedGapFraction = 1.0

const int	kVLineAlignment = 3

const int	kVLineGutter = 1

const int	kVLineSearchSize = 150

const int	kMinRaggedTabs = 5

const int	kMinAlignedTabs = 4

const int	kVLineMinLength = 500

const double	kMinTabGradient = 4.0

const int	kMaxSkewFactor = 15

const double	kMaxSmallNeighboursPerPix = 1.0 / 32

const int	kMaxLargeOverlapsWithSmall = 3

const int	kMaxMediumOverlapsWithSmall = 12

const int	kMaxLargeOverlapsWithMedium = 12

const int	kOriginalNoiseMultiple = 8

const int	kNoisePadding = 4

const double	kPhotoOffsetFraction = 0.375

const double	kMinGoodTextPARatio = 1.5

const int	kMaxIncompatibleColumnCount = 2

const double	kHorizontalGapMergeFraction = 0.5

const double	kMinGutterWidthGrid = 0.5

const double	kMaxDistToPartSizeRatio = 1.5

const double	kMaxSpacingDrift = 1.0 / 72

const double	kMaxTopSpacingFraction = 0.25

const double	kMaxSameBlockLineSpacing = 3

const double	kMaxSizeRatio = 1.5

const double	kMaxLeaderGapFractionOfMax = 0.25

const double	kMaxLeaderGapFractionOfMin = 0.5

const int	kMinLeaderCount = 5

const int	kMinStrongTextValue = 6

const int	kMinChainTextValue = 3

const int	kHorzStrongTextlineCount = 8

const int	kHorzStrongTextlineHeight = 10

const int	kHorzStrongTextlineAspect = 5

const double	kMaxBaselineError = 0.4375

const double	kMinBaselineCoverage = 0.5

const int	kMaxRMSColorNoise = 128

const int	kMaxColorDistance = 900

const int	kRGBRMSColors = 4

const int	kMaxPadFactor = 6

const int	kMaxNeighbourDistFactor = 4

const int	kMaxCaptionLines = 7

const double	kMinCaptionGapRatio = 2.0

const double	kMinCaptionGapHeightRatio = 0.5

const double	kMarginOverlapFraction = 0.25

const double	kBigPartSizeRatio = 1.75

const double	kTinyEnoughTextlineOverlapFraction = 0.25

const double	kMaxPartitionSpacing = 1.75

const int	kSmoothDecisionMargin = 4

const double	kMinColumnWidth = 2.0 / 3

const double	kMinRectangularFraction = 0.125

const double	kMaxRectangularFraction = 0.75

const double	kMaxRectangularGradient = 0.1

const int	kMinImageFindSize = 100

const double	kRMSFitScaling = 8.0

const int	kMinColorDifference = 16

const int	kThinLineFraction = 20
	Denominator of resolution makes max pixel width to allow thin lines. More...

const int	kMinLineLengthFraction = 4
	Denominator of resolution makes min pixels to demand line lengths to be. More...

const int	kCrackSpacing = 100
	Spacing of cracks across the page to break up tall vertical lines. More...

const int	kLineFindGridSize = 50
	Grid size used by line finder. Not very critical. More...

const int	kMinThickLineWidth = 12

const int	kMaxLineResidue = 6

const double	kThickLengthMultiple = 0.75

const double	kMaxNonLineDensity = 0.25

const double	kMaxStaveHeight = 1.0

const double	kMinMusicPixelFraction = 0.75

const double	kStrokeWidthFractionTolerance = 0.125

const double	kStrokeWidthTolerance = 1.5

const double	kStrokeWidthFractionCJK = 0.25

const double	kStrokeWidthCJK = 2.0

const int	kCJKRadius = 2

const double	kCJKBrokenDistanceFraction = 0.25

const int	kCJKMaxComponents = 8

const double	kCJKAspectRatio = 1.25

const double	kCJKAspectRatioIncrease = 1.0625

const int	kMaxCJKSizeRatio = 5

const double	kBrokenCJKIterationFraction = 0.125

const double	kDiacriticXPadRatio = 7.0

const double	kDiacriticYPadRatio = 1.75

const double	kMinDiacriticSizeRatio = 1.0625

const double	kMaxDiacriticDistanceRatio = 1.25

const double	kMaxDiacriticGapToBaseCharHeight = 1.0

const int	kLineTrapLongest = 4

const int	kLineTrapShortest = 2

const int	kMostlyOneDirRatio = 3

const double	kLineResidueAspectRatio = 8.0

const int	kLineResiduePadRatio = 3

const double	kLineResidueSizeRatio = 1.75

const float	kSizeRatioToReject = 2.0

const double	kNeighbourSearchFactor = 2.5

const double	kNoiseOverlapGrowthFactor = 4.0

const double	kNoiseOverlapAreaFactor = 1.0 / 512

const int	kTabRadiusFactor = 5

const int	kMinVerticalSearch = 3

const int	kMaxVerticalSearch = 12

const int	kMaxRaggedSearch = 25

const int	kMinLinesInColumn = 10

const double	kMinFractionalLinesInColumn = 0.125

const double	kMaxGutterWidthAbsolute = 2.00

const int	kRaggedGutterMultiple = 5

const double	kLineFragmentAspectRatio = 10.0

const int	kMinEvaluatedTabs = 3

const double	kCosMaxSkewAngle = 0.866025

const int	kColumnWidthFactor = 20

const int	kMaxVerticalSpacing = 500

const int	kMaxBlobWidth = 500

const double	kSplitPartitionSize = 2.0

const double	kAllowTextHeight = 0.5

const double	kAllowTextWidth = 0.6

const double	kAllowTextArea = 0.8

const double	kAllowBlobHeight = 0.3

const double	kAllowBlobWidth = 0.4

const double	kAllowBlobArea = 0.05

const int	kMinBoxesInTextPartition = 10

const int	kMaxBoxesInDataPartition = 20

const double	kMaxGapInTextPartition = 4.0

const double	kMinMaxGapInTextPartition = 0.5

const double	kMaxBlobOverlapFactor = 4.0

const double	kMaxTableCellXheight = 2.0

const int	kMaxColumnHeaderDistance = 4

const double	kTableColumnThreshold = 3.0

const double	kMinOverlapWithTable = 0.6

const int	kSideSpaceMargin = 10

const double	kSmallTableProjectionThreshold = 0.35

const double	kLargeTableProjectionThreshold = 0.45

const int	kLargeTableRowCount = 6

const int	kMinRowsInTable = 3

const int	kAdjacentLeaderSearchPadding = 2

const double	kParagraphEndingPreviousLineRatio = 1.3

const double	kMaxParagraphEndingLeftSpaceMultiple = 3.0

const double	kMinParagraphEndingTextToWhitespaceRatio = 3.0

const double	kMaxXProjectionGapFactor = 2.0

const double	kStrokeWidthFractionalTolerance = 0.25

const double	kStrokeWidthConstantTolerance = 2.0

const double	kHorizontalSpacing = 0.30

const double	kVerticalSpacing = -0.2

const int	kCellSplitRowThreshold = 0

const int	kCellSplitColumnThreshold = 0

const int	kLinedTableMinVerticalLines = 3

const int	kLinedTableMinHorizontalLines = 3

const double	kRequiredColumns = 0.7

const double	kMarginFactor = 1.1

const double	kMaxRowSize = 2.5

const double	kGoodRowNumberOfColumnsSmall [] = { 2, 2, 2, 2, 2, 3, 3 }

const int	kGoodRowNumberOfColumnsSmallSize

const double	kGoodRowNumberOfColumnsLarge = 0.7

const double	kMinFilledArea = 0.35

const int	kGutterMultiple = 4

const int	kGutterToNeighbourRatio = 3

const int	kSimilarVectorDist = 10

const int	kSimilarRaggedDist = 50

const int	kMaxFillinMultiple = 11

const double	kMinGutterFraction = 0.5

const double	kLineCountReciprocal = 4.0

const double	kMinAlignedGutter = 0.25

const double	kMinRaggedGutter = 1.5

double	textord_tabvector_vertical_gap_fraction = 0.5

double	textord_tabvector_vertical_box_ratio = 0.5

const int	kMaxLineLength = 1024

const float	kRotationRange = 0.02f

const int	kExposureFactor = 16

const int	kSaltnPepper = 5

const int	kMinRampSize = 1000

const double	kRatingEpsilon = 1.0 / 32

const int	kMaxOffsetDist = 32

const int	kMinLigature = 0xfb00

const int	kMaxLigature = 0xfb17

const double	kMinDivergenceRate = 50.0

const int	kMinStallIterations = 10000

const double	kSubTrainerMarginFraction = 3.0 / 128

const double	kLearningRateDecay = M_SQRT1_2

const int	kNumAdjustmentIterations = 100

const int	kErrorGraphInterval = 1000

const int	kNumPagesPerBatch = 100

const int	kMinStartedErrorRate = 75

const double	kStageTransitionThreshold = 10.0

const double	kHighConfidence = 0.9375

const double	kImprovementFraction = 15.0 / 16.0

const double	kBestCheckpointFraction = 31.0 / 32.0

const int	kTargetXScale = 5

const int	kTargetYScale = 100

const int	kMinClusteredShapes = 1

const int	kMaxUnicharsPerCluster = 2000

const float	kFontMergeDistance = 0.025

const float	kInfiniteDist = 999.0f

const int	kDefaultResolution = 300

const int	kTestChar = -1

const int	kSquareLimit = 25

const int	kPrime1 = 17

const int	kPrime2 = 13

int	test_data [] = {8, 1, 2, -4, 7, 9, 65536, 4, 9, 0}

const int	kTrainerIterations = 600

const int	kBatchIterations = 100

Detailed Description

The box file is assumed to contain box definitions, one per line, of the following format for blob-level boxes:

*   <UTF8 str> <left> <bottom> <right> <top> <page id>
*

and for word/line-level boxes:

*   WordStr <left> <bottom> <right> <top> <page id> #<space-delimited word str>
*

NOTES: The boxes use tesseract coordinates, i.e. 0,0 is at BOTTOM-LEFT.

<page id> is 0-based, and the page number is used for multipage input (tiff).

In the blob-level form, each line represents a recognizable unit, which may be several UTF-8 bytes, but there is a bounding box around each recognizable unit, and no classifier is needed to train in this mode (bootstrapping.)

In the word/line-level form, the line begins with the literal "WordStr", and the bounding box bounds either a whole line or a whole word. The recognizable units in the word/line are listed after the # at the end of the line and are space delimited, ignoring any original spaces on the line. For example:

* word -> #w o r d
* multi word line -> #m u l t i w o r d l i n e
*

The recognizable units must be space-delimited in order to allow multiple unicodes to be used for a single recognizable unit, e.g. Hindi.

In this mode, the classifier must have been pre-trained with the desired character set, or it will not be able to find the character segmentations.

Make a word from the selected blobs and run Tess on them.

Parameters

page_res	recognise blobs
selection_box	within this box

fp_eval_word_spacing() Evaluation function for fixed pitch word lists.

Basically, count the number of "nice" characters - those which are in tess acceptable words or in dict words and are not rejected. Penalise any potential noise chars.

build_menu()

Construct the menu tree used by the command window

process_cmd_win_event()

Process a command returned from the command window (Just call the appropriate command handler)

word_blank_and_set_display() Word processor

Blank display of word then redisplay word according to current display mode settings

Include Files and Type Defines

Public Function Prototypes

Include Files and Type Defines

Typedef Documentation

◆ BlobGridSearch

using tesseract::BlobGridSearch = typedef GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>

Definition at line 31 of file blobgrid.h.

◆ char32

using tesseract::char32 = typedef signed int

Definition at line 53 of file unichar.h.

◆ ColPartitionGridSearch

using tesseract::ColPartitionGridSearch = typedef GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>

Definition at line 935 of file colpartition.h.

◆ ColSegmentGrid

using tesseract::ColSegmentGrid = typedef BBGrid<ColSegment, ColSegment_CLIST, ColSegment_C_IT>

Definition at line 117 of file tablefind.h.

◆ ColSegmentGridSearch

using tesseract::ColSegmentGridSearch = typedef GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT>

Definition at line 120 of file tablefind.h.

◆ DawgVector

using tesseract::DawgVector = typedef GenericVector<Dawg *>

Definition at line 53 of file dict.h.

◆ DictFunc

using tesseract::DictFunc = typedef int (Dict::*)(void*, const UNICHARSET&, UNICHAR_ID, bool) const

Definition at line 80 of file baseapi.h.

◆ DotProductFunction

using tesseract::DotProductFunction = typedef double (*)(const double*, const double*, int)

Definition at line 25 of file simddetect.h.

◆ FileReader

using tesseract::FileReader = typedef bool (*)(const char* filename, GenericVector<char>* data)

Definition at line 47 of file serialis.h.

◆ FileWriter

using tesseract::FileWriter = typedef bool (*)(const GenericVector<char>& data, const char* filename)

Definition at line 51 of file serialis.h.

◆ FillLatticeFunc

using tesseract::FillLatticeFunc = typedef void (Wordrec::*)(const MATRIX&, const WERD_CHOICE_LIST&, const UNICHARSET&, BlamerBundle*)

Definition at line 86 of file baseapi.h.

◆ IntKDPair

using tesseract::IntKDPair = typedef KDPairInc<int, int>

Definition at line 179 of file kdpair.h.

◆ LanguageModelFlagsType

using tesseract::LanguageModelFlagsType = typedef unsigned char

Used for expressing various language model flags.

Definition at line 37 of file lm_state.h.

◆ LigHash

using tesseract::LigHash = typedef std::unordered_map<std::string, std::string, StringHash>

Definition at line 53 of file ligature_table.h.

◆ NodeChildVector

using tesseract::NodeChildVector = typedef GenericVector<NodeChild>

Definition at line 62 of file dawg.h.

◆ PainPointHeap

using tesseract::PainPointHeap = typedef GenericHeap<MatrixCoordPair>

Definition at line 37 of file lm_pain_points.h.

◆ ParamsModelClassifyFunc

using tesseract::ParamsModelClassifyFunc = typedef float (Dict::*)(const char*, void*)

Definition at line 83 of file baseapi.h.

◆ ParamsTrainingHypothesisList

using tesseract::ParamsTrainingHypothesisList = typedef GenericVector<ParamsTrainingHypothesis>

Definition at line 127 of file params_training_featdef.h.

◆ PartSetVector

using tesseract::PartSetVector = typedef GenericVector<ColPartitionSet*>

Definition at line 33 of file colpartitionset.h.

◆ ProbabilityInContextFunc

using tesseract::ProbabilityInContextFunc = typedef double (Dict::*)(const char*, const char*, int, const char*, int)

Definition at line 82 of file baseapi.h.

◆ RecodeHeap

using tesseract::RecodeHeap = typedef GenericHeap<RecodePair>

Definition at line 177 of file recodebeam.h.

◆ RecodePair

using tesseract::RecodePair = typedef KDPairInc<double, RecodeNode>

Definition at line 176 of file recodebeam.h.

◆ RSCounts

using tesseract::RSCounts = typedef std::unordered_map<int, int>

Definition at line 48 of file unicharcompress.cpp.

◆ RSMap

using tesseract::RSMap = typedef std::unordered_map<int, std::unique_ptr<std::vector<int> >>

Definition at line 46 of file unicharcompress.cpp.

◆ SetOfModels

using tesseract::SetOfModels = typedef GenericVectorEqEq<const ParagraphModel *>

Definition at line 98 of file paragraphs_internal.h.

◆ ShapeQueue

using tesseract::ShapeQueue = typedef GenericHeap<ShapeQueueEntry>

Definition at line 155 of file shapetable.h.

◆ SuccessorList

using tesseract::SuccessorList = typedef GenericVector<int>

Definition at line 63 of file dawg.h.

◆ SuccessorListsVector

using tesseract::SuccessorListsVector = typedef GenericVector<SuccessorList *>

Definition at line 64 of file dawg.h.

◆ TestCallback

using tesseract::TestCallback = typedef std::function<STRING(int, const double*, const TessdataManager&, int)>

Definition at line 73 of file lstmtrainer.h.

◆ TruthCallback

using tesseract::TruthCallback = typedef std::function<void(const UNICHARSET&, int, PageIterator*, Pix*)>

Definition at line 88 of file baseapi.h.

◆ UnicharAmbigsVector

using tesseract::UnicharAmbigsVector = typedef GenericVector<AmbigSpec_LIST *>

Definition at line 134 of file ambigs.h.

◆ UnicharIdVector

using tesseract::UnicharIdVector = typedef GenericVector<UNICHAR_ID>

Definition at line 35 of file ambigs.h.

◆ WidthCallback

using tesseract::WidthCallback = typedef std::function<bool(int)>

Definition at line 35 of file tabfind.h.

◆ WordGrid

using tesseract::WordGrid = typedef BBGrid<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT>

Definition at line 65 of file textord.h.

◆ WordRecognizer

using tesseract::WordRecognizer = typedef void (Tesseract::*)(const WordData&, WERD_RES**, PointerVector<WERD_RES>*)

Definition at line 170 of file tesseractclass.h.

◆ WordSearch

using tesseract::WordSearch = typedef GridSearch<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT>

Definition at line 66 of file textord.h.

Enumeration Type Documentation

◆ AmbigType

enum tesseract::AmbigType

Enumerator
NOT_AMBIG
REPLACE_AMBIG
DEFINITE_AMBIG
SIMILAR_AMBIG
CASE_AMBIG
AMBIG_TYPE_COUNT

Definition at line 37 of file ambigs.h.

                {
   NOT_AMBIG,        // the ngram pair is not ambiguous
   REPLACE_AMBIG,    // ocred ngram should always be substituted with correct
   DEFINITE_AMBIG,   // add correct ngram to the classifier results (1-1)
   SIMILAR_AMBIG,    // use pairwise classifier for ocred/correct pair (1-1)
   CASE_AMBIG,       // this is a case ambiguity (1-1)
  
   AMBIG_TYPE_COUNT  // number of enum entries
 };

◆ CachingStrategy

enum tesseract::CachingStrategy

Enumerator
CS_SEQUENTIAL
CS_ROUND_ROBIN

Definition at line 41 of file imagedata.h.

                      {
   // Reads all of one file before moving on to the next. Requires samples to be
   // shuffled across files. Uses the count of samples in the first file as
   // the count in all the files to achieve high-speed random access. As a
   // consequence, if subsequent files are smaller, they get entries used more
   // than once, and if subsequent files are larger, some entries are not used.
   // Best for larger data sets that don't fit in memory.
   CS_SEQUENTIAL,
   // Reads one sample from each file in rotation. Does not require shuffled
   // samples, but is extremely disk-intensive. Samples in smaller files also
   // get used more often than samples in larger files.
   // Best for smaller data sets that mostly fit in memory.
   CS_ROUND_ROBIN,
 };

◆ CharSegmentationType

enum tesseract::CharSegmentationType

Enumerator
CST_FRAGMENT
CST_WHOLE
CST_IMPROPER
CST_NGRAM

Definition at line 96 of file classify.h.

                           {
   CST_FRAGMENT,  // A partial character.
   CST_WHOLE,     // A correctly segmented character.
   CST_IMPROPER,  // More than one but less than 2 characters.
   CST_NGRAM      // Multiple characters.
 };

◆ CMD_EVENTS

enum tesseract::CMD_EVENTS

Enumerator
ACTION_1_CMD_EVENT
RECOG_WERDS
RECOG_PSEUDO
ACTION_2_CMD_EVENT

Definition at line 486 of file tessedit.cpp.

◆ ColSegType

enum tesseract::ColSegType

Enumerator
COL_UNKNOWN
COL_TEXT
COL_TABLE
COL_MIXED
COL_COUNT

Definition at line 29 of file tablefind.h.

                 {
   COL_UNKNOWN,
   COL_TEXT,
   COL_TABLE,
   COL_MIXED,
   COL_COUNT
 };

◆ ColumnSpanningType

enum tesseract::ColumnSpanningType

Enumerator
CST_NOISE
CST_FLOWING
CST_HEADING
CST_PULLOUT
CST_COUNT

Definition at line 47 of file colpartition.h.

                         {
   CST_NOISE,        // Strictly between columns.
   CST_FLOWING,      // Strictly within a single column.
   CST_HEADING,      // Spans multiple columns.
   CST_PULLOUT,      // Touches multiple columns, but doesn't span them.
   CST_COUNT         // Number of entries.
 };

◆ CountTypes

enum tesseract::CountTypes

Enumerator
CT_UNICHAR_TOP_OK
CT_UNICHAR_TOP1_ERR
CT_UNICHAR_TOP2_ERR
CT_UNICHAR_TOPN_ERR
CT_UNICHAR_TOPTOP_ERR
CT_OK_MULTI_UNICHAR
CT_OK_JOINED
CT_OK_BROKEN
CT_REJECT
CT_FONT_ATTR_ERR
CT_OK_MULTI_FONT
CT_NUM_RESULTS
CT_RANK
CT_REJECTED_JUNK
CT_ACCEPTED_JUNK
CT_SIZE

Definition at line 69 of file errorcounter.h.

                 {
   CT_UNICHAR_TOP_OK,     // Top shape contains correct unichar id.
   // The rank of the results in TOP1, TOP2, TOPN is determined by a gap of
   // kRatingEpsilon from the first result in each group. The real top choice
   // is measured using TOPTOP.
   CT_UNICHAR_TOP1_ERR,   // Top shape does not contain correct unichar id.
   CT_UNICHAR_TOP2_ERR,   // Top 2 shapes don't contain correct unichar id.
   CT_UNICHAR_TOPN_ERR,   // No output shape contains correct unichar id.
   CT_UNICHAR_TOPTOP_ERR,   // Very top choice not correct.
   CT_OK_MULTI_UNICHAR,   // Top shape id has correct unichar id, and others.
   CT_OK_JOINED,          // Top shape id is correct but marked joined.
   CT_OK_BROKEN,          // Top shape id is correct but marked broken.
   CT_REJECT,             // Classifier hates this.
   CT_FONT_ATTR_ERR,      // Top unichar OK, but font attributes incorrect.
   CT_OK_MULTI_FONT,      // CT_FONT_ATTR_OK but there are multiple font attrs.
   CT_NUM_RESULTS,        // Number of answers produced.
   CT_RANK,               // Rank of correct answer.
   CT_REJECTED_JUNK,      // Junk that was correctly rejected.
   CT_ACCEPTED_JUNK,      // Junk that was incorrectly classified otherwise.
  
   CT_SIZE                // Number of types for array sizing.
 };

◆ DawgType

enum tesseract::DawgType

Enumerator
DAWG_TYPE_PUNCTUATION
DAWG_TYPE_WORD
DAWG_TYPE_NUMBER
DAWG_TYPE_PATTERN
DAWG_TYPE_COUNT

Definition at line 66 of file dawg.h.

               {
   DAWG_TYPE_PUNCTUATION,
   DAWG_TYPE_WORD,
   DAWG_TYPE_NUMBER,
   DAWG_TYPE_PATTERN,
  
   DAWG_TYPE_COUNT  // number of enum entries

◆ ErrorTypes

enum tesseract::ErrorTypes

Enumerator
ET_RMS
ET_DELTA
ET_WORD_RECERR
ET_CHAR_ERROR
ET_SKIP_RATIO
ET_COUNT

Definition at line 37 of file lstmtrainer.h.

                 {
   ET_RMS,          // RMS activation error.
   ET_DELTA,        // Number of big errors in deltas.
   ET_WORD_RECERR,  // Output text string word recall error.
   ET_CHAR_ERROR,   // Output text string total char error.
   ET_SKIP_RATIO,   // Fraction of samples skipped.
   ET_COUNT         // For array sizing.
 };

◆ FactorNames

enum tesseract::FactorNames

Enumerator
FN_INCOLOR
FN_Y0
FN_Y1
FN_Y2
FN_Y3
FN_X0
FN_X1
FN_SHEAR
FN_NUM_FACTORS

Definition at line 58 of file degradeimage.cpp.

92 {

◆ FlexDimensions

enum tesseract::FlexDimensions

Enumerator
FD_BATCH
FD_HEIGHT
FD_WIDTH
FD_DIMSIZE

Definition at line 32 of file stridemap.h.

                     {
   FD_BATCH,    // Index of multiple images.
   FD_HEIGHT,   // y-coordinate in image.
   FD_WIDTH,    // x-coordinate in image.
   FD_DIMSIZE,  // Number of flexible non-depth dimensions.
 };

◆ GraphemeNorm

enum tesseract::GraphemeNorm

strong

Enumerator
kNone
kNormalize

Definition at line 65 of file normstrngs.h.

◆ GraphemeNormMode

enum tesseract::GraphemeNormMode

strong

Enumerator
kSingleString
kCombined
kGlyphSplit
kIndividualUnicodes

Definition at line 48 of file validator.h.

                         : char32 {
   kNonVirama = 0,
   kDevanagari = 0x900,
   kBengali = 0x980,
   kGurmukhi = 0xa00,
   kGujarati = 0xa80,
   kOriya = 0xb00,
   kTamil = 0xb80,
   kTelugu = 0xc00,
   kKannada = 0xc80,

◆ kParamsTrainingFeatureType

enum tesseract::kParamsTrainingFeatureType

Enumerator
PTRAIN_DIGITS_SHORT
PTRAIN_DIGITS_MED
PTRAIN_DIGITS_LONG
PTRAIN_NUM_SHORT
PTRAIN_NUM_MED
PTRAIN_NUM_LONG
PTRAIN_DOC_SHORT
PTRAIN_DOC_MED
PTRAIN_DOC_LONG
PTRAIN_DICT_SHORT
PTRAIN_DICT_MED
PTRAIN_DICT_LONG
PTRAIN_FREQ_SHORT
PTRAIN_FREQ_MED
PTRAIN_FREQ_LONG
PTRAIN_SHAPE_COST_PER_CHAR
PTRAIN_NGRAM_COST_PER_CHAR
PTRAIN_NUM_BAD_PUNC
PTRAIN_NUM_BAD_CASE
PTRAIN_XHEIGHT_CONSISTENCY
PTRAIN_NUM_BAD_CHAR_TYPE
PTRAIN_NUM_BAD_SPACING
PTRAIN_NUM_BAD_FONT
PTRAIN_RATING_PER_CHAR
PTRAIN_NUM_FEATURE_TYPES

Definition at line 39 of file params_training_featdef.h.

                                 {
   // Digits
   PTRAIN_DIGITS_SHORT,             // 0
   PTRAIN_DIGITS_MED,               // 1
   PTRAIN_DIGITS_LONG,              // 2
   // Number or pattern (NUMBER_PERM, USER_PATTERN_PERM)
   PTRAIN_NUM_SHORT,                // 3
   PTRAIN_NUM_MED,                  // 4
   PTRAIN_NUM_LONG,                 // 5
   // Document word (DOC_DAWG_PERM)
   PTRAIN_DOC_SHORT,                // 6
   PTRAIN_DOC_MED,                  // 7
   PTRAIN_DOC_LONG,                 // 8
   // Word (SYSTEM_DAWG_PERM, USER_DAWG_PERM, COMPOUND_PERM)
   PTRAIN_DICT_SHORT,               // 9
   PTRAIN_DICT_MED,                 // 10
   PTRAIN_DICT_LONG,                // 11
   // Frequent word (FREQ_DAWG_PERM)
   PTRAIN_FREQ_SHORT,               // 12
   PTRAIN_FREQ_MED,                 // 13
   PTRAIN_FREQ_LONG,                // 14
   PTRAIN_SHAPE_COST_PER_CHAR,      // 15
   PTRAIN_NGRAM_COST_PER_CHAR,      // 16
   PTRAIN_NUM_BAD_PUNC,             // 17
   PTRAIN_NUM_BAD_CASE,             // 18
   PTRAIN_XHEIGHT_CONSISTENCY,      // 19
   PTRAIN_NUM_BAD_CHAR_TYPE,        // 20
   PTRAIN_NUM_BAD_SPACING,          // 21
   PTRAIN_NUM_BAD_FONT,             // 22
   PTRAIN_RATING_PER_CHAR,          // 23
  
   PTRAIN_NUM_FEATURE_TYPES
 };

◆ LeftOrRight

enum tesseract::LeftOrRight

Enumerator
LR_LEFT
LR_RIGHT

Definition at line 39 of file strokewidth.h.

                  {
   LR_LEFT,
   LR_RIGHT
 };

◆ LineType

enum tesseract::LineType

Enumerator
LT_START
LT_BODY
LT_UNKNOWN
LT_MULTIPLE

Definition at line 49 of file paragraphs_internal.h.

               {
   LT_START = 'S',     // First line of a paragraph.
   LT_BODY = 'C',      // Continuation line of a paragraph.
   LT_UNKNOWN = 'U',   // No clues.
   LT_MULTIPLE = 'M',  // Matches for both LT_START and LT_BODY.

◆ LMPainPointsType

enum tesseract::LMPainPointsType

Enumerator
LM_PPTYPE_BLAMER
LM_PPTYPE_AMBIG
LM_PPTYPE_PATH
LM_PPTYPE_SHAPE
LM_PPTYPE_NUM

Definition at line 40 of file lm_pain_points.h.

                       {
   LM_PPTYPE_BLAMER,
   LM_PPTYPE_AMBIG,
   LM_PPTYPE_PATH,
   LM_PPTYPE_SHAPE,
  
   LM_PPTYPE_NUM
 };

◆ LossType

enum tesseract::LossType

Enumerator
LT_NONE
LT_CTC
LT_SOFTMAX
LT_LOGISTIC

Definition at line 29 of file static_shape.h.

               {
   LT_NONE,      // Undefined.
   LT_CTC,       // Softmax with standard CTC for training/decoding.
   LT_SOFTMAX,   // Outputs sum to 1 in fixed positions.
   LT_LOGISTIC,  // Logistic outputs with independent values.
 };

◆ NeighbourPartitionType

enum tesseract::NeighbourPartitionType

Enumerator
NPT_HTEXT
NPT_VTEXT
NPT_WEAK_HTEXT
NPT_WEAK_VTEXT
NPT_IMAGE
NPT_COUNT

Definition at line 1501 of file colpartitiongrid.cpp.

                             {
   NPT_HTEXT,       // Definite horizontal text.
   NPT_VTEXT,       // Definite vertical text.
   NPT_WEAK_HTEXT,  // Weakly horizontal text. Counts as HTEXT for HTEXT, but
                    // image for image and VTEXT.
   NPT_WEAK_VTEXT,  // Weakly vertical text. Counts as VTEXT for VTEXT, but
                    // image for image and HTEXT.
   NPT_IMAGE,       // Definite non-text.
   NPT_COUNT        // Number of array elements.
 };

◆ NetworkFlags

enum tesseract::NetworkFlags

Enumerator
NF_LAYER_SPECIFIC_LR
NF_ADAM

Definition at line 85 of file network.h.

                   {
   // Network forward/backprop behavior.
   NF_LAYER_SPECIFIC_LR = 64,  // Separate learning rate for each layer.
   NF_ADAM = 128,              // Weight-specific learning rate.
 };

◆ NetworkType

enum tesseract::NetworkType

Enumerator
NT_NONE
NT_INPUT
NT_CONVOLVE
NT_MAXPOOL
NT_PARALLEL
NT_REPLICATED
NT_PAR_RL_LSTM
NT_PAR_UD_LSTM
NT_PAR_2D_LSTM
NT_SERIES
NT_RECONFIG
NT_XREVERSED
NT_YREVERSED
NT_XYTRANSPOSE
NT_LSTM
NT_LSTM_SUMMARY
NT_LOGISTIC
NT_POSCLIP
NT_SYMCLIP
NT_TANH
NT_RELU
NT_LINEAR
NT_SOFTMAX
NT_SOFTMAX_NO_CTC
NT_LSTM_SOFTMAX
NT_LSTM_SOFTMAX_ENCODED
NT_TENSORFLOW
NT_COUNT

Definition at line 43 of file network.h.

                  {
   NT_NONE,   // The naked base class.
   NT_INPUT,  // Inputs from an image.
   // Plumbing networks combine other networks or rearrange the inputs.
   NT_CONVOLVE,     // Duplicates inputs in a sliding window neighborhood.
   NT_MAXPOOL,      // Chooses the max result from a rectangle.
   NT_PARALLEL,     // Runs networks in parallel.
   NT_REPLICATED,   // Runs identical networks in parallel.
   NT_PAR_RL_LSTM,  // Runs LTR and RTL LSTMs in parallel.
   NT_PAR_UD_LSTM,  // Runs Up and Down LSTMs in parallel.
   NT_PAR_2D_LSTM,  // Runs 4 LSTMs in parallel.
   NT_SERIES,       // Executes a sequence of layers.
   NT_RECONFIG,     // Scales the time/y size but makes the output deeper.
   NT_XREVERSED,    // Reverses the x direction of the inputs/outputs.
   NT_YREVERSED,    // Reverses the y-direction of the inputs/outputs.
   NT_XYTRANSPOSE,  // Transposes x and y (for just a single op).
   // Functional networks actually calculate stuff.
   NT_LSTM,            // Long-Short-Term-Memory block.
   NT_LSTM_SUMMARY,    // LSTM that only keeps its last output.
   NT_LOGISTIC,        // Fully connected logistic nonlinearity.
   NT_POSCLIP,         // Fully connected rect lin version of logistic.
   NT_SYMCLIP,         // Fully connected rect lin version of tanh.
   NT_TANH,            // Fully connected with tanh nonlinearity.
   NT_RELU,            // Fully connected with rectifier nonlinearity.
   NT_LINEAR,          // Fully connected with no nonlinearity.
   NT_SOFTMAX,         // Softmax uses exponential normalization, with CTC.
   NT_SOFTMAX_NO_CTC,  // Softmax uses exponential normalization, no CTC.
   // The SOFTMAX LSTMs both have an extra softmax layer on top, but inside, with
   // the outputs fed back to the input of the LSTM at the next timestep.
   // The ENCODED version binary encodes the softmax outputs, providing log2 of
   // the number of outputs as additional inputs, and the other version just
   // provides all the softmax outputs as additional inputs.
   NT_LSTM_SOFTMAX,          // 1-d LSTM with built-in fully connected softmax.
   NT_LSTM_SOFTMAX_ENCODED,  // 1-d LSTM with built-in binary encoded softmax.
   // A TensorFlow graph encapsulated as a Tesseract network.
   NT_TENSORFLOW,
  
   NT_COUNT  // Array size.
 };

◆ NodeContinuation

enum tesseract::NodeContinuation

Enumerator
NC_ANYTHING
NC_ONLY_DUP
NC_NO_DUP
NC_COUNT

Definition at line 73 of file recodebeam.h.

                       {
   NC_ANYTHING,  // This node used just its own score, so anything can follow.
   NC_ONLY_DUP,  // The current node combined another score with the score for
                 // itself, without a stand-alone duplicate before, so must be
                 // followed by a stand-alone duplicate.
   NC_NO_DUP,    // The current node combined another score with the score for
                 // itself, after a stand-alone, so can only be followed by
                 // something other than a duplicate of the current node.
   NC_COUNT
 };

◆ NormalizationMode

enum tesseract::NormalizationMode

Enumerator
NM_BASELINE
NM_CHAR_ISOTROPIC
NM_CHAR_ANISOTROPIC

Definition at line 41 of file normalis.h.

                        {
   NM_BASELINE = -3,         // The original BL normalization mode.
   NM_CHAR_ISOTROPIC = -2,   // Character normalization but isotropic.
   NM_CHAR_ANISOTROPIC = -1  // The original CN normalization mode.

◆ OcrEngineMode

enum tesseract::OcrEngineMode

When Tesseract/Cube is initialized we can choose to instantiate/load/run only the Tesseract part, only the Cube part or both along with the combiner. The preference of which engine to use is stored in tessedit_ocr_engine_mode.

ATTENTION: When modifying this enum, please make sure to make the appropriate changes to all the enums mirroring it (e.g. OCREngine in cityblock/workflow/detection/detection_storage.proto). Such enums will mention the connection to OcrEngineMode in the comments.

Enumerator
OEM_TESSERACT_ONLY
OEM_LSTM_ONLY
OEM_TESSERACT_LSTM_COMBINED
OEM_DEFAULT
OEM_COUNT

Definition at line 265 of file publictypes.h.

                    {
   OEM_TESSERACT_ONLY,           // Run Tesseract only - fastest; deprecated
   OEM_LSTM_ONLY,                // Run just the LSTM line recognizer.
   OEM_TESSERACT_LSTM_COMBINED,  // Run the LSTM recognizer, but allow fallback
                                 // to Tesseract when things get difficult.
                                 // deprecated
   OEM_DEFAULT,                  // Specify this mode when calling init_*(),
                                 // to indicate that any of the above modes
                                 // should be automatically inferred from the
                                 // variables in the language-specific config,
                                 // command-line configs, or if not specified
                                 // in any of the above should be set to the
                                 // default OEM_TESSERACT_ONLY.
   OEM_COUNT                     // Number of OEMs
 };

◆ OCRNorm

enum tesseract::OCRNorm

strong

Enumerator
kNone
kNormalize

Definition at line 57 of file normstrngs.h.

◆ Orientation

enum tesseract::Orientation

+------------------+  Orientation Example:
| 1 Aaaa Aaaa Aaaa |  ====================
| Aaa aa aaa aa    |  To left is a diagram of some (1) English and
| aaaaaa A aa aaa. |  (2) Chinese text and a (3) photo credit.
|                2 |
|   #######  c c C |  Upright Latin characters are represented as A and a.
|   #######  c c c |  '<' represents a latin character rotated
| < #######  c c c |      anti-clockwise 90 degrees.
| < #######  c   c |
| < #######  .   c |  Upright Chinese characters are represented C and c.
| 3 #######      c |
+------------------+  NOTA BENE: enum values here should match goodoc.proto

If you orient your head so that "up" aligns with Orientation, then the characters will appear "right side up" and readable.

In the example above, both the English and Chinese paragraphs are oriented so their "up" is the top of the page (page up). The photo credit is read with one's head turned leftward ("up" is to page left).

The values of this enum match the convention of Tesseract's osdetect.h

Enumerator
ORIENTATION_PAGE_UP
ORIENTATION_PAGE_RIGHT
ORIENTATION_PAGE_DOWN
ORIENTATION_PAGE_LEFT

Definition at line 116 of file publictypes.h.

                  {
   ORIENTATION_PAGE_UP = 0,
   ORIENTATION_PAGE_RIGHT = 1,
   ORIENTATION_PAGE_DOWN = 2,
   ORIENTATION_PAGE_LEFT = 3,
 };

◆ PageIteratorLevel

enum tesseract::PageIteratorLevel

enum of the elements of the page hierarchy, used in ResultIterator to provide functions that operate on each level without having to have 5x as many functions.

Enumerator
RIL_BLOCK
RIL_PARA
RIL_TEXTLINE
RIL_WORD
RIL_SYMBOL

Definition at line 216 of file publictypes.h.

                        {
   RIL_BLOCK,     // Block of text/image/separator line.
   RIL_PARA,      // Paragraph within a block.
   RIL_TEXTLINE,  // Line within a paragraph.
   RIL_WORD,      // Word within a textline.
   RIL_SYMBOL     // Symbol/character within a word.
 };

◆ PageSegMode

enum tesseract::PageSegMode

Possible modes for page layout analysis. These must be kept in order of decreasing amount of layout analysis to be done, except for OSD_ONLY, so that the inequality test macros below work.

Enumerator
PSM_OSD_ONLY	Orientation and script detection only.
PSM_AUTO_OSD	Automatic page segmentation with orientation and script detection. (OSD)
PSM_AUTO_ONLY	Automatic page segmentation, but no OSD, or OCR.
PSM_AUTO	Fully automatic page segmentation, but no OSD.
PSM_SINGLE_COLUMN	Assume a single column of text of variable sizes.
PSM_SINGLE_BLOCK_VERT_TEXT	Assume a single uniform block of vertically aligned text.
PSM_SINGLE_BLOCK	Assume a single uniform block of text. (Default.)
PSM_SINGLE_LINE	Treat the image as a single text line.
PSM_SINGLE_WORD	Treat the image as a single word.
PSM_CIRCLE_WORD	Treat the image as a single word in a circle.
PSM_SINGLE_CHAR	Treat the image as a single character.
PSM_SPARSE_TEXT	Find as much text as possible in no particular order.
PSM_SPARSE_TEXT_OSD	Sparse text with orientation and script detection.
PSM_RAW_LINE	Treat the image as a single text line, bypassing hacks that are Tesseract-specific.
PSM_COUNT	Number of enum entries.

Definition at line 159 of file publictypes.h.

                  {
   PSM_OSD_ONLY = 0,       
   PSM_AUTO_OSD = 1,       
   PSM_AUTO_ONLY = 2,      
   PSM_AUTO = 3,           
   PSM_SINGLE_COLUMN = 4,  
   PSM_SINGLE_BLOCK_VERT_TEXT = 5,  
   PSM_SINGLE_BLOCK = 6,  
   PSM_SINGLE_LINE = 7,   
   PSM_SINGLE_WORD = 8,   
   PSM_CIRCLE_WORD = 9,   
   PSM_SINGLE_CHAR = 10,  
   PSM_SPARSE_TEXT =
       11,  
   PSM_SPARSE_TEXT_OSD = 12,  
   PSM_RAW_LINE = 13,  
  
   PSM_COUNT  
 };

◆ ParagraphJustification

enum tesseract::ParagraphJustification

JUSTIFICATION_UNKNOWN The alignment is not clearly one of the other options. This could happen for example if there are only one or two lines of text or the text looks like source code or poetry.

NOTA BENE: Fully justified paragraphs (text aligned to both left and right margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text is written with a left-to-right script and with JUSTIFICATION_RIGHT if their text is written in a right-to-left script.

Interpretation for text read in vertical lines: "Left" is wherever the starting reading position is.

JUSTIFICATION_LEFT Each line, except possibly the first, is flush to the same left tab stop.

JUSTIFICATION_CENTER The text lines of the paragraph are centered about a vertical line running through the middle of the text lines.

JUSTIFICATION_RIGHT Each line, except possibly the first, is flush to the same right tab stop.

Enumerator
JUSTIFICATION_UNKNOWN
JUSTIFICATION_LEFT
JUSTIFICATION_CENTER
JUSTIFICATION_RIGHT

Definition at line 248 of file publictypes.h.

                             {
   JUSTIFICATION_UNKNOWN,
   JUSTIFICATION_LEFT,
   JUSTIFICATION_CENTER,
   JUSTIFICATION_RIGHT,
 };

◆ PartitionFindResult

enum tesseract::PartitionFindResult

Enumerator
PFR_OK
PFR_SKEW
PFR_NOISE

Definition at line 46 of file strokewidth.h.

                          {
   PFR_OK,    // Everything is OK.
   PFR_SKEW,  // Skew was detected and rotated.
   PFR_NOISE  // Noise was detected and removed.
 };

◆ ScriptPos

enum tesseract::ScriptPos

Enumerator
SP_NORMAL
SP_SUBSCRIPT
SP_SUPERSCRIPT
SP_DROPCAP

Definition at line 250 of file ratngs.h.

                {
   SP_NORMAL,
   SP_SUBSCRIPT,
   SP_SUPERSCRIPT,
   SP_DROPCAP

◆ SerializeAmount

enum tesseract::SerializeAmount

Enumerator
LIGHT
NO_BEST_TRAINER
FULL

Definition at line 56 of file lstmtrainer.h.

                      {
   LIGHT,            // Minimal data for remote training.
   NO_BEST_TRAINER,  // Save an empty vector in place of best_trainer_.
   FULL,             // All data including best_trainer_.
 };

◆ SetParamConstraint

enum tesseract::SetParamConstraint

Enumerator
SET_PARAM_CONSTRAINT_NONE
SET_PARAM_CONSTRAINT_DEBUG_ONLY
SET_PARAM_CONSTRAINT_NON_DEBUG_ONLY
SET_PARAM_CONSTRAINT_NON_INIT_ONLY

Definition at line 49 of file params.h.

                  {
  public:
   // Reads a file of parameter definitions and set/modify the values therein.
   // If the filename begins with a + or -, the BoolVariables will be
   // ORed or ANDed with any current values.

◆ SubTrainerResult

enum tesseract::SubTrainerResult

Enumerator
STR_NONE
STR_UPDATED
STR_REPLACED

Definition at line 63 of file lstmtrainer.h.

                       {
   STR_NONE,     // Did nothing as not good enough.
   STR_UPDATED,  // Subtrainer was updated, but didn't replace *this.
   STR_REPLACED  // Subtrainer replaced *this.
 };

◆ TabAlignment

enum tesseract::TabAlignment

Enumerator
TA_LEFT_ALIGNED
TA_LEFT_RAGGED
TA_CENTER_JUSTIFIED
TA_RIGHT_ALIGNED
TA_RIGHT_RAGGED
TA_SEPARATOR
TA_COUNT

Definition at line 44 of file tabvector.h.

                   {
   TA_LEFT_ALIGNED,
   TA_LEFT_RAGGED,
   TA_CENTER_JUSTIFIED,
   TA_RIGHT_ALIGNED,
   TA_RIGHT_RAGGED,
   TA_SEPARATOR,
   TA_COUNT
 };

◆ TessdataType

enum tesseract::TessdataType

Enumerator
TESSDATA_LANG_CONFIG
TESSDATA_UNICHARSET
TESSDATA_AMBIGS
TESSDATA_INTTEMP
TESSDATA_PFFMTABLE
TESSDATA_NORMPROTO
TESSDATA_PUNC_DAWG
TESSDATA_SYSTEM_DAWG
TESSDATA_NUMBER_DAWG
TESSDATA_FREQ_DAWG
TESSDATA_FIXED_LENGTH_DAWGS
TESSDATA_CUBE_UNICHARSET
TESSDATA_CUBE_SYSTEM_DAWG
TESSDATA_SHAPE_TABLE
TESSDATA_BIGRAM_DAWG
TESSDATA_UNAMBIG_DAWG
TESSDATA_PARAMS_MODEL
TESSDATA_LSTM
TESSDATA_LSTM_PUNC_DAWG
TESSDATA_LSTM_SYSTEM_DAWG
TESSDATA_LSTM_NUMBER_DAWG
TESSDATA_LSTM_UNICHARSET
TESSDATA_LSTM_RECODER
TESSDATA_VERSION
TESSDATA_NUM_ENTRIES

Definition at line 56 of file tessdatamanager.h.

                   {
   TESSDATA_LANG_CONFIG,         // 0
   TESSDATA_UNICHARSET,          // 1
   TESSDATA_AMBIGS,              // 2
   TESSDATA_INTTEMP,             // 3
   TESSDATA_PFFMTABLE,           // 4
   TESSDATA_NORMPROTO,           // 5
   TESSDATA_PUNC_DAWG,           // 6
   TESSDATA_SYSTEM_DAWG,         // 7
   TESSDATA_NUMBER_DAWG,         // 8
   TESSDATA_FREQ_DAWG,           // 9
   TESSDATA_FIXED_LENGTH_DAWGS,  // 10  // deprecated
   TESSDATA_CUBE_UNICHARSET,     // 11  // deprecated
   TESSDATA_CUBE_SYSTEM_DAWG,    // 12  // deprecated
   TESSDATA_SHAPE_TABLE,         // 13
   TESSDATA_BIGRAM_DAWG,         // 14
   TESSDATA_UNAMBIG_DAWG,        // 15
   TESSDATA_PARAMS_MODEL,        // 16
   TESSDATA_LSTM,                // 17
   TESSDATA_LSTM_PUNC_DAWG,      // 18
   TESSDATA_LSTM_SYSTEM_DAWG,    // 19
   TESSDATA_LSTM_NUMBER_DAWG,    // 20
   TESSDATA_LSTM_UNICHARSET,     // 21
   TESSDATA_LSTM_RECODER,        // 22
   TESSDATA_VERSION,             // 23
  
   TESSDATA_NUM_ENTRIES
 };

◆ TextlineOrder

enum tesseract::TextlineOrder

The text lines are read in the given sequence.

In English, the order is top-to-bottom. In Chinese, vertical text lines are read right-to-left. Mongolian is written in vertical columns top to bottom like Chinese, but the lines are ordered left-to-right.

Note that only some combinations make sense. For example, WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM.

Enumerator
TEXTLINE_ORDER_LEFT_TO_RIGHT
TEXTLINE_ORDER_RIGHT_TO_LEFT
TEXTLINE_ORDER_TOP_TO_BOTTOM

Definition at line 148 of file publictypes.h.

                    {
   TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,
   TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,
   TEXTLINE_ORDER_TOP_TO_BOTTOM = 2,
 };

◆ TopNState

enum tesseract::TopNState

Enumerator
TN_TOP2
TN_TOPN
TN_ALSO_RAN
TN_COUNT

Definition at line 85 of file recodebeam.h.

                {
   TN_TOP2,      // Winner or 2nd.
   TN_TOPN,      // Runner up in top-n, but not 1st or 2nd.
   TN_ALSO_RAN,  // Not in the top-n.
   TN_COUNT
 };

◆ Trainability

enum tesseract::Trainability

Enumerator
TRAINABLE
PERFECT
UNENCODABLE
HI_PRECISION_ERR
NOT_BOXED

Definition at line 47 of file lstmtrainer.h.

                   {
   TRAINABLE,         // Non-zero delta error.
   PERFECT,           // Zero delta error.
   UNENCODABLE,       // Not trainable due to coding/alignment trouble.
   HI_PRECISION_ERR,  // Hi confidence disagreement.
   NOT_BOXED,         // Early in training and has no character boxes.
 };

◆ TrainingFlags

enum tesseract::TrainingFlags

Enumerator
TF_INT_MODE
TF_COMPRESS_UNICHARSET

Definition at line 46 of file lstmrecognizer.h.

                    {
   TF_INT_MODE = 1,
   TF_COMPRESS_UNICHARSET = 64,
 };

◆ TrainingState

enum tesseract::TrainingState

Enumerator
TS_DISABLED
TS_ENABLED
TS_TEMP_DISABLE
TS_RE_ENABLE

Definition at line 92 of file network.h.

                    {
   // Valid states of training_.
   TS_DISABLED,      // Disabled permanently.
   TS_ENABLED,       // Enabled for backprop and to write a training dump.
                     // Re-enable from ANY disabled state.
   TS_TEMP_DISABLE,  // Temporarily disabled to write a recognition dump.
   // Valid only for SetEnableTraining.
   TS_RE_ENABLE,  // Re-Enable from TS_TEMP_DISABLE, but not TS_DISABLED.
 };

◆ UnicodeNormMode

enum tesseract::UnicodeNormMode

strong

Enumerator
kNFD
kNFC
kNFKD
kNFKC

Definition at line 48 of file normstrngs.h.

                         {
   kNone,
   kNormalize,
 };
  

◆ ViramaScript

enum tesseract::ViramaScript : char32

strong

Enumerator
kNonVirama
kDevanagari
kBengali
kGurmukhi
kGujarati
kOriya
kTamil
kTelugu
kKannada
kMalayalam
kSinhala
kMyanmar
kKhmer
kJavanese

Definition at line 67 of file validator.h.

                 {
  public:
   // Validates and cleans the src vector of unicodes to the *dest, according to
   // g_mode. In the case of kSingleString, a single vector containing the whole
   // result is added to *dest. With kCombined, multiple vectors are added to
   // *dest with one grapheme in each. With kGlyphSplit, multiple vectors are
   // added to *dest with a smaller unit representing a glyph in each.
   // In case of validation error, returns false and as much as possible of the
   // input, without discarding invalid text.
   static bool ValidateCleanAndSegment(GraphemeNormMode g_mode,
                                       bool report_errors,
                                       const std::vector<char32>& src,

◆ WritingDirection

enum tesseract::WritingDirection

The grapheme clusters within a line of text are laid out logically in this direction, judged when looking at the text line rotated so that its Orientation is "page up".

For English text, the writing direction is left-to-right. For the Chinese text in the above example, the writing direction is top-to-bottom.

Enumerator
WRITING_DIRECTION_LEFT_TO_RIGHT
WRITING_DIRECTION_RIGHT_TO_LEFT
WRITING_DIRECTION_TOP_TO_BOTTOM

Definition at line 131 of file publictypes.h.

                       {
   WRITING_DIRECTION_LEFT_TO_RIGHT = 0,
   WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
   WRITING_DIRECTION_TOP_TO_BOTTOM = 2,
 };

◆ XHeightConsistencyEnum

enum tesseract::XHeightConsistencyEnum

Enumerator
XH_GOOD
XH_SUBNORMAL
XH_INCONSISTENT

Definition at line 78 of file dict.h.

78 {XH_GOOD, XH_SUBNORMAL, XH_INCONSISTENT};

Function Documentation

◆ AccumulateVector()

void tesseract::AccumulateVector	(	int	n,
		const double *	src,
		double *	dest
	)

inline

Definition at line 174 of file functions.h.

                                                                      {
   // Element-wise accumulate: adds src[i] into dest[i] for every i in [0, n).
   // src is only read; dest is updated in place.
   for (int i = 0; i < n; ++i) dest[i] += src[i];
 }

◆ AsciiLikelyListItem()

bool tesseract::AsciiLikelyListItem ( const STRING & word )

Definition at line 296 of file paragraphs.cpp.

                          {
  public:
   UnicodeSpanSkipper(const UNICHARSET *unicharset, const WERD_CHOICE *word)

◆ assign_blobs_to_blocks2()

void tesseract::assign_blobs_to_blocks2	(	Pix *	pix,
		BLOCK_LIST *	blocks,
		TO_BLOCK_LIST *	port_blocks
	)

Definition at line 165 of file tordmain.cpp.

                                                          {  // output list
   BLOCK *block;                  // current block
   BLOBNBOX *newblob;             // created blob
   C_BLOB *blob;                  // current blob
   BLOCK_IT block_it = blocks;
   C_BLOB_IT blob_it;             // iterator
   BLOBNBOX_IT port_box_it;       // iterator
                                  // destination iterator
   TO_BLOCK_IT port_block_it = port_blocks;
   TO_BLOCK *port_block;          // created block
  
   for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
     block = block_it.data();
     port_block = new TO_BLOCK(block);
  
     // Convert the good outlines to block->blob_list
     port_box_it.set_to_list(&port_block->blobs);
     blob_it.set_to_list(block->blob_list());
     for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
       blob = blob_it.extract();
       newblob = new BLOBNBOX(blob);  // Convert blob to BLOBNBOX.
       SetBlobStrokeWidth(pix, newblob);
       port_box_it.add_after_then_move(newblob);
     }
  
     // Put the rejected outlines in block->noise_blobs, which allows them to
     // be reconsidered and sorted back into rows and recover outlines mistakenly
     // rejected.
     port_box_it.set_to_list(&port_block->noise_blobs);
     blob_it.set_to_list(block->reject_blobs());
     for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
       blob = blob_it.extract();
       newblob = new BLOBNBOX(blob);  // Convert blob to BLOBNBOX.
       SetBlobStrokeWidth(pix, newblob);
       port_box_it.add_after_then_move(newblob);
     }
  

◆ BlobToTrainingSample()

TrainingSample * tesseract::BlobToTrainingSample	(	const TBLOB &	blob,
		bool	nonlinear_norm,
		INT_FX_RESULT_STRUCT *	fx_info,
		GenericVector< INT_FEATURE_STRUCT > *	bl_features
	)

Definition at line 75 of file intfx.cpp.

                                                     {
   GenericVector<INT_FEATURE_STRUCT> cn_features;
   Classify::ExtractFeatures(blob, nonlinear_norm, bl_features,
                             &cn_features, fx_info, nullptr);
   // TODO(rays) Use blob->PreciseBoundingBox() instead.
   TBOX box = blob.bounding_box();
   TrainingSample* sample = nullptr;
   int num_features = fx_info->NumCN;
   if (num_features > 0) {
     sample = TrainingSample::CopyFromFeatures(*fx_info, box, &cn_features[0],
                                               num_features);
   }
   if (sample != nullptr) {
     // Set the bounding box (in original image coordinates) in the sample.
     TPOINT topleft, botright;
     topleft.x = box.left();
     topleft.y = box.top();
     botright.x = box.right();
     botright.y = box.bottom();
     TPOINT original_topleft, original_botright;
     blob.denorm().DenormTransform(nullptr, topleft, &original_topleft);
     blob.denorm().DenormTransform(nullptr, botright, &original_botright);
     sample->set_bounding_box(TBOX(original_topleft.x, original_botright.y,
                                   original_botright.x, original_topleft.y));
   }
   return sample;

◆ CanonicalizeDetectionResults()

void tesseract::CanonicalizeDetectionResults	(	GenericVector< PARA * > *	row_owners,
		PARA_LIST *	paragraphs
	)

Definition at line 2252 of file paragraphs.cpp.

                                                 {
       continue;
     }
     out.add_after_then_move(rows[i]);
   }
 }
  
 // Main entry point for Paragraph Detection Algorithm.
 //
 // Given a set of equally spaced textlines (described by row_infos),
 // Split them into paragraphs.
 //
 // Output:
 //   row_owners - one pointer for each row, to the paragraph it belongs to.
 //   paragraphs - this is the actual list of PARA objects.
 //   models - the list of paragraph models referenced by the PARA objects.
 //            caller is responsible for deleting the models.
 void DetectParagraphs(int debug_level,
                       GenericVector<RowInfo> *row_infos,
                       GenericVector<PARA *> *row_owners,
                       PARA_LIST *paragraphs,

◆ ClearFeatureSpaceWindow()

void tesseract::ClearFeatureSpaceWindow	(	NORM_METHOD	norm_method,
		ScrollView *	window
	)

Clears the given window and draws the featurespace guides for the appropriate normalization method.

Definition at line 987 of file intproto.cpp.

                                                                           {
   window->Clear();
  
   window->Pen(ScrollView::GREY);
   // Draw the feature space limit rectangle.
   window->Rectangle(0, 0, INT_MAX_X, INT_MAX_Y);
   if (norm_method == baseline) {
     window->SetCursor(0, INT_DESCENDER);
     window->DrawTo(INT_MAX_X, INT_DESCENDER);
     window->SetCursor(0, INT_BASELINE);
     window->DrawTo(INT_MAX_X, INT_BASELINE);
     window->SetCursor(0, INT_XHEIGHT);
     window->DrawTo(INT_MAX_X, INT_XHEIGHT);
     window->SetCursor(0, INT_CAPHEIGHT);
     window->DrawTo(INT_MAX_X, INT_CAPHEIGHT);
   } else {
     window->Rectangle(INT_XCENTER - INT_XRADIUS, INT_YCENTER - INT_YRADIUS,
                       INT_XCENTER + INT_XRADIUS, INT_YCENTER + INT_YRADIUS);
   }

◆ ClipVector()

template<typename T >

void tesseract::ClipVector	(	int	n,
		T	lower,
		T	upper,
		T *	vec
	)

inline

Definition at line 208 of file functions.h.

                                                         {
   // Overwrites each of the first n elements of vec, in place, with the result
   // of ClipToRange(vec[i], lower, upper).
   // NOTE(review): ClipToRange is defined elsewhere; presumably it clamps the
   // value into [lower, upper] -- confirm against its definition.
   for (int i = 0; i < n; ++i) vec[i] = ClipToRange(vec[i], lower, upper);
 }

◆ cmp_eq()

template<typename T >

bool tesseract::cmp_eq	(	T const &	t1,
		T const &	t2
	)

Definition at line 375 of file genericvector.h.

                                       {
   // Equality predicate: defers entirely to T's operator==.
   return t1 == t2;
 }

◆ CodeInBinary()

void tesseract::CodeInBinary	(	int	n,
		int	nf,
		double *	vec
	)

inline

Definition at line 214 of file functions.h.

                                                      {
   // Replaces the first nf entries of vec with the binary encoding of the
   // index of the largest of the first n entries. Does nothing when nf is not
   // positive or when there are fewer than nf entries.
   if (nf <= 0 || n < nf) return;
   // Find the index of the maximum value. The strict '>' keeps the earliest
   // index when several entries tie for the maximum.
   int index = 0;
   double best_score = vec[0];
   for (int i = 1; i < n; ++i) {
     if (vec[i] > best_score) {
       best_score = vec[i];
       index = i;
     }
   }
   // Write the low nf bits of index, least significant bit first, as 1.0/0.0.
   // Entries vec[nf] .. vec[n - 1] are left unchanged.
   int mask = 1;
   for (int i = 0; i < nf; ++i, mask *= 2) {
     vec[i] = (index & mask) ? 1.0 : 0.0;
   }
 }

◆ CodepointList()

std::string tesseract::CodepointList ( const std::vector< char32 > & str32 )

inline

Definition at line 24 of file normstrngs_test.h.

                                                                {
   // Builds a string consisting of each element of str32 wrapped in square
   // brackets, in order, e.g. "[..][..]". Returns an empty string for an empty
   // input.
   std::stringstream result;
   int total_chars = str32.size();
   // Switch the stream to hexadecimal output for integer insertions.
   // NOTE(review): assumes char32 is inserted into the stream as an integer
   // (so std::hex applies) -- confirm against the char32 typedef.
   result << std::hex;
   for (int i = 0; i < total_chars; ++i) {
     result << "[" << str32[i] << "]";
   }
   return result.str();
 }

◆ CombineLangModel()

int tesseract::CombineLangModel	(	const UNICHARSET &	unicharset,
		const std::string &	script_dir,
		const std::string &	version_str,
		const std::string &	output_dir,
		const std::string &	lang,
		bool	pass_through_recoder,
		const GenericVector< STRING > &	words,
		const GenericVector< STRING > &	puncs,
		const GenericVector< STRING > &	numbers,
		bool	lang_is_rtl,
		FileReader	reader,
		FileWriter	writer
	)

Definition at line 185 of file lang_model_helpers.cpp.

                                                            {
   // Assembles a traineddata file for lang from the unicharset, an optional
   // config file, the radical-stroke table, a recoder and optional word lists,
   // then writes it out via writer. Returns EXIT_SUCCESS on success and
   // EXIT_FAILURE on the failures that are checked with an early return below.

   // Build the traineddata file.
   TessdataManager traineddata;
   if (!version_str.empty()) {
     // Append the caller's version string to the existing one, separated by ':'.
     traineddata.SetVersionString(traineddata.VersionString() + ":" +
                                  version_str);
   }
   // Unicharset and recoder.
   if (!WriteUnicharset(unicharset, output_dir, lang, writer, &traineddata)) {
     tprintf("Error writing unicharset!!\n");
     return EXIT_FAILURE;
   } else {
     // NOTE(review): this message is printed whenever the unicharset was
     // written successfully, before any attempt to read the config file, so it
     // does not reflect whether a config file exists -- confirm intent.
     tprintf("Config file is optional, continuing...\n");
   }
   // If there is a config file, read it and add to traineddata.
   // An empty result from ReadFile is silently treated as "no config file".
   std::string config_filename = script_dir + "/" + lang + "/" + lang + ".config";
   STRING config_file = ReadFile(config_filename, reader);
   if (config_file.length() > 0) {
     traineddata.OverwriteEntry(TESSDATA_LANG_CONFIG, &config_file[0],
                                config_file.length());
   }
   // Unlike the config file, the radical-stroke table is mandatory: an empty
   // read is an error.
   std::string radical_filename = script_dir + "/radical-stroke.txt";
   STRING radical_data = ReadFile(radical_filename, reader);
   if (radical_data.length() == 0) {
     tprintf("Error reading radical code table %s\n", radical_filename.c_str());
     return EXIT_FAILURE;
   }
   if (!WriteRecoder(unicharset, pass_through_recoder, output_dir, lang, writer,
                     &radical_data, &traineddata)) {
     // NOTE(review): unlike the other error paths, a recoder failure is only
     // logged; execution continues and the function can still return
     // EXIT_SUCCESS -- confirm whether a return EXIT_FAILURE is missing here.
     tprintf("Error writing recoder!!\n");
   }
   // DAWGs are only built when at least one of the word lists is non-empty.
   if (!words.empty() || !puncs.empty() || !numbers.empty()) {
     if (!WriteDawgs(words, puncs, numbers, lang_is_rtl, unicharset,
                     &traineddata)) {
       tprintf("Error during conversion of wordlists to DAWGs!!\n");
       return EXIT_FAILURE;
     }
   }
  
   // Traineddata file.
   // Serialize everything accumulated in traineddata and write it as
   // <lang>.traineddata via WriteFile.
   GenericVector<char> traineddata_data;
   traineddata.Serialize(&traineddata_data);
   if (!WriteFile(output_dir, lang, ".traineddata", traineddata_data, writer)) {
     tprintf("Error writing output traineddata file!!\n");
     return EXIT_FAILURE;
   }
   return EXIT_SUCCESS;
 }

◆ CompareFontInfo()

bool tesseract::CompareFontInfo	(	const FontInfo &	fi1,
		const FontInfo &	fi2
	)

Definition at line 122 of file fontinfo.cpp.

                                                                {
   // Returns true exactly when the two FontInfo entries have identical name
   // strings (strcmp == 0); no other field is compared.
   // The font properties are required to be the same for two fonts with the
   // same name, so there is no need to test them.
   // Consequently, querying the table with only its font name as information is
   // enough to retrieve its properties.
   return strcmp(fi1.name, fi2.name) == 0;
 }

◆ CompareFontSet()

bool tesseract::CompareFontSet	(	const FontSet &	fs1,
		const FontSet &	fs2
	)

Definition at line 130 of file fontinfo.cpp.

                                                             {
   // Returns true when both font sets have the same size and their configs
   // arrays match element by element, in order.
   if (fs1.size != fs2.size)
     return false;
   // Sizes are equal here, so fs1.size bounds both arrays.
   for (int i = 0; i < fs1.size; ++i) {
     if (fs1.configs[i] != fs2.configs[i])
       return false;
   }
   return true;
 }

◆ CopyVector()

void tesseract::CopyVector	(	int	n,
		const double *	src,
		double *	dest
	)

inline

Definition at line 169 of file functions.h.

                                                                {
   // Copies the first n doubles of src into dest as a raw byte copy of
   // n * sizeof(double) bytes. Because memcpy is used, the source and
   // destination ranges must not overlap.
   memcpy(dest, src, n * sizeof(dest[0]));
 }

◆ countof()

template<typename T , size_t N>

constexpr size_t tesseract::countof ( T const(&)[N] )

constexpr noexcept

Definition at line 41 of file serialis.h.

43 {

◆ CrownCompatible()

bool tesseract::CrownCompatible	(	const GenericVector< RowScratchRegisters > *	rows,
		int	a,
		int	b,
		const ParagraphModel *	model
	)

Definition at line 1314 of file paragraphs.cpp.

         : theory_(theory), rows_(rows), row_start_(row_start),
           row_end_(row_end) {
   if (!AcceptableRowArgs(0, 0, __func__, rows, row_start, row_end)) {
     row_start_ = 0;
     row_end_ = 0;
     return;
   }
   SetOfModels no_models;

◆ DegradeImage()

struct Pix * tesseract::DegradeImage	(	Pix *	input,
		int	exposure,
		TRand *	randomizer,
		float *	rotation
	)

Definition at line 108 of file degradeimage.cpp.

                            {
     float radians_clockwise = 0.0f;
     if (*rotation) {
       radians_clockwise = *rotation;
     } else if (randomizer != nullptr) {
       radians_clockwise = randomizer->SignedRand(kRotationRange);
     }
  
     input = pixRotate(pix, radians_clockwise,
                       L_ROTATE_AREA_MAP, L_BRING_IN_WHITE,
                       0, 0);
     // Rotate the boxes to match.
     *rotation = radians_clockwise;
     pixDestroy(&pix);
   } else {
     input = pix;
   }
  
   if (exposure >= 3 || exposure == 1) {
     // Erosion after the convolution is not as heavy as before, so it is
     // good for level 1 and in addition as a level 3.
     // This is backwards to binary morphology,
     // see http://www.leptonica.com/grayscale-morphology.html
     pix = input;
     input = pixErodeGray(pix, 3, 3);
     pixDestroy(&pix);
   }
   // The convolution really needed to be 2x2 to be realistic enough, but
   // we only have 3x3, so we have to bias the image darker or lose thin
   // strokes.
   int erosion_offset = 0;
   // For light and 0 exposure, there is no dilation, so compensate for the
   // convolution with a big darkening bias which is undone for lighter
   // exposures.
   if (exposure <= 0)
     erosion_offset = -3 * kExposureFactor;
   // Add in a general offset of the greyscales for the exposure level so
   // a threshold of 128 gives a reasonable binary result.
   erosion_offset -= exposure * kExposureFactor;
   // Add a gradual fade over the page and a small amount of salt and pepper
   // noise to simulate noise in the sensor/paper fibres and varying
   // illumination.
   l_uint32* data = pixGetData(input);
   for (int y = 0; y < height; ++y) {
     for (int x = 0; x < width; ++x) {
       int pixel = GET_DATA_BYTE(data, x);
       if (randomizer != nullptr)
         pixel += randomizer->IntRand() % (kSaltnPepper*2 + 1) - kSaltnPepper;
       if (height + width > kMinRampSize)
         pixel -= (2*x + y) * 32 / (height + width);
       pixel += erosion_offset;
       if (pixel < 0)
         pixel = 0;
       if (pixel > 255)
         pixel = 255;
       SET_DATA_BYTE(data, x, pixel);
     }
     data += input->wpl;
   }
   return input;
 }
  
 // Creates and returns a Pix distorted by various means according to the bool
 // flags. If boxes is not nullptr, the boxes are resized/positioned according to
 // any spatial distortion and also by the integer reduction factor box_scale
 // so they will match what the network will output.
 // Returns nullptr on error. The returned Pix must be pixDestroyed.
 Pix* PrepareDistortedPix(const Pix* pix, bool perspective, bool invert,
                          bool white_noise, bool smooth_noise, bool blur,
                          int box_reduction, TRand* randomizer,
                          GenericVector<TBOX>* boxes) {
   Pix* distorted = pixCopy(nullptr, const_cast<Pix*>(pix));
   // Things to do to synthetic training data.
   if ((white_noise || smooth_noise) && randomizer->SignedRand(1.0) > 0.0) {
     // TODO(rays) Cook noise in a more thread-safe manner than rand().
     // Attempt to make the sequences reproducible.
     srand(randomizer->IntRand());
     Pix* pixn = pixAddGaussianNoise(distorted, 8.0);

◆ DeleteObject()

template<typename T >

void tesseract::DeleteObject ( T * object )

Definition at line 155 of file tablefind.cpp.

                                                    {
   // Destroys and frees the pointed-to object with a plain (non-array) delete.
   delete object;
 }

◆ DeSerialize() [1/8]

bool tesseract::DeSerialize	(	FILE *	fp,
		char *	data,
		size_t	n = `1`
	)

Definition at line 41 of file serialis.cpp.

43 {

◆ DeSerialize() [2/8]

bool tesseract::DeSerialize	(	FILE *	fp,
		float *	data,
		size_t	n = `1`
	)

Definition at line 45 of file serialis.cpp.

47 {

◆ DeSerialize() [3/8]

bool tesseract::DeSerialize	(	FILE *	fp,
		int16_t *	data,
		size_t	n = `1`
	)

Definition at line 53 of file serialis.cpp.

55 {

◆ DeSerialize() [4/8]

bool tesseract::DeSerialize	(	FILE *	fp,
		int32_t *	data,
		size_t	n = `1`
	)

Definition at line 57 of file serialis.cpp.

59 {

◆ DeSerialize() [5/8]

bool tesseract::DeSerialize	(	FILE *	fp,
		int8_t *	data,
		size_t	n = `1`
	)

Definition at line 49 of file serialis.cpp.

51 {

◆ DeSerialize() [6/8]

bool tesseract::DeSerialize	(	FILE *	fp,
		uint16_t *	data,
		size_t	n = `1`
	)

Definition at line 65 of file serialis.cpp.

67 {

◆ DeSerialize() [7/8]

bool tesseract::DeSerialize	(	FILE *	fp,
		uint32_t *	data,
		size_t	n = `1`
	)

Definition at line 69 of file serialis.cpp.

71 {

◆ DeSerialize() [8/8]

bool tesseract::DeSerialize	(	FILE *	fp,
		uint8_t *	data,
		size_t	n = `1`
	)

Definition at line 61 of file serialis.cpp.

63 {

◆ DetectParagraphs() [1/2]

void tesseract::DetectParagraphs	(	int	debug_level,
		bool	after_text_recognition,
		const MutableIterator *	block_start,
		GenericVector< ParagraphModel * > *	models
	)

Definition at line 2527 of file paragraphs.cpp.

      {
     if (!row.PageResIt()->row())
       continue;  // empty row.
     row.PageResIt()->row()->row->set_para(nullptr);
     row_infos.push_back(RowInfo());
     RowInfo &ri = row_infos.back();
     InitializeRowInfo(after_text_recognition, row, &ri);
   } while (!row.IsAtFinalElement(RIL_BLOCK, RIL_TEXTLINE) &&
            row.Next(RIL_TEXTLINE));
  
   // If we're called before text recognition, we might not have
   // tight block bounding boxes, so trim by the minimum on each side.
   if (!row_infos.empty()) {
     int min_lmargin = row_infos[0].pix_ldistance;
     int min_rmargin = row_infos[0].pix_rdistance;
     for (int i = 1; i < row_infos.size(); i++) {
       if (row_infos[i].pix_ldistance < min_lmargin)
         min_lmargin = row_infos[i].pix_ldistance;
       if (row_infos[i].pix_rdistance < min_rmargin)
         min_rmargin = row_infos[i].pix_rdistance;
     }
     if (min_lmargin > 0 || min_rmargin > 0) {
       for (int i = 0; i < row_infos.size(); i++) {
         row_infos[i].pix_ldistance -= min_lmargin;
         row_infos[i].pix_rdistance -= min_rmargin;
       }
     }
   }
  
   // Run the paragraph detection algorithm.
   GenericVector<PARA *> row_owners;
   GenericVector<PARA *> the_paragraphs;
   if (!is_image_block) {
     DetectParagraphs(debug_level, &row_infos, &row_owners, block->para_list(),
                      models);
   } else {
     row_owners.init_to_size(row_infos.size(), nullptr);
     CanonicalizeDetectionResults(&row_owners, block->para_list());
   }
  
   // Now stitch in the row_owners into the rows.
   row = *block_start;
   for (int i = 0; i < row_owners.size(); i++) {
     while (!row.PageResIt()->row())
       row.Next(RIL_TEXTLINE);
     row.PageResIt()->row()->row->set_para(row_owners[i]);
     row.Next(RIL_TEXTLINE);
   }
 }
  
 }  // namespace

◆ DetectParagraphs() [2/2]

void tesseract::DetectParagraphs	(	int	debug_level,
		GenericVector< RowInfo > *	row_infos,
		GenericVector< PARA * > *	row_owners,
		PARA_LIST *	paragraphs,
		GenericVector< ParagraphModel * > *	models
	)

Definition at line 2284 of file paragraphs.cpp.

                                              {
     // Pass 2a:
     //   Find any strongly evidenced start-of-paragraph lines.  If they're
     //   followed by two lines that look like body lines, make a paragraph
     //   model for that and see if that model applies throughout the text
     //   (that is, "smear" it).
     StrongEvidenceClassify(debug_level, &rows,
                            leftovers[i].begin, leftovers[i].end, &theory);
  
     // Pass 2b:
     //   If we had any luck in pass 2a, we got part of the page and didn't
     //   know how to classify a few runs of rows. Take the segments that
     //   didn't find a model and reprocess them individually.
     GenericVector<Interval> leftovers2;
     LeftoverSegments(rows, &leftovers2, leftovers[i].begin, leftovers[i].end);
     bool pass2a_was_useful = leftovers2.size() > 1 ||
         (leftovers2.size() == 1 &&
          (leftovers2[0].begin != 0 || leftovers2[0].end != rows.size()));
     if (pass2a_was_useful) {
       for (int j = 0; j < leftovers2.size(); j++) {
         StrongEvidenceClassify(debug_level, &rows,
                                leftovers2[j].begin, leftovers2[j].end,
                                &theory);
       }
     }
   }
  
   DebugDump(debug_level > 1, "End of Pass 2", theory, rows);
  
   // Pass 3:
   //   These are the dregs for which we didn't have enough strong textual
   //   and geometric clues to form matching models for.  Let's see if
   //   the geometric clues are simple enough that we could just use those.
   LeftoverSegments(rows, &leftovers, 0, rows.size());
   for (int i = 0; i < leftovers.size(); i++) {
     GeometricClassify(debug_level, &rows,
                       leftovers[i].begin, leftovers[i].end, &theory);
   }
  
   // Undo any flush models for which there's little evidence.
   DowngradeWeakestToCrowns(debug_level, &theory, &rows);
  
   DebugDump(debug_level > 1, "End of Pass 3", theory, rows);
  
   // Pass 4:
   //   Take everything that's still not marked up well and clear all markings.
   LeftoverSegments(rows, &leftovers, 0, rows.size());
   for (int i = 0; i < leftovers.size(); i++) {
     for (int j = leftovers[i].begin; j < leftovers[i].end; j++) {
       rows[j].SetUnknown();
     }
   }
  
   DebugDump(debug_level > 1, "End of Pass 4", theory, rows);
  
   // Convert all of the unique hypothesis runs to PARAs.
   ConvertHypothesizedModelRunsToParagraphs(debug_level, rows, row_owners,
                                            &theory);
  
   DebugDump(debug_level > 0, "Final Paragraph Segmentation", theory, rows);
  
   // Finally, clean up any dangling nullptr row paragraph parents.
   CanonicalizeDetectionResults(row_owners, paragraphs);
 }
  
 // ============ Code interfacing with the rest of Tesseract ==================
  
 static void InitializeTextAndBoxesPreRecognition(const MutableIterator &it,
                                                  RowInfo *info) {
   // Set up text, lword_text, and rword_text (mostly for debug printing).
   STRING fake_text;
   PageIterator pit(static_cast<const PageIterator&>(it));
   bool first_word = true;
   if (!pit.Empty(RIL_WORD)) {
     do {
       fake_text += "x";
       if (first_word) info->lword_text += "x";
       info->rword_text += "x";
       if (pit.IsAtFinalElement(RIL_WORD, RIL_SYMBOL) &&

◆ DotProductAVX()

double tesseract::DotProductAVX	(	const double *	u,
		const double *	v,
		int	n
	)

Definition at line 30 of file dotproductavx.cpp.

                                                               {
   // Computes the dot product of the n-element double vectors u and v using
   // 256-bit AVX registers (4 doubles each), 8 elements per loop iteration.
   // NOTE(review): quot and rem are unsigned while n is int, so this assumes
   // n >= 0 -- confirm that callers never pass a negative length.
   const unsigned quot = n / 8;
   const unsigned rem = n % 8;
   // Two independent accumulators, one for each half of every 8-element group.
   __m256d t0 = _mm256_setzero_pd();
   __m256d t1 = _mm256_setzero_pd();
   for (unsigned k = 0; k < quot; k++) {
     // Unaligned loads: u and v need no particular alignment.
     __m256d f0 = _mm256_loadu_pd(u);
     __m256d f1 = _mm256_loadu_pd(v);
     f0 = _mm256_mul_pd(f0, f1);
     t0 = _mm256_add_pd(t0, f0);
     u += 4;
     v += 4;
     __m256d f2 = _mm256_loadu_pd(u);
     __m256d f3 = _mm256_loadu_pd(v);
     f2 = _mm256_mul_pd(f2, f3);
     t1 = _mm256_add_pd(t1, f2);
     u += 4;
     v += 4;
   }
   // Horizontal add folds the 8 partial sums in t0 and t1 into 4 lanes; the
   // lanes are then spilled to a 32-byte aligned buffer (required by the
   // aligned store) and summed in scalar code.
   t0 = _mm256_hadd_pd(t0, t1);
   alignas(32) double tmp[4];
   _mm256_store_pd(tmp, t0);
   double result = tmp[0] + tmp[1] + tmp[2] + tmp[3];
   // Scalar tail for the n % 8 elements not covered by the vector loop.
   for (unsigned k = 0; k < rem; k++) {
     result += *u++ * *v++;
   }
   return result;
 }

◆ DotProductFMA()

double tesseract::DotProductFMA	(	const double *	u,
		const double *	v,
		int	n
	)

Definition at line 30 of file dotproductfma.cpp.

                                                               {
   // Computes the dot product of u and v over n doubles using 256-bit
   // fused multiply-add. Each loop iteration consumes 8 elements: two groups
   // of 4, accumulated into the independent partial-sum registers t0 and t1.
   // NOTE(review): n is converted to unsigned here; a negative n would yield
   // a very large rem and read out of bounds — presumably callers always
   // pass n >= 0, confirm.
   const unsigned quot = n / 8;
   const unsigned rem = n % 8;
   __m256d t0 = _mm256_setzero_pd();
   __m256d t1 = _mm256_setzero_pd();
   for (unsigned k = 0; k < quot; k++) {
     // Unaligned loads, so u and v need no particular alignment.
     __m256d f0 = _mm256_loadu_pd(u);
     __m256d f1 = _mm256_loadu_pd(v);
     // t0 = f0 * f1 + t0 in a single instruction.
     t0 = _mm256_fmadd_pd(f0, f1, t0);
     u += 4;
     v += 4;
     __m256d f2 = _mm256_loadu_pd(u);
     __m256d f3 = _mm256_loadu_pd(v);
     t1 = _mm256_fmadd_pd(f2, f3, t1);
     u += 4;
     v += 4;
   }
   // Horizontal add folds the 8 partial sums of t0 and t1 into 4 values.
   t0 = _mm256_hadd_pd(t0, t1);
   // The aligned store requires the 32-byte alignment declared on tmp.
   alignas(32) double tmp[4];
   _mm256_store_pd(tmp, t0);
   double result = tmp[0] + tmp[1] + tmp[2] + tmp[3];
   // Scalar tail for the n % 8 elements not covered by the vector loop.
   for (unsigned k = 0; k < rem; k++) {
     result += *u++ * *v++;
   }
   return result;
 }

◆ DotProductNative()

double tesseract::DotProductNative	(	const double *	u,
		const double *	v,
		int	n
	)

Definition at line 22 of file dotproduct.cpp.

                                                                  {
   double total = 0.0;
   for (int k = 0; k < n; ++k) total += u[k] * v[k];
   return total;
 }

◆ DotProductSSE()

double tesseract::DotProductSSE	(	const double *	u,
		const double *	v,
		int	n
	)

Definition at line 31 of file dotproductsse.cpp.

                                                               {
   // Computes the dot product of u and v over n doubles using 128-bit SSE
   // registers (2 doubles per step), with a scalar loop for any left-over
   // element.
   // max_offset is the last offset at which a full pair can still be loaded.
   int max_offset = n - 2;
   int offset = 0;
   // Accumulate a set of 2 sums in sum, by loading pairs of 2 values from u and
   // v, and multiplying them together in parallel.
   __m128d sum = _mm_setzero_pd();
   // Skipped entirely when n < 2; sum then stays zero and the scalar loop
   // below handles everything.
   if (offset <= max_offset) {
     // The first pair is consumed by the initial load/multiply in each branch.
     offset = 2;
     // Aligned load is reputedly faster but requires 16 byte aligned input.
     if ((reinterpret_cast<uintptr_t>(u) & 15) == 0 &&
         (reinterpret_cast<uintptr_t>(v) & 15) == 0) {
       // Use aligned load.
       sum = _mm_load_pd(u);
       __m128d floats2 = _mm_load_pd(v);
       // Multiply.
       sum = _mm_mul_pd(sum, floats2);
       while (offset <= max_offset) {
         __m128d floats1 = _mm_load_pd(u + offset);
         floats2 = _mm_load_pd(v + offset);
         offset += 2;
         floats1 = _mm_mul_pd(floats1, floats2);
         sum = _mm_add_pd(sum, floats1);
       }
     } else {
       // Use unaligned load.
       sum = _mm_loadu_pd(u);
       __m128d floats2 = _mm_loadu_pd(v);
       // Multiply.
       sum = _mm_mul_pd(sum, floats2);
       while (offset <= max_offset) {
         __m128d floats1 = _mm_loadu_pd(u + offset);
         floats2 = _mm_loadu_pd(v + offset);
         offset += 2;
         floats1 = _mm_mul_pd(floats1, floats2);
         sum = _mm_add_pd(sum, floats1);
       }
     }
   }
   // Add the 2 sums in sum horizontally.
   sum = _mm_hadd_pd(sum, sum);
   // Extract the low result.
   double result = _mm_cvtsd_f64(sum);
   // Add on any left-over products.
   while (offset < n) {
     result += u[offset] * v[offset];
     ++offset;
   }
   return result;
 }

◆ ExpectGraphemeModeResults()

void tesseract::ExpectGraphemeModeResults	(	const std::string &	str,
		UnicodeNormMode	u_mode,
		int	unicode_count,
		int	glyph_count,
		int	grapheme_count,
		const std::string &	target_str
	)

inline

Definition at line 48 of file normstrngs_test.h.

                                                                    {
   std::vector<std::string> glyphs;
   EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(
       u_mode, OCRNorm::kNone, GraphemeNormMode::kIndividualUnicodes, true,
       str.c_str(), &glyphs));
   EXPECT_EQ(glyphs.size(), unicode_count)
       << PrintStringVectorWithUnicodes(glyphs);
   EXPECT_EQ(target_str, absl::StrJoin(glyphs.begin(), glyphs.end(), ""));
   EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(u_mode, OCRNorm::kNone,
                                            GraphemeNormMode::kGlyphSplit, true,
                                            str.c_str(), &glyphs));
   EXPECT_EQ(glyphs.size(), glyph_count)
       << PrintStringVectorWithUnicodes(glyphs);
   EXPECT_EQ(target_str, absl::StrJoin(glyphs.begin(), glyphs.end(), ""));
   EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(u_mode, OCRNorm::kNone,
                                            GraphemeNormMode::kCombined, true,
                                            str.c_str(), &glyphs));
   EXPECT_EQ(glyphs.size(), grapheme_count)
       << PrintStringVectorWithUnicodes(glyphs);
   EXPECT_EQ(target_str, absl::StrJoin(glyphs.begin(), glyphs.end(), ""));
   EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(u_mode, OCRNorm::kNone,
                                            GraphemeNormMode::kSingleString,
                                            true, str.c_str(), &glyphs));
   EXPECT_EQ(glyphs.size(), 1) << PrintStringVectorWithUnicodes(glyphs);
   EXPECT_EQ(target_str, glyphs[0]);
   std::string result;
   EXPECT_TRUE(NormalizeUTF8String(
       u_mode, OCRNorm::kNone, GraphemeNorm::kNormalize, str.c_str(), &result));
   EXPECT_EQ(target_str, result);
 }

◆ ExtractFontName()

void tesseract::ExtractFontName	(	const STRING &	filename,
		STRING *	fontname
	)

Public Code

Definition at line 45 of file blobclass.cpp.

                                                                {
   *fontname = classify_font_name;
   if (*fontname == kUnknownFontName) {
     // filename is expected to be of the form [lang].[fontname].exp[num]
     // The [lang], [fontname] and [num] fields should not have '.' characters.
     const char *basename = strrchr(filename.c_str(), '/');
     const char *firstdot = strchr(basename ? basename : filename.c_str(), '.');
     const char *lastdot  = strrchr(filename.c_str(), '.');
     if (firstdot != lastdot && firstdot != nullptr && lastdot != nullptr) {
       ++firstdot;
       *fontname = firstdot;
       fontname->truncate_at(lastdot - firstdot);
     }
   }
 }

◆ FirstWordWouldHaveFit() [1/2]

bool tesseract::FirstWordWouldHaveFit	(	const RowScratchRegisters &	before,
		const RowScratchRegisters &	after
	)

Definition at line 1671 of file paragraphs.cpp.

                                                                 {
   // Chooses which word flags to test from the reading direction of `before`:
   // left-to-right rows pair the right word of `before` with the left word of
   // `after`; otherwise the sides are swapped.
   // NOTE(review): this body never looks at indents or word widths, and the
   // three-argument FirstWordWouldHaveFit source is printed under the
   // InterwordSpace entry, so this excerpt may be offset from the function
   // named in the heading — confirm against paragraphs.cpp.
   if (before.ri_->ltr) {
     return before.ri_->rword_likely_ends_idea &&
            after.ri_->lword_likely_starts_idea;
   } else {
     return before.ri_->lword_likely_ends_idea &&
            after.ri_->rword_likely_starts_idea;
   }
 }
  
 static bool LikelyParagraphStart(const RowScratchRegisters &before,
                                  const RowScratchRegisters &after,
                                  tesseract::ParagraphJustification j) {

◆ FirstWordWouldHaveFit() [2/2]

bool tesseract::FirstWordWouldHaveFit	(	const RowScratchRegisters &	before,
		const RowScratchRegisters &	after,
		tesseract::ParagraphJustification	justification
	)

Definition at line 1646 of file paragraphs.cpp.

                                                              {
   if (before.ri_->num_words == 0 || after.ri_->num_words == 0)
     return true;
  
   int available_space = before.lindent_;
   if (before.rindent_ > available_space)
     available_space = before.rindent_;
   available_space -= before.ri_->average_interword_space;
  
   if (before.ri_->ltr)

◆ FontInfoDeleteCallback()

void tesseract::FontInfoDeleteCallback ( FontInfo f )

Definition at line 141 of file fontinfo.cpp.

                                         {
   if (f.spacing_vec != nullptr) {
     f.spacing_vec->delete_data_pointers();
     delete f.spacing_vec;
     f.spacing_vec = nullptr;
   }
   delete[] f.name;
   f.name = nullptr;
 }

◆ FontSetDeleteCallback()

void tesseract::FontSetDeleteCallback ( FontSet fs )

Definition at line 150 of file fontinfo.cpp.

                                        {
   // Frees the configs array referenced by fs. fs is received by value, so
   // the caller's copy still holds the now-dangling pointer afterwards.
   delete[] fs.configs;
 }

◆ FullwidthToHalfwidth()

char32 tesseract::FullwidthToHalfwidth ( const char32 ch )

Definition at line 298 of file normstrngs.cpp.

◆ FuncInplace()

template<class Func >

void tesseract::FuncInplace	(	int	n,
		double *	inout
	)

inline

Definition at line 129 of file functions.h.

                                               {
   Func f;
   for (int i = 0; i < n; ++i) {
     inout[i] = f(inout[i]);
   }
 }

◆ FuncMultiply()

template<class Func >

void tesseract::FuncMultiply	(	const double *	u,
		const double *	v,
		int	n,
		double *	out
	)

inline

Definition at line 138 of file functions.h.

                                                                                {
   Func f;
   for (int i = 0; i < n; ++i) {
     out[i] = f(u[i]) * v[i];
   }
 }

◆ GeneratePerspectiveDistortion()

void tesseract::GeneratePerspectiveDistortion	(	int	width,
		int	height,
		TRand *	randomizer,
		Pix **	pix,
		GenericVector< TBOX > *	boxes
	)

Definition at line 237 of file degradeimage.cpp.

                         {
     // Transform the boxes.
     for (int b = 0; b < boxes->size(); ++b) {
       int x1, y1, x2, y2;
       const TBOX& box = (*boxes)[b];
       projectiveXformSampledPt(box_coeffs, box.left(), height - box.top(), &x1,
                                &y1);
       projectiveXformSampledPt(box_coeffs, box.right(), height - box.bottom(),
                                &x2, &y2);
       TBOX new_box1(x1, height - y2, x2, height - y1);
       projectiveXformSampledPt(box_coeffs, box.left(), height - box.bottom(),
                                &x1, &y1);
       projectiveXformSampledPt(box_coeffs, box.right(), height - box.top(), &x2,
                                &y2);
       TBOX new_box2(x1, height - y1, x2, height - y2);
       (*boxes)[b] = new_box1.bounding_union(new_box2);
     }
   }
   free(im_coeffs);
   free(box_coeffs);
 }
  
 // Computes the coefficients of a randomized projective transformation.
 // The image transform requires backward transformation coefficient, and the
 // box transform the forward coefficients.
 // Returns the incolor arg to pixProjective.
 int ProjectiveCoeffs(int width, int height, TRand* randomizer,
                      float** im_coeffs, float** box_coeffs) {
   // Setup "from" points.
   Pta* src_pts = ptaCreate(4);
   ptaAddPt(src_pts, 0.0f, 0.0f);
   ptaAddPt(src_pts, width, 0.0f);
   ptaAddPt(src_pts, width, height);
   ptaAddPt(src_pts, 0.0f, height);
   // Extract factors from pseudo-random sequence.
   float factors[FN_NUM_FACTORS];
   float shear = 0.0f;  // Shear is signed.
   for (int i = 0; i < FN_NUM_FACTORS; ++i) {

◆ GetXheightString()

std::string tesseract::GetXheightString	(	const std::string &	script_dir,
		const UNICHARSET &	unicharset
	)

Definition at line 164 of file unicharset_training_utils.cpp.

                                                       {
   std::string xheights_str;
   for (int s = 0; s < unicharset.get_script_table_size(); ++s) {
     // Load the xheights for the script if available.
     std::string filename = script_dir + "/" +
                       unicharset.get_script_from_script_id(s) + ".xheights";
     std::string script_heights;
     if (File::ReadFileToString(filename, &script_heights))
       xheights_str += script_heights;
   }
   return xheights_str;
 }

◆ HistogramRect()

void tesseract::HistogramRect	(	Pix *	src_pix,
		int	channel,
		int	left,
		int	top,
		int	width,
		int	height,
		int *	histogram
	)

Definition at line 166 of file otsuthr.cpp.

                                                                  {
   int H = 0;
   double mu_T = 0.0;
   for (int i = 0; i < kHistogramSize; ++i) {
     H += histogram[i];
     mu_T += static_cast<double>(i) * histogram[i];
   }
  
   // Now maximize sig_sq_B over t.
   // http://www.ctie.monash.edu.au/hargreave/Cornall_Terry_328.pdf
   int best_t = -1;
   int omega_0, omega_1;

◆ HOcrEscape()

STRING tesseract::HOcrEscape ( const char * text )

Escapes a character string by replacing each of the characters &, <, >, " and ' with its HTML entity code.

Definition at line 2307 of file baseapi.cpp.

                                     {
   STRING ret;
   const char *ptr;
   for (ptr = text; *ptr; ptr++) {
     switch (*ptr) {
       case '<': ret += "&lt;"; break;
       case '>': ret += "&gt;"; break;
       case '&': ret += "&amp;"; break;
       case '"': ret += "&quot;"; break;
       case '\'': ret += "&#39;"; break;
       default: ret += *ptr;
     }
   }
   return ret;
 }

◆ InterwordSpace()

int tesseract::InterwordSpace	(	const GenericVector< RowScratchRegisters > &	rows,
		int	row_start,
		int	row_end
	)

Definition at line 1623 of file paragraphs.cpp.

                : minimum_reasonable_space;
 }
  
 // Return whether the first word on the after line can fit in the space at
 // the end of the before line (knowing which way the text is aligned and read).
 bool FirstWordWouldHaveFit(const RowScratchRegisters &before,
                            const RowScratchRegisters &after,
                            tesseract::ParagraphJustification justification) {
   if (before.ri_->num_words == 0 || after.ri_->num_words == 0)
     return true;
  
   if (justification == JUSTIFICATION_UNKNOWN) {
     tprintf("Don't call FirstWordWouldHaveFit(r, s, JUSTIFICATION_UNKNOWN).\n");
   }
   int available_space;
   if (justification == JUSTIFICATION_CENTER) {
     available_space = before.lindent_ + before.rindent_;

◆ IsInterchangeValid()

bool tesseract::IsInterchangeValid ( const char32 ch )

Definition at line 269 of file normstrngs.cpp.

                                                   {
   // True when ch is a valid codepoint no greater than 128 that is either
   // not an ISO control character or is one of newline, form feed, tab or
   // carriage return.
   // NOTE(review): the `ch <= 128` bound suggests this excerpt is the 7-bit
   // ASCII variant rather than the function named in the heading — confirm
   // against normstrngs.cpp. The bound also admits 128 itself, which is
   // outside 7-bit ASCII — TODO confirm that is intended.
   return IsValidCodepoint(ch) && ch <= 128 &&
          (!u_isISOControl(static_cast<UChar32>(ch)) || ch == '\n' ||
           ch == '\f' || ch == '\t' || ch == '\r');
 }
  
 char32 FullwidthToHalfwidth(const char32 ch) {
   // Return unchanged if not in the fullwidth-halfwidth Unicode block.
   if (ch < 0xFF00 || ch > 0xFFEF || !IsValidCodepoint(ch)) {
     if (ch != 0x3000) return ch;
   }
   // Special case for fullwidth left and right "white parentheses".
   if (ch == 0xFF5F) return 0x2985;
   if (ch == 0xFF60) return 0x2986;
   // Construct a full-to-half width transliterator.

◆ IsInterchangeValid7BitAscii()

bool tesseract::IsInterchangeValid7BitAscii ( const char32 ch )

Definition at line 292 of file normstrngs.cpp.

◆ IsLeftIndented()

bool tesseract::IsLeftIndented ( const EquationDetect::IndentType type )

inline

Definition at line 92 of file equationdetect.cpp.

                                                                 {
   return type == EquationDetect::LEFT_INDENT ||
       type == EquationDetect::BOTH_INDENT;
 }

◆ IsOCREquivalent()

bool tesseract::IsOCREquivalent	(	char32	ch1,
		char32	ch2
	)

Definition at line 230 of file normstrngs.cpp.

233 {

◆ IsRightIndented()

bool tesseract::IsRightIndented ( const EquationDetect::IndentType type )

inline

Definition at line 97 of file equationdetect.cpp.

                                                                  {
   return type == EquationDetect::RIGHT_INDENT ||
       type == EquationDetect::BOTH_INDENT;
 }

◆ IsTextOrEquationType()

bool tesseract::IsTextOrEquationType ( PolyBlockType type )

inline

Definition at line 88 of file equationdetect.cpp.

                                                      {
   return PTIsTextType(type) || type == PT_EQUATION;
 }

◆ IsUTF8Whitespace()

bool tesseract::IsUTF8Whitespace ( const char * text )

Definition at line 245 of file normstrngs.cpp.

246 {

247 if (IsWhitespace(*it)) break;

◆ IsValidCodepoint()

bool tesseract::IsValidCodepoint ( const char32 ch )

Definition at line 234 of file normstrngs.cpp.

236 {

237 if (!IsWhitespace(*it)) break;

◆ IsWhitespace()

bool tesseract::IsWhitespace ( const char32 ch )

Definition at line 239 of file normstrngs.cpp.

243 {

◆ LeftWordAttributes()

void tesseract::LeftWordAttributes	(	const UNICHARSET *	unicharset,
		const WERD_CHOICE *	werd,
		const STRING &	utf8,
		bool *	is_list,
		bool *	starts_idea,
		bool *	ends_idea
	)

Definition at line 423 of file paragraphs.cpp.

                                                       {
       *starts_idea = true;
     }
     if (unicharset->get_ispunctuation(werd->unichar_id(0))) {
       *starts_idea = true;
       *ends_idea = true;
     }
   } else {  // Assume utf8 is mostly ASCII
     if (AsciiLikelyListItem(utf8)) {
       *is_list = true;
       *starts_idea = true;
     }
     int start_letter = utf8[0];
     if (IsOpeningPunct(start_letter)) {
       *starts_idea = true;
     }
     if (IsTerminalPunct(start_letter)) {
       *ends_idea = true;
     }
     if (start_letter >= 'A' && start_letter <= 'Z') {
       *starts_idea = true;
     }
   }
 }
  
 // Given the rightmost word of a line either as a Tesseract unicharset + werd
 // or a utf8 string, set the following attributes for it:
 //   is_list -      this word might be a list number or bullet.
 //   starts_idea -  this word is likely to start a sentence.
 //   ends_idea -    this word is likely to end a sentence.
 void RightWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd,
                          const STRING &utf8,
                          bool *is_list, bool *starts_idea, bool *ends_idea) {
   *is_list = false;
   *starts_idea = false;
   *ends_idea = false;
   if (utf8.size() == 0 || (werd != nullptr && werd->length() == 0)) {  // Empty
     *ends_idea = true;
     return;

◆ LoadDataFromFile()

bool tesseract::LoadDataFromFile	(	const char *	filename,
		GenericVector< char > *	data
	)

inline

Definition at line 341 of file genericvector.h.

                                                                               {
   bool result = false;
   FILE* fp = fopen(filename, "rb");
   if (fp != nullptr) {
     fseek(fp, 0, SEEK_END);
     auto size = std::ftell(fp);
     fseek(fp, 0, SEEK_SET);
     // Trying to open a directory on Linux sets size to LONG_MAX. Catch it here.
     if (size > 0 && size < LONG_MAX) {
       // reserve an extra byte in case caller wants to append a '\0' character
       data->reserve(size + 1);
       data->resize_no_init(size);
       result = static_cast<long>(fread(&(*data)[0], 1, size, fp)) == size;
     }
     fclose(fp);
   }
   return result;
 }

◆ LoadFileLinesToStrings()

bool tesseract::LoadFileLinesToStrings	(	const char *	filename,
		GenericVector< STRING > *	lines
	)

inline

Definition at line 43 of file fileio.h.

            {
  public:
   // Try to open the file 'filename' in mode 'mode'.
   // Stop the program if it cannot open it.
   static FILE* OpenOrDie(const std::string& filename, const std::string& mode);
   static FILE* Open(const std::string& filename, const std::string& mode);
  
   // Try to open the file 'filename' and to write 'str' in it.
   // Stop the program if it fails.
   static void WriteStringToFileOrDie(const std::string& str, const std::string& filename);

◆ LoadShapeTable()

ShapeTable * tesseract::LoadShapeTable ( const STRING & file_prefix )

Definition at line 154 of file commontraining.cpp.

                                                       {
   ShapeTable* shape_table = nullptr;
   STRING shape_table_file = file_prefix;
   shape_table_file += kShapeTableFileSuffix;
   TFile shape_fp;
   if (shape_fp.Open(shape_table_file.c_str(), nullptr)) {
     shape_table = new ShapeTable;
     if (!shape_table->DeSerialize(&shape_fp)) {
       delete shape_table;
       shape_table = nullptr;
       tprintf("Error: Failed to read shape table %s\n",
               shape_table_file.c_str());
     } else {
       int num_shapes = shape_table->NumShapes();
       tprintf("Read shape table %s of %d shapes\n",
               shape_table_file.c_str(), num_shapes);
     }
   } else {
     tprintf("Warning: No shape table file present: %s\n",
             shape_table_file.c_str());
   }
   return shape_table;
 }

◆ LoadTrainingData()

MasterTrainer * tesseract::LoadTrainingData	(	int	argc,
		const char *const *	argv,
		bool	replication,
		ShapeTable **	shape_table,
		STRING *	file_prefix
	)

Creates a MasterTrainer and loads the training data into it: Initializes feature_defs and IntegerFX. Loads the shape_table if shape_table != nullptr. Loads initial unicharset from -U command-line option. If FLAGS_T is set, loads the majority of data from there, else:

Loads font info from -F option.
Loads xheights from -X option.
Loads samples from .tr files in remaining command-line args.
Deletes outliers and computes canonical samples.
If FLAGS_output_trainer is set, saves the trainer for future use. TODO: Who uses that? There is currently no code which reads it. Computes canonical and cloud features. If shape_table is not nullptr, but failed to load, make a fake flat one, as shape clustering was not run.

Definition at line 211 of file commontraining.cpp.

                                                      {
   InitFeatureDefs(&feature_defs);
   InitIntegerFX();
   *file_prefix = "";
   if (!FLAGS_D.empty()) {
     *file_prefix += FLAGS_D.c_str();
     *file_prefix += "/";
   }
   // If we are shape clustering (nullptr shape_table) or we successfully load
   // a shape_table written by a previous shape clustering, then
   // shape_analysis will be true, meaning that the MasterTrainer will replace
   // some members of the unicharset with their fragments.
   bool shape_analysis = false;
   if (shape_table != nullptr) {
     *shape_table = LoadShapeTable(*file_prefix);
     if (*shape_table != nullptr) shape_analysis = true;
   } else {
     shape_analysis = true;
   }
   MasterTrainer* trainer = new MasterTrainer(NM_CHAR_ANISOTROPIC,
                                              shape_analysis,
                                              replication,
                                              FLAGS_debug_level);
   IntFeatureSpace fs;
   fs.Init(kBoostXYBuckets, kBoostXYBuckets, kBoostDirBuckets);
   trainer->LoadUnicharset(FLAGS_U.c_str());
   // Get basic font information from font_properties.
   if (!FLAGS_F.empty()) {
     if (!trainer->LoadFontInfo(FLAGS_F.c_str())) {
       delete trainer;
       return nullptr;
     }
   }
   if (!FLAGS_X.empty()) {
     if (!trainer->LoadXHeights(FLAGS_X.c_str())) {
       delete trainer;
       return nullptr;
     }
   }
   trainer->SetFeatureSpace(fs);
   const char* page_name;
   // Load training data from .tr files on the command line.
   while ((page_name = GetNextFilename(argc, argv)) != nullptr) {
     tprintf("Reading %s ...\n", page_name);
     trainer->ReadTrainingSamples(page_name, feature_defs, false);
  
     // If there is a file with [lang].[fontname].exp[num].fontinfo present,
     // read font spacing information in to fontinfo_table.
     int pagename_len = strlen(page_name);
     char* fontinfo_file_name = new char[pagename_len + 7];
     strncpy(fontinfo_file_name, page_name, pagename_len - 2);   // remove "tr"
     strcpy(fontinfo_file_name + pagename_len - 2, "fontinfo");  // +"fontinfo"
     trainer->AddSpacingInfo(fontinfo_file_name);
     delete[] fontinfo_file_name;
  
     // Load the images into memory if required by the classifier.
     if (FLAGS_load_images) {
       STRING image_name = page_name;
       // Chop off the tr and replace with tif. Extension must be tif!
       image_name.truncate_at(image_name.length() - 2);
       image_name += "tif";
       trainer->LoadPageImages(image_name.c_str());
     }
   }
   trainer->PostLoadCleanup();
   // Write the master trainer if required.
   if (!FLAGS_output_trainer.empty()) {
     FILE* fp = fopen(FLAGS_output_trainer.c_str(), "wb");
     if (fp == nullptr) {
       tprintf("Can't create saved trainer data!\n");
     } else {
       trainer->Serialize(fp);
       fclose(fp);
     }
   }
   trainer->PreTrainingSetup();
   if (!FLAGS_O.empty() &&
       !trainer->unicharset().save_to_file(FLAGS_O.c_str())) {
     fprintf(stderr, "Failed to save unicharset to file %s\n", FLAGS_O.c_str());
     delete trainer;
     return nullptr;
   }
   if (shape_table != nullptr) {
     // If we previously failed to load a shapetable, then shape clustering
     // wasn't run so make a flat one now.
     if (*shape_table == nullptr) {
       *shape_table = new ShapeTable;
       trainer->SetupFlatShapeTable(*shape_table);
       tprintf("Flat shape table summary: %s\n",
               (*shape_table)->SummaryStr().c_str());
     }
     (*shape_table)->set_unicharset(trainer->unicharset());
   }
   return trainer;
 }

◆ Logistic()

double tesseract::Logistic ( double x )

inline

Definition at line 54 of file functions.h.

                                  {
   if (x < 0.0) return 1.0 - Logistic(-x);
   x *= kScaleFactor;
   unsigned index = static_cast<unsigned>(x);
   if (index >= (kTableSize - 1)) return 1.0;
   double l0 = LogisticTable[index];
   double l1 = LogisticTable[index + 1];
   // Linear interpolation.
   return l0 + (l1 - l0) * (x - index);
 }

◆ MultiplyAccumulate()

void tesseract::MultiplyAccumulate	(	int	n,
		const double *	u,
		const double *	v,
		double *	out
	)

inline

Definition at line 184 of file functions.h.

                                             {
   for (int i = 0; i < n; i++) {
     out[i] += u[i] * v[i];
   }
 }

◆ MultiplyVectorsInPlace()

void tesseract::MultiplyVectorsInPlace	(	int	n,
		const double *	src,
		double *	inout
	)

inline

Definition at line 179 of file functions.h.

                                                                             {
   for (int i = 0; i < n; ++i) inout[i] *= src[i];
 }

◆ NormalizeCleanAndSegmentUTF8()

bool tesseract::NormalizeCleanAndSegmentUTF8	(	UnicodeNormMode	u_mode,
		OCRNorm	ocr_normalize,
		GraphemeNormMode	g_mode,
		bool	report_errors,
		const char *	str8,
		std::vector< std::string > *	graphemes
	)

Definition at line 188 of file normstrngs.cpp.

                                {
       graphemes32.clear();
       success = Validator::ValidateCleanAndSegment(g_mode, report_errors,
                                                    cleaned32, &graphemes32);
     }
   }
   graphemes->clear();
   graphemes->reserve(graphemes32.size());
   for (const auto& grapheme : graphemes32) {
     graphemes->push_back(UNICHAR::UTF32ToUTF8(grapheme));
   }
   return success;
 }
  
 // Apply just the OCR-specific normalizations and return the normalized char.
 char32 OCRNormalize(char32 ch) {
   if (is_hyphen_punc(ch))
     return '-';
   else if (is_single_quote(ch))
     return '\'';
   else if (is_double_quote(ch))
     return '"';
   return ch;
 }
  
 bool IsOCREquivalent(char32 ch1, char32 ch2) {
   return OCRNormalize(ch1) == OCRNormalize(ch2);
 }
  

◆ NormalizeUTF8String()

bool tesseract::NormalizeUTF8String	(	UnicodeNormMode	u_mode,
		OCRNorm	ocr_normalize,
		GraphemeNorm	grapheme_normalize,
		const char *	str8,
		std::string *	normalized
	)

Definition at line 163 of file normstrngs.cpp.

                                                                    {
   std::vector<char32> normed32;
   NormalizeUTF8ToUTF32(u_mode, ocr_normalize, str8, &normed32);
   StripJoiners(&normed32);
   std::vector<std::vector<char32>> graphemes32;
   bool success = Validator::ValidateCleanAndSegment(g_mode, report_errors,
                                                     normed32, &graphemes32);
   if (g_mode != GraphemeNormMode::kSingleString && success) {

◆ OCRNormalize()

char32 tesseract::OCRNormalize ( char32 ch )

Definition at line 220 of file normstrngs.cpp.

                                    {
   // Asserts that ch is a valid codepoint, then returns ICU's Unicode
   // white-space test for it.
   // NOTE(review): this returns a whitespace predicate rather than a
   // normalized character, so the excerpt is probably offset from the
   // function named in the heading (it looks like the IsWhitespace body) —
   // confirm against normstrngs.cpp.
   ASSERT_HOST_MSG(IsValidCodepoint(ch), "Invalid Unicode codepoint: 0x%x\n",
                   ch);
   return u_isUWhiteSpace(static_cast<UChar32>(ch));
 }
  

◆ OtsuStats()

int tesseract::OtsuStats	(	const int *	histogram,
		int *	H_out,
		int *	omega0_out
	)

Definition at line 187 of file otsuthr.cpp.

                                                {
     omega_0 += histogram[t];
     mu_t += t * static_cast<double>(histogram[t]);
     if (omega_0 == 0)
       continue;
     omega_1 = H - omega_0;
     if (omega_1 == 0)
       break;
     mu_0 = mu_t / omega_0;
     mu_1 = (mu_T - mu_t) / omega_1;
     double sig_sq_B = mu_1 - mu_0;
     sig_sq_B *= sig_sq_B * omega_0 * omega_1;
     if (best_t < 0 || sig_sq_B > best_sig_sq_B) {
       best_sig_sq_B = sig_sq_B;
       best_t = t;
       best_omega_0 = omega_0;
     }
   }
   if (H_out != nullptr) *H_out = H;
   if (omega0_out != nullptr) *omega0_out = best_omega_0;
   return best_t;
 }
  
 }  // namespace tesseract.

◆ OtsuThreshold()

int tesseract::OtsuThreshold	(	Pix *	src_pix,
		int	left,
		int	top,
		int	width,
		int	height,
		int **	thresholds,
		int **	hi_values
	)

Definition at line 56 of file otsuthr.cpp.

                              {
     od.HistogramRectOCL(pixGetData(src_pix), num_channels,
                         pixGetWpl(src_pix) * 4, left, top, width, height,
                         kHistogramSize, histogramAllChannels);
  
     // Calculate Threshold from Histogram on cpu
     for (int ch = 0; ch < num_channels; ++ch) {
       (*thresholds)[ch] = -1;
       (*hi_values)[ch] = -1;
       int *histogram = &histogramAllChannels[kHistogramSize * ch];
       int H;
       int best_omega_0;
       int best_t = OtsuStats(histogram, &H, &best_omega_0);
       if (best_omega_0 == 0 || best_omega_0 == H) {
          // This channel is empty.
          continue;
        }
       // To be a convincing foreground we must have a small fraction of H
       // or to be a convincing background we must have a large fraction of H.
       // In between we assume this channel contains no thresholding information.
       int hi_value = best_omega_0 < H * 0.5;
       (*thresholds)[ch] = best_t;
       if (best_omega_0 > H * 0.75) {
         any_good_hivalue = true;
         (*hi_values)[ch] = 0;
       } else if (best_omega_0 < H * 0.25) {
         any_good_hivalue = true;
         (*hi_values)[ch] = 1;
       } else {
         // In case all channels are like this, keep the best of the bad lot.
         double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0;
         if (hi_dist > best_hi_dist) {
           best_hi_dist = hi_dist;
           best_hi_value = hi_value;
           best_hi_index = ch;
         }
       }
     }
   } else {
 #endif
     for (int ch = 0; ch < num_channels; ++ch) {
       (*thresholds)[ch] = -1;
       (*hi_values)[ch] = -1;
       // Compute the histogram of the image rectangle.
       int histogram[kHistogramSize];
       HistogramRect(src_pix, ch, left, top, width, height, histogram);
       int H;
       int best_omega_0;
       int best_t = OtsuStats(histogram, &H, &best_omega_0);
       if (best_omega_0 == 0 || best_omega_0 == H) {
          // This channel is empty.
          continue;
        }
       // To be a convincing foreground we must have a small fraction of H
       // or to be a convincing background we must have a large fraction of H.
       // In between we assume this channel contains no thresholding information.
       int hi_value = best_omega_0 < H * 0.5;
       (*thresholds)[ch] = best_t;
       if (best_omega_0 > H * 0.75) {
         any_good_hivalue = true;
         (*hi_values)[ch] = 0;
       } else if (best_omega_0 < H * 0.25) {
         any_good_hivalue = true;
         (*hi_values)[ch] = 1;
       } else {
         // In case all channels are like this, keep the best of the bad lot.
         double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0;
         if (hi_dist > best_hi_dist) {
           best_hi_dist = hi_dist;
           best_hi_value = hi_value;
           best_hi_index = ch;
         }
       }
     }
 #ifdef USE_OPENCL
   }
   delete[] histogramAllChannels;
 #endif  // USE_OPENCL
  
   if (!any_good_hivalue) {
     // Use the best of the ones that were not good enough.
     (*hi_values)[best_hi_index] = best_hi_value;
   }
   return num_channels;
 }
  
 // Computes the histogram for the given image rectangle, and the given
 // single channel. Each channel is always one byte per pixel.
 // Histogram is always a kHistogramSize(256) element array to count
 // occurrences of each pixel value.
 void HistogramRect(Pix* src_pix, int channel,
                    int left, int top, int width, int height,
                    int* histogram) {
   int num_channels = pixGetDepth(src_pix) / 8;
   channel = ClipToRange(channel, 0, num_channels - 1);
   int bottom = top + height;
   memset(histogram, 0, sizeof(*histogram) * kHistogramSize);
   int src_wpl = pixGetWpl(src_pix);
   l_uint32* srcdata = pixGetData(src_pix);
   for (int y = top; y < bottom; ++y) {
     const l_uint32* linedata = srcdata + y * src_wpl;

◆ ParamsTrainingFeatureByName()

int tesseract::ParamsTrainingFeatureByName ( const char * name )

Definition at line 26 of file params_training_featdef.cpp.

                                                   {
   if (name == nullptr)
     return -1;
   int array_size = sizeof(kParamsTrainingFeatureTypeName) /
     sizeof(kParamsTrainingFeatureTypeName[0]);
   for (int i = 0; i < array_size; i++) {
     if (kParamsTrainingFeatureTypeName[i] == nullptr)
       continue;
     if (strcmp(name, kParamsTrainingFeatureTypeName[i]) == 0)
       return i;
   }
   return -1;
 }

◆ ParseCommandLineFlags()

void tesseract::ParseCommandLineFlags	(	const char *	usage,
		int *	argc,
		char ***	argv,
		const bool	remove_flags
	)

Definition at line 166 of file commandlineflags.cpp.

                                                     {
   // Walks (*argv)[1..] for as long as the arguments start with '-'. Each
   // flag name is looked up as an int, double, bool and string flag, in that
   // order, and the parsed value is stored with the matching Set*FlagValue
   // call. The value comes from "name=value" or, for non-bool flags, from the
   // following argument. Malformed or unknown flags print an error and
   // exit(1). If remove_flags is true, *argc and *argv are adjusted at the end
   // so that the consumed flag arguments are dropped.

   // No arguments at all: print the usage text and the flag list, then exit.
   if (*argc == 1) {
     printf("USAGE: %s\n", usage);
     PrintCommandLineFlags();
     exit(0);
   }
  
   // A first argument of -v or --version prints the version and exits.
   if (*argc > 1 && (!strcmp((*argv)[1], "-v") || !strcmp((*argv)[1], "--version"))) {
     printf("%s\n", TessBaseAPI::Version());
     exit(0);
   }
  
   // i is declared outside the loop because the remove_flags step below needs
   // the index of the first argument that was not consumed as a flag.
   int i;
   for (i = 1; i < *argc; ++i) {
     const char* current_arg = (*argv)[i];
     // If argument does not start with a hyphen then break.
     if (current_arg[0] != '-') {
       break;
     }
     // Position current_arg after the starting hyphens. We treat a sequence of
     // one or two consecutive hyphens identically.
     ++current_arg;
     if (current_arg[0] == '-') {
       ++current_arg;
     }
     // If this is asking for usage, print the help message and abort.
     if (!strcmp(current_arg, "help")) {
       printf("Usage:\n  %s [OPTION ...]\n\n", usage);
       PrintCommandLineFlags();
       exit(0);
     }
     // Find the starting position of the value if it was specified in this
     // string.
     const char* equals_position = strchr(current_arg, '=');
     const char* rhs = nullptr;
     if (equals_position != nullptr) {
       rhs = equals_position + 1;
     }
     // Extract the flag name.
     STRING lhs;
     if (equals_position == nullptr) {
       lhs = current_arg;
     } else {
       lhs.assign(current_arg, equals_position - current_arg);
     }
     // An empty name ("-", "--" or "--=value") is rejected.
     if (!lhs.length()) {
       tprintf("ERROR: Bad argument: %s\n", (*argv)[i]);
       exit(1);
     }
  
     // Find the flag name in the list of global flags.
     // int32_t flag
     int32_t int_val;
     if (IntFlagExists(lhs.c_str(), &int_val)) {
       if (rhs != nullptr) {
         if (!strlen(rhs)) {
           // Bad input of the format --int_flag=
           tprintf("ERROR: Bad argument: %s\n", (*argv)[i]);
           exit(1);
         }
         if (!SafeAtoi(rhs, &int_val)) {
           tprintf("ERROR: Could not parse int from %s in flag %s\n",
                   rhs, (*argv)[i]);
           exit(1);
         }
       } else {
         // We need to parse the next argument
         if (i + 1 >= *argc) {
           tprintf("ERROR: Could not find value argument for flag %s\n",
                   lhs.c_str());
           exit(1);
         } else {
           // Consume the following argument as this flag's value.
           ++i;
           if (!SafeAtoi((*argv)[i], &int_val)) {
             tprintf("ERROR: Could not parse int32_t from %s\n", (*argv)[i]);
             exit(1);
           }
         }
       }
       SetIntFlagValue(lhs.c_str(), int_val);
       continue;
     }
  
     // double flag
     double double_val;
     if (DoubleFlagExists(lhs.c_str(), &double_val)) {
       if (rhs != nullptr) {
         if (!strlen(rhs)) {
           // Bad input of the format --double_flag=
           tprintf("ERROR: Bad argument: %s\n", (*argv)[i]);
           exit(1);
         }
         if (!SafeAtod(rhs, &double_val)) {
           tprintf("ERROR: Could not parse double from %s in flag %s\n",
                   rhs, (*argv)[i]);
           exit(1);
         }
       } else {
         // We need to parse the next argument
         if (i + 1 >= *argc) {
           tprintf("ERROR: Could not find value argument for flag %s\n",
                   lhs.c_str());
           exit(1);
         } else {
           // Consume the following argument as this flag's value.
           ++i;
           if (!SafeAtod((*argv)[i], &double_val)) {
             tprintf("ERROR: Could not parse double from %s\n", (*argv)[i]);
             exit(1);
           }
         }
       }
       SetDoubleFlagValue(lhs.c_str(), double_val);
       continue;
     }
  
     // Bool flag. Allow input forms --flag (equivalent to --flag=true),
     // --flag=false, --flag=true, --flag=0 and --flag=1
     // Unlike the other flag types, a bool flag never consumes the next
     // argument as its value.
     bool bool_val;
     if (BoolFlagExists(lhs.c_str(), &bool_val)) {
       if (rhs == nullptr) {
         // --flag form
         bool_val = true;
       } else {
         if (!strlen(rhs)) {
           // Bad input of the format --bool_flag=
           tprintf("ERROR: Bad argument: %s\n", (*argv)[i]);
           exit(1);
         }
         if (!strcmp(rhs, "false") || !strcmp(rhs, "0")) {
           bool_val = false;
         } else if (!strcmp(rhs, "true") || !strcmp(rhs, "1")) {
           bool_val = true;
         } else {
           tprintf("ERROR: Could not parse bool from flag %s\n", (*argv)[i]);
           exit(1);
         }
       }
       SetBoolFlagValue(lhs.c_str(), bool_val);
       continue;
     }
  
     // string flag
     // Note that an empty value ("--flag=") is accepted here, unlike for the
     // int, double and bool flags above.
     const char* string_val;
     if (StringFlagExists(lhs.c_str(), &string_val)) {
       if (rhs != nullptr) {
         string_val = rhs;
       } else {
         // Pick the next argument
         if (i + 1 >= *argc) {
           tprintf("ERROR: Could not find string value for flag %s\n",
                   lhs.c_str());
           exit(1);
         } else {
           string_val = (*argv)[++i];
         }
       }
       SetStringFlagValue(lhs.c_str(), string_val);
       continue;
     }
  
     // Flag was not found. Exit with an error message.
     tprintf("ERROR: Non-existent flag %s\n", (*argv)[i]);
     exit(1);
   }  // for each argv
   if (remove_flags) {
     // Copy the program name into the slot just before the first unconsumed
     // argument, then advance *argv to that slot and shrink *argc by the
     // number of consumed arguments (i - 1).
     (*argv)[i - 1] = (*argv)[0];
     (*argv) += (i - 1);
     (*argc) -= (i - 1);
   }
 }

◆ PrepareDistortedPix()

Pix * tesseract::PrepareDistortedPix	(	const Pix *	pix,
		bool	perspective,
		bool	invert,
		bool	white_noise,
		bool	smooth_noise,
		bool	blur,
		int	box_reduction,
		TRand *	randomizer,
		GenericVector< TBOX > *	boxes
	)

Definition at line 196 of file degradeimage.cpp.

                                                  {
     Pix* blurred = pixBlockconv(distorted, 1, 1);
     pixDestroy(&distorted);
     distorted = blurred;
   }
   if (perspective)
     GeneratePerspectiveDistortion(0, 0, randomizer, &distorted, boxes);
   if (boxes != nullptr) {
     for (int b = 0; b < boxes->size(); ++b) {
       (*boxes)[b].scale(1.0f / box_reduction);
       if ((*boxes)[b].width() <= 0)
         (*boxes)[b].set_right((*boxes)[b].left() + 1);
     }
   }
   if (invert && randomizer->SignedRand(1.0) < -0)
     pixInvert(distorted, distorted);
   return distorted;
 }
  
 // Distorts anything that has a non-null pointer with the same pseudo-random
 // perspective distortion. Width and height only need to be set if there
 // is no pix. If there is a pix, then they will be taken from there.
 void GeneratePerspectiveDistortion(int width, int height, TRand* randomizer,
                                    Pix** pix, GenericVector<TBOX>* boxes) {
   if (pix != nullptr && *pix != nullptr) {
     width = pixGetWidth(*pix);
     height = pixGetHeight(*pix);
   }
   float* im_coeffs = nullptr;
   float* box_coeffs = nullptr;
   l_int32 incolor =
       ProjectiveCoeffs(width, height, randomizer, &im_coeffs, &box_coeffs);
   if (pix != nullptr && *pix != nullptr) {
     // Transform the image.
     Pix* transformed = pixProjective(*pix, im_coeffs, incolor);

◆ PrintString32WithUnicodes()

std::string tesseract::PrintString32WithUnicodes ( const std::string & str )

inline

Definition at line 34 of file normstrngs_test.h.

                                                                  {
   std::vector<char32> str32 = UNICHAR::UTF8ToUTF32(str.c_str());
   return absl::StrCat("\"", str, "\" ", CodepointList(str32));
 }

◆ PrintStringVectorWithUnicodes()

std::string tesseract::PrintStringVectorWithUnicodes ( const std::vector< std::string > & glyphs )

inline

Definition at line 39 of file normstrngs_test.h.

                                                                                    {
   std::string result;
   for (const auto& s : glyphs) {
     result += "Glyph:";
     result += PrintString32WithUnicodes(s) + "\n";
   }
   return result;
 }

◆ ProjectiveCoeffs()

int tesseract::ProjectiveCoeffs	(	int	width,
		int	height,
		TRand *	randomizer,
		float **	im_coeffs,
		float **	box_coeffs
	)

Definition at line 283 of file degradeimage.cpp.

                                 {
       factors[i] = fabs(randomizer->SignedRand(1.0));
       if (i <= FN_Y3)
         factors[i] *= 5.0 / 8.0;
       else
         factors[i] *= 0.5;
       factors[i] *= factors[i];
     }
   }
   // Setup "to" points.
   Pta* dest_pts = ptaCreate(4);
   ptaAddPt(dest_pts, factors[FN_X0] * width, factors[FN_Y0] * height);
   ptaAddPt(dest_pts, (1.0f - factors[FN_X1]) * width, factors[FN_Y1] * height);
   ptaAddPt(dest_pts, (1.0f - factors[FN_X1] + shear) * width,
            (1 - factors[FN_Y2]) * height);
   ptaAddPt(dest_pts, (factors[FN_X0] + shear) * width,
            (1 - factors[FN_Y3]) * height);
   getProjectiveXformCoeffs(dest_pts, src_pts, im_coeffs);
   getProjectiveXformCoeffs(src_pts, dest_pts, box_coeffs);
   ptaDestroy(&src_pts);
   ptaDestroy(&dest_pts);
   return factors[FN_INCOLOR] > 0.5f ? L_BRING_IN_WHITE : L_BRING_IN_BLACK;
 }
  
 }  // namespace tesseract

◆ PSM_BLOCK_FIND_ENABLED()

bool tesseract::PSM_BLOCK_FIND_ENABLED ( int pageseg_mode )

inline

Definition at line 200 of file publictypes.h.

                                                      {
   return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_COLUMN;
 }

◆ PSM_COL_FIND_ENABLED()

bool tesseract::PSM_COL_FIND_ENABLED ( int pageseg_mode )

inline

Definition at line 194 of file publictypes.h.

                                                    {
   return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_AUTO;
 }

◆ PSM_LINE_FIND_ENABLED()

bool tesseract::PSM_LINE_FIND_ENABLED ( int pageseg_mode )

inline

Definition at line 203 of file publictypes.h.

                                                     {
   return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_BLOCK;
 }

◆ PSM_ORIENTATION_ENABLED()

bool tesseract::PSM_ORIENTATION_ENABLED ( int pageseg_mode )

inline

Definition at line 191 of file publictypes.h.

                                                       {
   return pageseg_mode <= PSM_AUTO || pageseg_mode == PSM_SPARSE_TEXT_OSD;
 }

◆ PSM_OSD_ENABLED()

bool tesseract::PSM_OSD_ENABLED ( int pageseg_mode )

inline

Inline functions that act on a PageSegMode to determine whether components of layout analysis are enabled. They depend critically on the order of the elements of PageSegMode. NOTE that the argument is an int for compatibility with INT_PARAM.

Definition at line 188 of file publictypes.h.

                                               {
   return pageseg_mode <= PSM_AUTO_OSD || pageseg_mode == PSM_SPARSE_TEXT_OSD;
 }

◆ PSM_SPARSE()

bool tesseract::PSM_SPARSE ( int pageseg_mode )

inline

Definition at line 197 of file publictypes.h.

                                          {
   return pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
 }

◆ PSM_WORD_FIND_ENABLED()

bool tesseract::PSM_WORD_FIND_ENABLED ( int pageseg_mode )

inline

Definition at line 206 of file publictypes.h.

                                                     {
   return (pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_LINE) ||
          pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
 }

◆ read_info()

bool tesseract::read_info	(	TFile *	f,
		FontInfo *	fi
	)

Definition at line 156 of file fontinfo.cpp.

                                        {
   uint32_t size;
   if (!f->DeSerialize(&size)) return false;
   char* font_name = new char[size + 1];
   fi->name = font_name;
   if (!f->DeSerialize(font_name, size)) return false;
   font_name[size] = '\0';
   return f->DeSerialize(&fi->properties);
 }

◆ read_set()

bool tesseract::read_set	(	TFile *	f,
		FontSet *	fs
	)

Definition at line 229 of file fontinfo.cpp.

                                      {
   if (!f->DeSerialize(&fs->size)) return false;
   fs->configs = new int[fs->size];
   return f->DeSerialize(&fs->configs[0], fs->size);
 }

◆ read_spacing_info()

bool tesseract::read_spacing_info	(	TFile *	f,
		FontInfo *	fi
	)

Definition at line 173 of file fontinfo.cpp.

                                                {
   int32_t vec_size, kern_size;
   if (!f->DeSerialize(&vec_size)) return false;
   ASSERT_HOST(vec_size >= 0);
   if (vec_size == 0) return true;
   fi->init_spacing(vec_size);
   for (int i = 0; i < vec_size; ++i) {
     auto *fs = new FontSpacingInfo();
     if (!f->DeSerialize(&fs->x_gap_before) ||
         !f->DeSerialize(&fs->x_gap_after) ||
         !f->DeSerialize(&kern_size)) {
       delete fs;
       return false;
     }
     if (kern_size < 0) {  // indication of a nullptr entry in fi->spacing_vec
       delete fs;
       continue;
     }
     if (kern_size > 0 && (!fs->kerned_unichar_ids.DeSerialize(f) ||
                           !fs->kerned_x_gaps.DeSerialize(f))) {
       delete fs;
       return false;
     }
     fi->add_spacing(i, fs);
   }
   return true;
 }

◆ ReadFile()

STRING tesseract::ReadFile	(	const std::string &	filename,
		FileReader	reader
	)

Definition at line 57 of file lang_model_helpers.cpp.

                                                               {
   if (filename.empty()) return STRING();
   GenericVector<char> data;
   bool read_result;
   if (reader == nullptr)
     read_result = LoadDataFromFile(filename.c_str(), &data);
   else
     read_result = (*reader)(filename.c_str(), &data);
   if (read_result) return STRING(&data[0], data.size());
   tprintf("Failed to read data from: %s\n", filename.c_str());
   return STRING();
 }

◆ RecomputeMarginsAndClearHypotheses()

void tesseract::RecomputeMarginsAndClearHypotheses	(	GenericVector< RowScratchRegisters > *	rows,
		int	start,
		int	end,
		int	percentile
	)

Definition at line 1583 of file paragraphs.cpp.

                                     {
     RowScratchRegisters &sr = (*rows)[i];
     if (sr.ri_->num_words == 0)
       continue;
     lefts.add(sr.lmargin_ + sr.lindent_, 1);
     rights.add(sr.rmargin_ + sr.rindent_, 1);
   }
   int ignorable_left = lefts.ile(ClipToRange(percentile, 0, 100) / 100.0);
   int ignorable_right = rights.ile(ClipToRange(percentile, 0, 100) / 100.0);
   for (int i = start; i < end; i++) {
     RowScratchRegisters &sr = (*rows)[i];
     int ldelta = ignorable_left - sr.lmargin_;
     sr.lmargin_ += ldelta;
     sr.lindent_ -= ldelta;
     int rdelta = ignorable_right - sr.rmargin_;
     sr.rmargin_ += rdelta;
     sr.rindent_ -= rdelta;
   }
 }
  
 // Return the median inter-word space in rows[row_start, row_end).
 int InterwordSpace(const GenericVector<RowScratchRegisters> &rows,
                    int row_start, int row_end) {
   if (row_end < row_start + 1) return 1;
   int word_height = (rows[row_start].ri_->lword_box.height() +
                      rows[row_end - 1].ri_->lword_box.height()) / 2;
   int word_width = (rows[row_start].ri_->lword_box.width() +
                     rows[row_end - 1].ri_->lword_box.width())  / 2;
   STATS spacing_widths(0, 5 + word_width);
   for (int i = row_start; i < row_end; i++) {
     if (rows[i].ri_->num_words > 1) {
       spacing_widths.add(rows[i].ri_->average_interword_space, 1);
     }
   }

◆ RightWordAttributes()

void tesseract::RightWordAttributes	(	const UNICHARSET *	unicharset,
		const WERD_CHOICE *	werd,
		const STRING &	utf8,
		bool *	is_list,
		bool *	starts_idea,
		bool *	ends_idea
	)

Definition at line 470 of file paragraphs.cpp.

                                                     {
       *ends_idea = true;
     }
   } else {  // Assume utf8 is mostly ASCII
     if (AsciiLikelyListItem(utf8)) {
       *is_list = true;
       *starts_idea = true;
     }
     int last_letter = utf8[utf8.size() - 1];
     if (IsOpeningPunct(last_letter) || IsTerminalPunct(last_letter)) {
       *ends_idea = true;
     }
   }
 }
  
 // =============== Implementation of RowScratchRegisters =====================
 /* static */
 void RowScratchRegisters::AppendDebugHeaderFields(
     GenericVector<STRING> *header) {
   header->push_back("[lmarg,lind;rind,rmarg]");
   header->push_back("model");
 }
  
 void RowScratchRegisters::AppendDebugInfo(const ParagraphTheory &theory,
                                           GenericVector<STRING> *dbg) const {
   char s[30];
   snprintf(s, sizeof(s), "[%3d,%3d;%3d,%3d]",
            lmargin_, lindent_, rindent_, rmargin_);
   dbg->push_back(s);

◆ RowsFitModel()

bool tesseract::RowsFitModel	(	const GenericVector< RowScratchRegisters > *	rows,
		int	start,
		int	end,
		const ParagraphModel *	model
	)

Definition at line 1826 of file paragraphs.cpp.

1834 {

1835 // Record patently obvious body text.

◆ SaveDataToFile()

bool tesseract::SaveDataToFile	(	const GenericVector< char > &	data,
		const char *	filename
	)

inline

Definition at line 362 of file genericvector.h.

                                                  {
   FILE* fp = fopen(filename, "wb");
   if (fp == nullptr) {
     return false;
   }
   bool result =
       static_cast<int>(fwrite(&data[0], 1, data.size(), fp)) == data.size();
   fclose(fp);
   return result;
 }

◆ ScriptPosToString()

const char * tesseract::ScriptPosToString ( enum ScriptPos script_pos )

Definition at line 202 of file ratngs.cpp.

                                                          {
   switch (script_pos) {
     case SP_NORMAL: return "NORM";
     case SP_SUBSCRIPT: return "SUB";
     case SP_SUPERSCRIPT: return "SUPER";
     case SP_DROPCAP: return "DROPC";
   }
   return "SP_UNKNOWN";

◆ Serialize() [1/8]

bool tesseract::Serialize	(	FILE *	fp,
		const char *	data,
		size_t	n = `1`
	)

Definition at line 73 of file serialis.cpp.

75 {

◆ Serialize() [2/8]

bool tesseract::Serialize	(	FILE *	fp,
		const float *	data,
		size_t	n = `1`
	)

Definition at line 77 of file serialis.cpp.

79 {

◆ Serialize() [3/8]

bool tesseract::Serialize	(	FILE *	fp,
		const int16_t *	data,
		size_t	n = `1`
	)

Definition at line 85 of file serialis.cpp.

87 {

◆ Serialize() [4/8]

bool tesseract::Serialize	(	FILE *	fp,
		const int32_t *	data,
		size_t	n = `1`
	)

Definition at line 89 of file serialis.cpp.

92 : offset_(0),

◆ Serialize() [5/8]

bool tesseract::Serialize	(	FILE *	fp,
		const int8_t *	data,
		size_t	n = `1`
	)

Definition at line 81 of file serialis.cpp.

83 {

◆ Serialize() [6/8]

bool tesseract::Serialize	(	FILE *	fp,
		const uint16_t *	data,
		size_t	n = `1`
	)

Definition at line 97 of file serialis.cpp.

98 {

99 if (data_is_owned_)

◆ Serialize() [7/8]

bool tesseract::Serialize	(	FILE *	fp,
		const uint32_t *	data,
		size_t	n = `1`
	)

Definition at line 101 of file serialis.cpp.

103 {

◆ Serialize() [8/8]

bool tesseract::Serialize	(	FILE *	fp,
		const uint8_t *	data,
		size_t	n = `1`
	)

Definition at line 93 of file serialis.cpp.

96 {}

◆ SetBlobStrokeWidth()

void tesseract::SetBlobStrokeWidth	(	Pix *	pix,
		BLOBNBOX *	blob
	)

Definition at line 67 of file tordmain.cpp.

                                                   {
   // Cut the blob rectangle into a Pix.
   int pix_height = pixGetHeight(pix);
   const TBOX& box = blob->bounding_box();
   int width = box.width();
   int height = box.height();
   Box* blob_pix_box = boxCreate(box.left(), pix_height - box.top(),
                                 width, height);
   Pix* pix_blob = pixClipRectangle(pix, blob_pix_box, nullptr);
   boxDestroy(&blob_pix_box);
   Pix* dist_pix = pixDistanceFunction(pix_blob, 4, 8, L_BOUNDARY_BG);
   pixDestroy(&pix_blob);
   // Compute the stroke widths.
   uint32_t* data = pixGetData(dist_pix);
   int wpl = pixGetWpl(dist_pix);
   // Horizontal width of stroke.
   STATS h_stats(0, width + 1);
   for (int y = 0; y < height; ++y) {
     uint32_t* pixels = data + y*wpl;
     int prev_pixel = 0;
     int pixel = GET_DATA_BYTE(pixels, 0);
     for (int x = 1; x < width; ++x) {
       int next_pixel = GET_DATA_BYTE(pixels, x);
       // We are looking for a pixel that is equal to its vertical neighbours,
       // yet greater than its left neighbour.
       if (prev_pixel < pixel &&
           (y == 0 || pixel == GET_DATA_BYTE(pixels - wpl, x - 1)) &&
           (y == height - 1 || pixel == GET_DATA_BYTE(pixels + wpl, x - 1))) {
         if (pixel > next_pixel) {
           // Single local max, so an odd width.
           h_stats.add(pixel * 2 - 1, 1);
         } else if (pixel == next_pixel && x + 1 < width &&
                  pixel > GET_DATA_BYTE(pixels, x + 1)) {
           // Double local max, so an even width.
           h_stats.add(pixel * 2, 1);
         }
       }
       prev_pixel = pixel;
       pixel = next_pixel;
     }
   }
   // Vertical width of stroke.
   STATS v_stats(0, height + 1);
   for (int x = 0; x < width; ++x) {
     int prev_pixel = 0;
     int pixel = GET_DATA_BYTE(data, x);
     for (int y = 1; y < height; ++y) {
       uint32_t* pixels = data + y*wpl;
       int next_pixel = GET_DATA_BYTE(pixels, x);
       // We are looking for a pixel that is equal to its horizontal neighbours,
       // yet greater than its upper neighbour.
       if (prev_pixel < pixel &&
           (x == 0 || pixel == GET_DATA_BYTE(pixels - wpl, x - 1)) &&
           (x == width - 1 || pixel == GET_DATA_BYTE(pixels - wpl, x + 1))) {
         if (pixel > next_pixel) {
           // Single local max, so an odd width.
           v_stats.add(pixel * 2 - 1, 1);
         } else if (pixel == next_pixel && y + 1 < height &&
                  pixel > GET_DATA_BYTE(pixels + wpl, x)) {
           // Double local max, so an even width.
           v_stats.add(pixel * 2, 1);
         }
       }
       prev_pixel = pixel;
       pixel = next_pixel;
     }
   }
   pixDestroy(&dist_pix);
   // Store the horizontal and vertical width in the blob, keeping both
   // widths if there is enough information, otherwise only the one with
   // the most samples.
   // If there are insufficient samples, store zero, rather than using
   // 2*area/perimeter, as the numbers that gives do not match the numbers
   // from the distance method.
   if (h_stats.get_total() >= (width + height) / 4) {
     blob->set_horz_stroke_width(h_stats.ile(0.5f));
     if (v_stats.get_total() >= (width + height) / 4)
       blob->set_vert_stroke_width(v_stats.ile(0.5f));
     else
       blob->set_vert_stroke_width(0.0f);
   } else {
     if (v_stats.get_total() >= (width + height) / 4 ||
         v_stats.get_total() > h_stats.get_total()) {
       blob->set_horz_stroke_width(0.0f);
       blob->set_vert_stroke_width(v_stats.ile(0.5f));
     } else {
       blob->set_horz_stroke_width(h_stats.get_total() > 2 ? h_stats.ile(0.5f)
                                                           : 0.0f);
       blob->set_vert_stroke_width(0.0f);
     }

◆ SetPropertiesForInputFile()

void tesseract::SetPropertiesForInputFile	(	const std::string &	script_dir,
		const std::string &	input_unicharset_file,
		const std::string &	output_unicharset_file,
		const std::string &	output_xheights_file
	)

Definition at line 183 of file unicharset_training_utils.cpp.

                                                                       {
   UNICHARSET unicharset;
  
   // Load the input unicharset
   unicharset.load_from_file(input_unicharset_file.c_str());
   tprintf("Loaded unicharset of size %d from file %s\n", unicharset.size(),
           input_unicharset_file.c_str());
  
   // Set unichar properties
   tprintf("Setting unichar properties\n");
   SetupBasicProperties(true, false, &unicharset);
   tprintf("Setting script properties\n");
   SetScriptProperties(script_dir, &unicharset);
   if (!output_xheights_file.empty()) {
     std::string xheights_str = GetXheightString(script_dir, unicharset);
     File::WriteStringToFileOrDie(xheights_str, output_xheights_file);
   }
  
   // Write the output unicharset
   tprintf("Writing unicharset to file %s\n", output_unicharset_file.c_str());
   unicharset.save_to_file(output_unicharset_file.c_str());
 }

◆ SetScriptProperties()

void tesseract::SetScriptProperties	(	const std::string &	script_dir,
		UNICHARSET *	unicharset
	)

Definition at line 143 of file unicharset_training_utils.cpp.

                                                                               {
   for (int s = 0; s < unicharset->get_script_table_size(); ++s) {
     // Load the unicharset for the script if available.
     std::string filename = script_dir + "/" +
                       unicharset->get_script_from_script_id(s) + ".unicharset";
     UNICHARSET script_set;
     if (script_set.load_from_file(filename.c_str())) {
       unicharset->SetPropertiesFromOther(script_set);
     } else if (s != unicharset->common_sid() && s != unicharset->null_sid()) {
       tprintf("Failed to load script unicharset from:%s\n", filename.c_str());
     }
   }
   for (int c = SPECIAL_UNICHAR_CODES_COUNT; c < unicharset->size(); ++c) {
     if (unicharset->PropertiesIncomplete(c)) {
       tprintf("Warning: properties incomplete for index %d = %s\n", c,
               unicharset->id_to_unichar(c));
     }
   }
 }

◆ SetupBasicProperties() [1/2]

void tesseract::SetupBasicProperties	(	bool	report_errors,
		bool	decompose,
		UNICHARSET *	unicharset
	)

Definition at line 40 of file unicharset_training_utils.cpp.

                                                   {
   // For every unichar id in |unicharset|: derives the character-class flags,
   // script, other-case id, direction, mirror id and normalized string from
   // the unichar's code points and stores them in the unicharset. Calls
   // post_load_setup() once all ids have been processed.
   // report_errors enables the "not in unicharset" messages; decompose
   // selects kNFKD instead of kNFKC for the normalized form.
   for (int unichar_id = 0; unichar_id < unicharset->size(); ++unichar_id) {
     // Convert any custom ligatures.
     // If the unichar string equals column 1 of a kCustomLigatures row, the
     // string in column 0 of that row is used for the rest of this iteration.
     const char* unichar_str = unicharset->id_to_unichar(unichar_id);
     for (int i = 0; UNICHARSET::kCustomLigatures[i][0] != nullptr; ++i) {
       if (!strcmp(UNICHARSET::kCustomLigatures[i][1], unichar_str)) {
         unichar_str = UNICHARSET::kCustomLigatures[i][0];
         break;
       }
     }
  
     // Convert the unichar to UTF32 representation
     std::vector<char32> uni_vector = UNICHAR::UTF8ToUTF32(unichar_str);
  
     // Assume that if the property is true for any character in the string,
     // then it holds for the whole "character".
     bool unichar_isalpha = false;
     bool unichar_islower = false;
     bool unichar_isupper = false;
     bool unichar_isdigit = false;
     bool unichar_ispunct = false;
  
     for (char32 u_ch : uni_vector) {
       if (u_isalpha(u_ch)) unichar_isalpha = true;
       if (u_islower(u_ch)) unichar_islower = true;
       if (u_isupper(u_ch)) unichar_isupper = true;
       if (u_isdigit(u_ch)) unichar_isdigit = true;
       if (u_ispunct(u_ch)) unichar_ispunct = true;
     }
  
     unicharset->set_isalpha(unichar_id, unichar_isalpha);
     unicharset->set_islower(unichar_id, unichar_islower);
     unicharset->set_isupper(unichar_id, unichar_isupper);
     unicharset->set_isdigit(unichar_id, unichar_isdigit);
     unicharset->set_ispunctuation(unichar_id, unichar_ispunct);
  
     // The script is taken from the first code point only.
     // NOTE(review): uni_vector[0] assumes UTF8ToUTF32 returned at least one
     // code point for this unichar -- confirm it cannot be empty here.
     tesseract::IcuErrorCode err;
     unicharset->set_script(unichar_id, uscript_getName(
         uscript_getScript(uni_vector[0], err)));
  
     const int num_code_points = uni_vector.size();
     // Obtain the lower/upper case if needed and record it in the properties.
     // The other case defaults to the unichar itself and is only replaced when
     // the case-converted string is found in the unicharset.
     unicharset->set_other_case(unichar_id, unichar_id);
     if (unichar_islower || unichar_isupper) {
       std::vector<char32> other_case(num_code_points, 0);
       for (int i = 0; i < num_code_points; ++i) {
         // TODO(daria): Ideally u_strToLower()/ustrToUpper() should be used.
         // However since they deal with UChars (so need a conversion function
         // from char32 or UTF8string) and require a meaningful locale string,
         // for now u_tolower()/u_toupper() are used.
         // When the unichar has any lower-case code point, every code point is
         // upper-cased; otherwise every code point is lower-cased.
         other_case[i] = unichar_islower ? u_toupper(uni_vector[i]) :
           u_tolower(uni_vector[i]);
       }
       std::string other_case_uch = UNICHAR::UTF32ToUTF8(other_case);
       UNICHAR_ID other_case_id =
           unicharset->unichar_to_id(other_case_uch.c_str());
       if (other_case_id != INVALID_UNICHAR_ID) {
         unicharset->set_other_case(unichar_id, other_case_id);
       } else if (unichar_id >= SPECIAL_UNICHAR_CODES_COUNT && report_errors) {
         tprintf("Other case %s of %s is not in unicharset\n",
                 other_case_uch.c_str(), unichar_str);
       }
     }
  
     // Set RTL property and obtain mirror unichar ID from ICU.
     std::vector<char32> mirrors(num_code_points, 0);
     for (int i = 0; i < num_code_points; ++i) {
       mirrors[i] = u_charMirror(uni_vector[i]);
       if (i == 0) {  // set directionality to that of the 1st code point
         unicharset->set_direction(unichar_id,
                                   static_cast<UNICHARSET::Direction>(
                                       u_charDirection(uni_vector[i])));
       }
     }
     std::string mirror_uch = UNICHAR::UTF32ToUTF8(mirrors);
     UNICHAR_ID mirror_uch_id = unicharset->unichar_to_id(mirror_uch.c_str());
     if (mirror_uch_id != INVALID_UNICHAR_ID) {
       unicharset->set_mirror(unichar_id, mirror_uch_id);
     } else if (report_errors) {
       tprintf("Mirror %s of %s is not in unicharset\n",
               mirror_uch.c_str(), unichar_str);
     }
  
     // Record normalized version of this unichar.
     // unichar_id 0 is never normalized; it, and any unichar whose
     // normalization fails or comes back empty, stores unichar_str unchanged.
     std::string normed_str;
     if (unichar_id != 0 &&
         tesseract::NormalizeUTF8String(
             decompose ? tesseract::UnicodeNormMode::kNFKD
                       : tesseract::UnicodeNormMode::kNFKC,
             tesseract::OCRNorm::kNormalize, tesseract::GraphemeNorm::kNone,
             unichar_str, &normed_str) &&
         !normed_str.empty()) {
       unicharset->set_normed(unichar_id, normed_str.c_str());
     } else {
       unicharset->set_normed(unichar_id, unichar_str);
     }
     // Sanity check: the recorded other-case id must be a valid unichar id.
     ASSERT_HOST(unicharset->get_other_case(unichar_id) < unicharset->size());
   }
   unicharset->post_load_setup();
 }

◆ SetupBasicProperties() [2/2]

void tesseract::SetupBasicProperties	(	bool	report_errors,
		UNICHARSET *	unicharset
	)

inline

Definition at line 38 of file unicharset_training_utils.h.

                                                                              {
   SetupBasicProperties(report_errors, false, unicharset);
 }

◆ SoftmaxInPlace()

template<typename T >

void tesseract::SoftmaxInPlace	(	int	n,
		T *	inout
	)

inline

Definition at line 146 of file functions.h.

                                             {
   if (n <= 0) return;
   // A limit on the negative range input to exp to guarantee non-zero output.
   const T kMaxSoftmaxActivation = 86.0f;
  
   T max_output = inout[0];
   for (int i = 1; i < n; i++) {
     T output = inout[i];
     if (output > max_output) max_output = output;
   }
   T prob_total = 0.0;
   for (int i = 0; i < n; i++) {
     T prob = inout[i] - max_output;
     prob = exp(ClipToRange(prob, -kMaxSoftmaxActivation, static_cast<T>(0)));
     prob_total += prob;
     inout[i] = prob;
   }
   if (prob_total > 0.0) {
     for (int i = 0; i < n; i++) inout[i] /= prob_total;
   }
 }

◆ sort_cmp()

template<typename T >

int tesseract::sort_cmp	(	const void *	t1,
		const void *	t2
	)

Definition at line 384 of file genericvector.h.

                                              {
   const T* a = static_cast<const T*>(t1);
   const T* b = static_cast<const T*>(t2);
   if (*a < *b) {
     return -1;
   }
   if (*b < *a) {
     return 1;
   }
   return 0;
 }

◆ sort_ptr_cmp()

template<typename T >

int tesseract::sort_ptr_cmp	(	const void *	t1,
		const void *	t2
	)

Definition at line 401 of file genericvector.h.

                                                  {
   const T* a = *static_cast<T* const*>(t1);
   const T* b = *static_cast<T* const*>(t2);
   if (*a < *b) {
     return -1;
   }
   if (*b < *a) {
     return 1;
   }
   return 0;
 }

◆ SortByBoxBottom()

template<class BBC >

int tesseract::SortByBoxBottom	(	const void *	void1,
		const void *	void2
	)

Definition at line 407 of file bbgrid.h.

                                                           {
   // The void*s are actually doubly indirected, so get rid of one level.
   const BBC* p1 = *static_cast<const BBC* const*>(void1);
   const BBC* p2 = *static_cast<const BBC* const*>(void2);
   int result = p1->bounding_box().bottom() - p2->bounding_box().bottom();
   if (result != 0)
     return result;
   result =  p1->bounding_box().top() - p2->bounding_box().top();
   if (result != 0)
     return result;
   result = p1->bounding_box().left() - p2->bounding_box().left();
   if (result != 0)
     return result;
   return p1->bounding_box().right() - p2->bounding_box().right();
 }

◆ SortByBoxLeft()

template<class BBC >

int tesseract::SortByBoxLeft	(	const void *	void1,
		const void *	void2
	)

Definition at line 371 of file bbgrid.h.

                                                         {
   // The void*s are actually doubly indirected, so get rid of one level.
   const BBC* p1 = *static_cast<const BBC* const*>(void1);
   const BBC* p2 = *static_cast<const BBC* const*>(void2);
   int result = p1->bounding_box().left() - p2->bounding_box().left();
   if (result != 0)
     return result;
   result = p1->bounding_box().right() - p2->bounding_box().right();
   if (result != 0)
     return result;
   result = p1->bounding_box().bottom() - p2->bounding_box().bottom();
   if (result != 0)
     return result;
   return p1->bounding_box().top() - p2->bounding_box().top();
 }

◆ SortByRating()

template<class BLOB_CHOICE >

int tesseract::SortByRating	(	const void *	void1,
		const void *	void2
	)

Definition at line 81 of file pieces.cpp.

◆ SortByUnicharID()

template<class BLOB_CHOICE >

int tesseract::SortByUnicharID	(	const void *	void1,
		const void *	void2
	)

Definition at line 73 of file pieces.cpp.

78 {

◆ SortRightToLeft()

template<class BBC >

int tesseract::SortRightToLeft	(	const void *	void1,
		const void *	void2
	)

Definition at line 389 of file bbgrid.h.

                                                           {
   // The void*s are actually doubly indirected, so get rid of one level.
   const BBC* p1 = *static_cast<const BBC* const*>(void1);
   const BBC* p2 = *static_cast<const BBC* const*>(void2);
   int result = p2->bounding_box().right() - p1->bounding_box().right();
   if (result != 0)
     return result;
   result = p2->bounding_box().left() - p1->bounding_box().left();
   if (result != 0)
     return result;
   result = p1->bounding_box().bottom() - p2->bounding_box().bottom();
   if (result != 0)
     return result;
   return p1->bounding_box().top() - p2->bounding_box().top();
 }

◆ SpanUTF8NotWhitespace()

unsigned int tesseract::SpanUTF8NotWhitespace ( const char * text )

Definition at line 259 of file normstrngs.cpp.

276 {

◆ SpanUTF8Whitespace()

unsigned int tesseract::SpanUTF8Whitespace ( const char * text )

Definition at line 249 of file normstrngs.cpp.

                                          {
   return IsValidCodepoint(ch) &&
          !(ch >= 0xFDD0 && ch <= 0xFDEF) &&  // Noncharacters.
          !(ch >= 0xFFFE && ch <= 0xFFFF) && !(ch >= 0x1FFFE && ch <= 0x1FFFF) &&
          !(ch >= 0x2FFFE && ch <= 0x2FFFF) &&

◆ StrongModel()

bool tesseract::StrongModel ( const ParagraphModel * model )

inline

Definition at line 70 of file paragraphs_internal.h.

 {
   return model != nullptr && model != kCrownLeft && model != kCrownRight;
 }

◆ SumVectors()

void tesseract::SumVectors	(	int	n,
		const double *	v1,
		const double *	v2,
		const double *	v3,
		const double *	v4,
		const double *	v5,
		double *	sum
	)

inline

Definition at line 192 of file functions.h.

                                     {
   for (int i = 0; i < n; ++i) {
     sum[i] = v1[i] + v2[i] + v3[i] + v4[i] + v5[i];
   }
 }

◆ Tanh()

double tesseract::Tanh ( double x )

inline

Definition at line 43 of file functions.h.

                              {
   if (x < 0.0) return -Tanh(-x);
   x *= kScaleFactor;
   unsigned index = static_cast<unsigned>(x);
   if (index >= (kTableSize - 1)) return 1.0;
   double tanh_i0 = TanhTable[index];
   double tanh_i1 = TanhTable[index + 1];
   // Linear interpolation.
   return tanh_i0 + (tanh_i1 - tanh_i0) * (x - index);
 }

◆ TEST_F() [1/34]

tesseract::TEST_F	(	EquationFinderTest	,
		CheckSeedBlobsCount
	)

Definition at line 342 of file equationdetect_test.cc.

                                                 {
   // Exercises RunCheckSeedBlobsCount on four fake partitions that share one
   // box but are populated differently through AddMathDigitBlobs.
   // NOTE(review): the AddMathDigitBlobs arguments are read here as
   // (math count, digit count, total count, partition) -- confirm against the
   // fixture.
   TBOX box(0, 950, 999, 999);
   ColPartition* part1 =
       ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
   ColPartition* part2 =
       ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
   ColPartition* part3 =
       ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
   ColPartition* part4 =
       ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
  
   // Part 1: 8 math, 0 digit, 20 total.
   equation_det_->AddMathDigitBlobs(8, 0, 20, part1);
   EXPECT_TRUE(equation_det_->RunCheckSeedBlobsCount(part1));
  
   // Part 2: 1 math, 8 digit, 20 total.
   equation_det_->AddMathDigitBlobs(1, 8, 20, part2);
   EXPECT_FALSE(equation_det_->RunCheckSeedBlobsCount(part2));
  
   // Part 3: 3 math, 8 digit, 20 total.
   equation_det_->AddMathDigitBlobs(3, 8, 20, part3);
   EXPECT_TRUE(equation_det_->RunCheckSeedBlobsCount(part3));
  
   // Part 4: 0 math, 0 digit, 8 total.
   equation_det_->AddMathDigitBlobs(0, 0, 8, part4);
   EXPECT_FALSE(equation_det_->RunCheckSeedBlobsCount(part4));
  
   // Release memory.
   part1->DeleteBoxes();
   delete (part1);
   part2->DeleteBoxes();
   delete (part2);
   part3->DeleteBoxes();
   delete (part3);
   part4->DeleteBoxes();
   delete (part4);
 }

◆ TEST_F() [2/34]

tesseract::TEST_F	(	EquationFinderTest	,
		ComputeCPsSuperBBox
	)

Definition at line 420 of file equationdetect_test.cc.

                                                 {
   // Inserts fake partitions into a grid in three stages and, after each
   // stage, hands the expected union box to TestComputeCPsSuperBBox.
   // This body contains no EXPECT_* itself; the comparison presumably happens
   // inside TestComputeCPsSuperBBox -- TODO confirm.
   // NOTE(review): pix is passed to SetPixBinary and is not destroyed in this
   // body; confirm who owns it.
   Pix* pix = pixCreate(1001, 1001, 1);
   equation_det_->SetPixBinary(pix);
   ColPartitionGrid part_grid(10, ICOORD(0, 0), ICOORD(1000, 1000));
  
   TBOX box1(0, 0, 999, 99);
   ColPartition* part1 =
       ColPartition::FakePartition(box1, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
   TBOX box2(0, 100, 499, 199);
   ColPartition* part2 =
       ColPartition::FakePartition(box2, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
   TBOX box3(500, 100, 999, 199);
   ColPartition* part3 =
       ColPartition::FakePartition(box3, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
   TBOX box4(0, 200, 999, 299);
   ColPartition* part4 =
       ColPartition::FakePartition(box4, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
   TBOX box5(0, 900, 999, 999);
   ColPartition* part5 =
       ColPartition::FakePartition(box5, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
  
   // Add part1->part3 into part_grid and test.
   part_grid.InsertBBox(true, true, part1);
   part_grid.InsertBBox(true, true, part2);
   part_grid.InsertBBox(true, true, part3);
   TBOX super_box(0, 0, 999, 199);
   equation_det_->TestComputeCPsSuperBBox(super_box, &part_grid);
  
   // Add part4 and test.
   part_grid.InsertBBox(true, true, part4);
   TBOX super_box2(0, 0, 999, 299);
   equation_det_->TestComputeCPsSuperBBox(super_box2, &part_grid);
  
   // Add part5 and test.
   part_grid.InsertBBox(true, true, part5);
   TBOX super_box3(0, 0, 999, 999);
   equation_det_->TestComputeCPsSuperBBox(super_box3, &part_grid);
  
   // Release memory.
   part1->DeleteBoxes();
   delete (part1);
   part2->DeleteBoxes();
   delete (part2);
   part3->DeleteBoxes();
   delete (part3);
   part4->DeleteBoxes();
   delete (part4);
   part5->DeleteBoxes();
   delete (part5);
 }

◆ TEST_F() [3/34]

tesseract::TEST_F	(	EquationFinderTest	,
		ComputeForegroundDensity
	)

Definition at line 380 of file equationdetect_test.cc.

                                                      {
   // Checks RunComputeForegroundDensity on three boxes of a half-set 1 bpp
   // image: one expected to be empty, one straddling the height / 2 boundary,
   // and one expected to be fully set.
   // Create the pix with top half foreground, bottom half background.
   int width = 1024, height = 768;
   Pix* pix = pixCreate(width, height, 1);
   pixRasterop(pix, 0, 0, width, height / 2, PIX_SET, nullptr, 0, 0);
   TBOX box1(100, 0, 140, 140), box2(100, height / 2 - 20, 140, height / 2 + 20),
       box3(100, height - 40, 140, height);
   // NOTE(review): pix is not destroyed in this body; confirm that
   // SetPixBinary takes ownership.
   equation_det_->SetPixBinary(pix);
  
   // Verify: expected densities are 0.0, 0.5 and 1.0, each within 0.0001.
   EXPECT_NEAR(0.0, equation_det_->RunComputeForegroundDensity(box1), 0.0001f);
   EXPECT_NEAR(0.5, equation_det_->RunComputeForegroundDensity(box2), 0.0001f);
   EXPECT_NEAR(1.0, equation_det_->RunComputeForegroundDensity(box3), 0.0001f);
 }

◆ TEST_F() [4/34]

tesseract::TEST_F	(	EquationFinderTest	,
		CountAlignment
	)

Definition at line 395 of file equationdetect_test.cc.

                                            {
   // Checks RunCountAlignment against the vector {1, 1, 1, 100, 200, 200}
   // for exact values, nearby values and values far from every entry.
   GenericVector<int> vec;
   vec.push_back(1);
   vec.push_back(1);
   vec.push_back(1);
   vec.push_back(100);
   vec.push_back(200);
   vec.push_back(200);
  
   // Test the right point.
   EXPECT_EQ(3, equation_det_->RunCountAlignment(vec, 1));
   EXPECT_EQ(1, equation_det_->RunCountAlignment(vec, 100));
   EXPECT_EQ(2, equation_det_->RunCountAlignment(vec, 200));
  
   // Test the near neighbors.
   EXPECT_EQ(3, equation_det_->RunCountAlignment(vec, 3));
   EXPECT_EQ(1, equation_det_->RunCountAlignment(vec, 99));
   EXPECT_EQ(2, equation_det_->RunCountAlignment(vec, 202));
  
   // Test the far neighbors.
   EXPECT_EQ(0, equation_det_->RunCountAlignment(vec, 150));
   EXPECT_EQ(0, equation_det_->RunCountAlignment(vec, 50));
   EXPECT_EQ(0, equation_det_->RunCountAlignment(vec, 250));
 }

◆ TEST_F() [5/34]

tesseract::TEST_F	(	EquationFinderTest	,
		EstimateTypeForUnichar
	)

Definition at line 233 of file equationdetect_test.cc.

                                                    {
   // Checks the special-text type that RunEstimateTypeForUnichar reports for
   // single-character strings: letters, punctuation, digits and math symbols.
   // Test abc characters.
   EXPECT_EQ(BSTT_NONE, equation_det_->RunEstimateTypeForUnichar("a"));
   EXPECT_EQ(BSTT_NONE, equation_det_->RunEstimateTypeForUnichar("c"));
  
   // Test punctuation characters.
   EXPECT_EQ(BSTT_NONE, equation_det_->RunEstimateTypeForUnichar("'"));
   EXPECT_EQ(BSTT_NONE, equation_det_->RunEstimateTypeForUnichar(","));
  
   // Test digits.
   EXPECT_EQ(BSTT_DIGIT, equation_det_->RunEstimateTypeForUnichar("1"));
   EXPECT_EQ(BSTT_DIGIT, equation_det_->RunEstimateTypeForUnichar("4"));
   // "|" is also expected to come back as BSTT_DIGIT.
   EXPECT_EQ(BSTT_DIGIT, equation_det_->RunEstimateTypeForUnichar("|"));
  
   // Test math symbols.
   EXPECT_EQ(BSTT_MATH, equation_det_->RunEstimateTypeForUnichar("("));
   EXPECT_EQ(BSTT_MATH, equation_det_->RunEstimateTypeForUnichar("+"));
 }

◆ TEST_F() [6/34]

tesseract::TEST_F	(	EquationFinderTest	,
		IdentifySpecialText
	)

Definition at line 181 of file equationdetect_test.cc.

                                                 {
   // This test is currently always skipped: the "#if 1" branch calls
   // GTEST_SKIP(). The "#else" branch keeps the real test body for when the
   // equ_gt1.tif image is available again.
 #if 1
   GTEST_SKIP();
 #else // TODO: missing equ_gt1.tif
   // Load Image.
   std::string imagefile = file::JoinPath(testdata_dir_, "equ_gt1.tif");
   Pix* pix_binary = pixRead(imagefile.c_str());
   CHECK(pix_binary != nullptr && pixGetDepth(pix_binary) == 1);
  
   // Get components.
   BLOCK_LIST blocks;
   TO_BLOCK_LIST to_blocks;
   AddPageBlock(pix_binary, &blocks);
   Textord* textord = tesseract_->mutable_textord();
   textord->find_components(pix_binary, &blocks, &to_blocks);
  
   // Identify special texts from to_blocks.
   // stt_count maps each special_text_type() value to the number of blobs
   // that received it.
   TO_BLOCK_IT to_block_it(&to_blocks);
   std::map<int, int> stt_count;
   for (to_block_it.mark_cycle_pt(); !to_block_it.cycled_list();
        to_block_it.forward()) {
     TO_BLOCK* to_block = to_block_it.data();
     BLOBNBOX_IT blob_it(&(to_block->blobs));
     for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
       BLOBNBOX* blob = blob_it.data();
       // blob->set_special_text_type(BSTT_NONE);
       equation_det_->RunIdentifySpecialText(blob, 0);
       tensorflow::gtl::InsertIfNotPresent(&stt_count, blob->special_text_type(), 0);
       stt_count[blob->special_text_type()]++;
     }
   }
  
   // Verify the number, but allow a range of +/- kCountRange before squealing.
   const int kCountRange = 3;
   EXPECT_GE(39 + kCountRange, stt_count[BSTT_NONE]);
   EXPECT_LE(39 - kCountRange, stt_count[BSTT_NONE]);
  
   // if you count all the subscripts etc, there are ~45 italic chars.
   EXPECT_GE(45 + kCountRange, stt_count[BSTT_ITALIC]);
   EXPECT_LE(45 - kCountRange, stt_count[BSTT_ITALIC]);
   EXPECT_GE(41 + kCountRange, stt_count[BSTT_DIGIT]);
   EXPECT_LE(41 - kCountRange, stt_count[BSTT_DIGIT]);
   EXPECT_GE(50 + kCountRange, stt_count[BSTT_MATH]);
   EXPECT_LE(50 - kCountRange, stt_count[BSTT_MATH]);
   EXPECT_GE(10 + kCountRange, stt_count[BSTT_UNCLEAR]);
   EXPECT_LE(10 - kCountRange, stt_count[BSTT_UNCLEAR]);
  
   // Release memory.
   pixDestroy(&pix_binary);
 #endif
 }

◆ TEST_F() [7/34]

tesseract::TEST_F	(	EquationFinderTest	,
		IsIndented
	)

Definition at line 252 of file equationdetect_test.cc.

                                        {
   // Inserts five fake partitions into a grid and checks the indent type that
   // RunIsIndented reports for each of them.
   ColPartitionGrid part_grid(10, ICOORD(0, 0), ICOORD(1000, 1000));
  
   // Create five ColPartitions:
   // part 1: ************
   // part 2:   *********
   // part 3: *******
   // part 4:   *****
   //
   // part 5:   ********
   TBOX box1(0, 950, 999, 999);
   ColPartition* part1 =
       ColPartition::FakePartition(box1, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
   part_grid.InsertBBox(true, true, part1);
   TBOX box2(300, 920, 900, 940);
   ColPartition* part2 =
       ColPartition::FakePartition(box2, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
   part_grid.InsertBBox(true, true, part2);
   TBOX box3(0, 900, 600, 910);
   ColPartition* part3 =
       ColPartition::FakePartition(box3, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
   part_grid.InsertBBox(true, true, part3);
   TBOX box4(300, 890, 600, 899);
   ColPartition* part4 =
       ColPartition::FakePartition(box4, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
   part_grid.InsertBBox(true, true, part4);
   TBOX box5(300, 500, 900, 510);
   ColPartition* part5 =
       ColPartition::FakePartition(box5, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
   part_grid.InsertBBox(true, true, part5);
  
   // Test
   // part1 should be no indent.
   EXPECT_EQ(EquationDetect::NO_INDENT,
             equation_det_->RunIsIndented(&part_grid, part1));
   // part2 should be left indent in terms of part1.
   EXPECT_EQ(EquationDetect::LEFT_INDENT,
             equation_det_->RunIsIndented(&part_grid, part2));
   // part3 should be right indent.
   EXPECT_EQ(EquationDetect::RIGHT_INDENT,
             equation_det_->RunIsIndented(&part_grid, part3));
   // part4 should be both indented.
   EXPECT_EQ(EquationDetect::BOTH_INDENT,
             equation_det_->RunIsIndented(&part_grid, part4));
   // part5 should be no indent because it is too far from part1.
   EXPECT_EQ(EquationDetect::NO_INDENT,
             equation_det_->RunIsIndented(&part_grid, part5));
  
   // Release memory.
   part1->DeleteBoxes();
   delete (part1);
   part2->DeleteBoxes();
   delete (part2);
   part3->DeleteBoxes();
   delete (part3);
   part4->DeleteBoxes();
   delete (part4);
   part5->DeleteBoxes();
   delete (part5);
 }

◆ TEST_F() [8/34]

tesseract::TEST_F	(	EquationFinderTest	,
		IsNearSmallNeighbor
	)

Definition at line 313 of file equationdetect_test.cc.

                                                 {
   // Checks RunIsNearSmallNeighbor on ordered pairs drawn from four boxes.
   // The (box1, box2) / (box2, box1) pair below shows that the result depends
   // on argument order.
   // Create four tboxes:
   //          part 1, part 2
   //           *****   *****
   // part 3:   *****
   //
   // part 4: *****************
   TBOX box1(0, 950, 499, 999);
   TBOX box2(500, 950, 999, 998);
   TBOX box3(0, 900, 499, 949);
   TBOX box4(0, 550, 499, 590);
  
   // Test
   // box2 should be box1's near neighbor but not vice versa.
   EXPECT_TRUE(equation_det_->RunIsNearSmallNeighbor(box1, box2));
   EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box2, box1));
   // box1 and box3 should be near neighbors of each other.
   // NOTE(review): only the (box1, box3) direction is checked here; the
   // second assertion uses (box2, box3), which duplicates a check made just
   // below, so (box3, box1) is never exercised -- confirm the intent.
   EXPECT_TRUE(equation_det_->RunIsNearSmallNeighbor(box1, box3));
   EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box2, box3));
   // box2 and box3 should not be near neighbors of each other.
   EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box2, box3));
   EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box3, box2));
  
   // box4 should not be the near neighbor of any one.
   EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box1, box4));
   EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box2, box4));
   EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box3, box4));
 }

◆ TEST_F() [9/34]

tesseract::TEST_F	(	EquationFinderTest	,
		SplitCPHor
	)

Definition at line 506 of file equationdetect_test.cc.

                                        {
   // Checks RunSplitCPHor on a partition holding zero, one and then six
   // blobs, comparing the bounding boxes of the returned partitions.
   TBOX box(0, 0, 999, 99);
   ColPartition* part =
       ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
   part->DeleteBoxes();
   part->set_median_width(10);
   GenericVector<ColPartition*> parts_splitted;
  
   // Test an empty part.
   equation_det_->RunSplitCPHor(part, &parts_splitted);
   EXPECT_TRUE(parts_splitted.empty());
   // Test with one blob.
   AddBlobIntoPart(TBOX(0, 0, 10, 50), part);
  
   equation_det_->RunSplitCPHor(part, &parts_splitted);
   EXPECT_EQ(1, parts_splitted.size());
   EXPECT_TRUE(TBOX(0, 0, 10, 50) == parts_splitted[0]->bounding_box());
  
   // Add more blob and test.
   // NOTE(review): parts_splitted is not cleared by this test before the next
   // call, yet exactly 3 entries are expected afterwards; presumably
   // RunSplitCPHor resets the output vector -- TODO confirm.
   AddBlobIntoPart(TBOX(11, 0, 20, 60), part);
   AddBlobIntoPart(TBOX(25, 0, 30, 55), part);  // break point.
   AddBlobIntoPart(TBOX(100, 0, 110, 15), part);
   AddBlobIntoPart(TBOX(125, 0, 140, 45), part);  // break point.
   AddBlobIntoPart(TBOX(500, 0, 540, 35), part);  // break point.
   equation_det_->RunSplitCPHor(part, &parts_splitted);
  
   // Verify.
   EXPECT_EQ(3, parts_splitted.size());
   EXPECT_TRUE(TBOX(0, 0, 30, 60) == parts_splitted[0]->bounding_box());
   EXPECT_TRUE(TBOX(100, 0, 140, 45) == parts_splitted[1]->bounding_box());
   EXPECT_TRUE(TBOX(500, 0, 540, 35) == parts_splitted[2]->bounding_box());
  
   // Free the returned partitions, then the source partition.
   parts_splitted.delete_data_pointers();
   part->DeleteBoxes();
   delete (part);
 }

◆ TEST_F() [10/34]

tesseract::TEST_F	(	EquationFinderTest	,
		SplitCPHorLite
	)

Definition at line 471 of file equationdetect_test.cc.

                                            {
   // Checks RunSplitCPHorLite on a partition holding zero, one and then six
   // blobs, comparing the returned boxes directly.
   TBOX box(0, 0, 999, 99);
   ColPartition* part =
       ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
   part->DeleteBoxes();
   part->set_median_width(10);
   GenericVector<TBOX> splitted_boxes;
  
   // Test an empty part.
   equation_det_->RunSplitCPHorLite(part, &splitted_boxes);
   EXPECT_TRUE(splitted_boxes.empty());
  
   // Test with one blob.
   AddBlobIntoPart(TBOX(0, 0, 10, 50), part);
   equation_det_->RunSplitCPHorLite(part, &splitted_boxes);
   EXPECT_EQ(1, splitted_boxes.size());
   EXPECT_TRUE(TBOX(0, 0, 10, 50) == splitted_boxes[0]);
  
   // Add more blob and test.
   // NOTE(review): splitted_boxes is not cleared by this test before the next
   // call, yet exactly 3 entries are expected afterwards; presumably
   // RunSplitCPHorLite resets the output vector -- TODO confirm.
   AddBlobIntoPart(TBOX(11, 0, 20, 60), part);
   AddBlobIntoPart(TBOX(25, 0, 30, 55), part);  // break point.
   AddBlobIntoPart(TBOX(100, 0, 110, 15), part);
   AddBlobIntoPart(TBOX(125, 0, 140, 45), part);  // break point.
   AddBlobIntoPart(TBOX(500, 0, 540, 35), part);  // break point.
   equation_det_->RunSplitCPHorLite(part, &splitted_boxes);
   // Verify.
   EXPECT_EQ(3, splitted_boxes.size());
   EXPECT_TRUE(TBOX(0, 0, 30, 60) == splitted_boxes[0]);
   EXPECT_TRUE(TBOX(100, 0, 140, 45) == splitted_boxes[1]);
   EXPECT_TRUE(TBOX(500, 0, 540, 35) == splitted_boxes[2]);
  
   // Release memory.
   part->DeleteBoxes();
   delete (part);
 }

◆ TEST_F() [11/34]

tesseract::TEST_F	(	HeapTest	,
		DoublePtrTest
	)

Definition at line 187 of file heap_test.cc.

                                 {
   // Connects two DoublePtr objects, then checks that copy-construction and
   // assignment each leave the destination linked (its OtherEnd() points back
   // to it) and the source unlinked (OtherEnd() == nullptr).
   DoublePtr ptr1;
   DoublePtr ptr2;
   ptr1.Connect(&ptr2);
   // Check that the correct copy constructor is used.
   DoublePtr ptr3(ptr1);
   EXPECT_EQ(&ptr3, ptr3.OtherEnd()->OtherEnd());
   EXPECT_TRUE(ptr1.OtherEnd() == nullptr);
   // Check that the correct operator= is used.
   ptr1 = ptr3;
   EXPECT_EQ(&ptr1, ptr1.OtherEnd()->OtherEnd());
   EXPECT_TRUE(ptr3.OtherEnd() == nullptr);
 }

◆ TEST_F() [12/34]

tesseract::TEST_F	(	HeapTest	,
		MixedTest
	)

Definition at line 95 of file heap_test.cc.

                             {
   // Interleaves pushes and pops: fill, drop five entries from both
   // containers, fill again, then compare heap and vector.
   GenericHeap<IntKDPair> heap;
   KDVector v;
   // Push the test data onto both the heap and the KDVector.
   PushTestData(&heap, &v);
   // Sort the vector and remove the first 5 values from both heap and v.
   v.sort();
   for (int i = 0; i < 5; ++i) {
     heap.Pop(nullptr);
     v.remove(0);
   }
   // Push the test data onto both the heap and the KDVector.
   PushTestData(&heap, &v);
   // Heap and vector should still match!
   VerifyHeapVectorMatch(&heap, &v);
 }

◆ TEST_F() [13/34]

tesseract::TEST_F	(	HeapTest	,
		PopWorstTest
	)

Definition at line 114 of file heap_test.cc.

                                {
   // Checks that PopWorst removes the expected element (key 65536, data 6)
   // and that the heap still matches the vector once the vector's last sorted
   // entry is dropped as well.
   GenericHeap<IntKDPair> heap;
   KDVector v;
   // Push the test data onto both the heap and the KDVector.
   PushTestData(&heap, &v);
   // Get the worst element off the heap.
   IntKDPair pair;
   heap.PopWorst(&pair);
   EXPECT_EQ(pair.key, 65536);
   EXPECT_EQ(pair.data, 6);
   // Sort and remove the worst element from the vector.
   v.sort();
   v.truncate(v.size() - 1);
   // After that they should still match!
   VerifyHeapVectorMatch(&heap, &v);
 }

◆ TEST_F() [14/34]

tesseract::TEST_F	(	HeapTest	,
		RevalueTest
	)

Definition at line 133 of file heap_test.cc.

                               {
   // Changes the keys of two heap entries in place, calls Reshuffle for each,
   // and checks that popping the heap still follows the sorted vector.
   // Here the data element of the pair is a DoublePtr, which links the entries
   // in the vector and heap, and we test a MAX heap.
   typedef KDPairDec<int, DoublePtr> PtrPair;
   GenericHeap<PtrPair> heap;
   GenericVector<PtrPair> v;
   // Push the test data onto both the heap and the vector.
   for (size_t i = 0; i < ARRAYSIZE(test_data); ++i) {
     PtrPair h_pair;
     h_pair.key = test_data[i];
     PtrPair v_pair;
     v_pair.key = test_data[i];
     // Link the two copies so that each can be reached from the other.
     h_pair.data.Connect(&v_pair.data);
     heap.Push(&h_pair);
     v.push_back(v_pair);
   }
   // Test changes both ways. Index 0 is 8, so change it to -1.
   v[0].key = -1;
   // v[0].data.OtherEnd() is a pointer to the data element in the appropriate
   // heap entry, wherever it may be. We can change its value via that pointer.
   // Without Reshuffle, that would be a terribly bad thing to do, as it violates
   // the heap invariant, making the heap corrupt.
   PtrPair* pair_ptr = PtrPair::RecastDataPointer(v[0].data.OtherEnd());
   pair_ptr->key = v[0].key;
   heap.Reshuffle(pair_ptr);
   // Index 1 is 1. Change to 32767.
   v[1].key = 32767;
   pair_ptr = PtrPair::RecastDataPointer(v[1].data.OtherEnd());
   pair_ptr->key = v[1].key;
   heap.Reshuffle(pair_ptr);
   // After the changes, popping the heap should still match the sorted order
   // of the vector.
   v.sort();
   // The first key after sorting is expected to be larger than the last one.
   EXPECT_GT(v[0].key, v.back().key);
   for (int i = 0; i < v.size(); ++i) {
     EXPECT_EQ(v[i].key, heap.PeekTop().key);
     EXPECT_FALSE(heap.empty());
     heap.Pop(nullptr);
   }
   EXPECT_TRUE(heap.empty());
 }

◆ TEST_F() [15/34]

tesseract::TEST_F	(	HeapTest	,
		SortTest
	)

Definition at line 82 of file heap_test.cc.

                            {
   // Checks that a new heap is empty and the same size as a new KDVector,
   // then that heap and vector match after both receive the test data.
   GenericHeap<IntKDPair> heap;
   EXPECT_TRUE(heap.empty());
   KDVector v;
   EXPECT_EQ(heap.size(), v.size());
   // Push the test data onto both the heap and the KDVector.
   PushTestData(&heap, &v);
   VerifyHeapVectorMatch(&heap, &v);
 }

◆ TEST_F() [16/34]

tesseract::TEST_F	(	LSTMTrainerTest	,
		BasicTest
	)

Definition at line 29 of file lstm_test.cc.

                                    {
   // Trains two networks and compares the values returned by TrainIterations
   // (treated here as error rates): each must fall below its own threshold,
   // and the LSTM must end up lower than the convolver.
   // A Convolver sliding window classifier without LSTM.
   SetupTrainer(
       "[1,32,0,1 Ct5,5,16 Mp4,4 Ct1,1,16 Ct3,3,128 Mp4,1 Ct1,1,64 S2,1 "
       "Ct1,1,64O1c1]",
       "no-lstm", "eng/eng.unicharset", "eng.Arial.exp0.lstmf", false, false,
       2e-4, false, "eng");
   double non_lstm_err = TrainIterations(kTrainerIterations * 4);
   EXPECT_LT(non_lstm_err, 98);
   LOG(INFO) << "********** Expected  < 98 ************\n" ;
  
   // A basic single-layer, single direction LSTM.
   SetupTrainerEng("[1,1,0,32 Lfx100 O1c1]", "1D-lstm", false, false);
   double lstm_uni_err = TrainIterations(kTrainerIterations * 2);
   EXPECT_LT(lstm_uni_err, 86);
    LOG(INFO) << "********** Expected  < 86 ************\n" ;
   // Beats the convolver. (Although it does have a lot more weights, it still
   // iterates faster.)
   EXPECT_LT(lstm_uni_err, non_lstm_err);
 }

◆ TEST_F() [17/34]

tesseract::TEST_F	(	LSTMTrainerTest	,
		BidiTest
	)

Definition at line 61 of file lstm_test.cc.

                                   {
   // Trains the network for kTrainerIterations, requires the returned value
   // to be below 75, then runs the int-mode check.
   // A basic single-layer, bi-di 1d LSTM.
   SetupTrainerEng("[1,1,0,32 Lbx100 O1c1]", "bidi-lstm", false, false);
   double lstm_bi_err = TrainIterations(kTrainerIterations);
   EXPECT_LT(lstm_bi_err, 75);
   LOG(INFO) << "********** Expected   < 75 ************\n" ;
   // Int mode training is dead, so convert the trained network to int and check
   // that its error rate is close to the float version.
   TestIntMode(kTrainerIterations);
 }

◆ TEST_F() [18/34]

tesseract::TEST_F	(	LSTMTrainerTest	,
		ColorTest
	)

Definition at line 51 of file lstm_test.cc.

                                    {
   // Trains the "2D-color-lstm" network for kTrainerIterations and requires
   // the returned value to be below 85.
   // NOTE(review): the comment below does not seem to fit this spec. Apart
   // from the input depth, the spec is the one Test2D describes as a 2-layer
   // LSTM with a 2-D feature-extracting LSTM on the bottom -- confirm and
   // update.
   // A basic single-layer, single direction LSTM.
   SetupTrainerEng("[1,32,0,3 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]",
                   "2D-color-lstm", true, true);
   double lstm_uni_err = TrainIterations(kTrainerIterations);
   EXPECT_LT(lstm_uni_err, 85);
 //  EXPECT_GT(lstm_uni_err, 66);
   LOG(INFO) << "********** Expected  < 85 ************\n" ;
 }

◆ TEST_F() [19/34]

tesseract::TEST_F	(	LSTMTrainerTest	,
		DeterminismTest
	)

Definition at line 111 of file lstm_test.cc.

                                          {
   // Checks that training is repeatable. Two trainers built from the same
   // spec must report equal errors after the same number of iterations, and a
   // trainer restored from a saved dump must report the same errors as one
   // that simply kept training.
   // Run A: train, record the errors, and save a dump of the trainer.
   SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]",
                   "2-D-2-layer-lstm", false, false);
   double lstm_2d_err_a = TrainIterations(kTrainerIterations);
   double act_error_a = trainer_->ActivationError();
   double char_error_a = trainer_->CharError();
   GenericVector<char> trainer_a_data;
   EXPECT_TRUE(trainer_->SaveTrainingDump(NO_BEST_TRAINER, trainer_.get(),
                                          &trainer_a_data));
   // Run B: same spec and iteration count; the results must equal run A.
   SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]",
                   "2-D-2-layer-lstm", false, false);
   double lstm_2d_err_b = TrainIterations(kTrainerIterations);
   double act_error_b = trainer_->ActivationError();
   double char_error_b = trainer_->CharError();
   EXPECT_FLOAT_EQ(lstm_2d_err_a, lstm_2d_err_b);
   EXPECT_FLOAT_EQ(act_error_a, act_error_b);
   EXPECT_FLOAT_EQ(char_error_a, char_error_b);
   // Now train some more iterations.
   lstm_2d_err_b = TrainIterations(kTrainerIterations / 3);
   act_error_b = trainer_->ActivationError();
   char_error_b = trainer_->CharError();
   // Unpack into a new trainer and train that some more too.
   SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]",
                   "2-D-2-layer-lstm", false, false);
   EXPECT_TRUE(trainer_->ReadTrainingDump(trainer_a_data, trainer_.get()));
   lstm_2d_err_a = TrainIterations(kTrainerIterations / 3);
   act_error_a = trainer_->ActivationError();
   char_error_a = trainer_->CharError();
   EXPECT_FLOAT_EQ(lstm_2d_err_a, lstm_2d_err_b);
   EXPECT_FLOAT_EQ(act_error_a, act_error_b);
   EXPECT_FLOAT_EQ(char_error_a, char_error_b);
   LOG(INFO) << "********** *** ************\n" ;
 }

◆ TEST_F() [20/34]

tesseract::TEST_F	(	LSTMTrainerTest	,
		EncodeDecodeBothTestKor
	)

Definition at line 41 of file lstm_recode_test.cc.

                                                  {
   // Runs TestEncodeDecodeBoth for language "kor" on one Korean sentence.
   TestEncodeDecodeBoth("kor", "한국어 위키백과에 오신 것을 환영합니다!");
 }

◆ TEST_F() [21/34]

tesseract::TEST_F	(	LSTMTrainerTest	,
		EncodedSoftmaxTest
	)

Definition at line 178 of file lstm_test.cc.

                                             {
   // Trains the network for 2 * kTrainerIterations, requires the returned
   // value to be below 62.0, then runs the int-mode check.
   // LSTM with a built-in encoded softmax can beat the external softmax.
   SetupTrainerEng("[1,1,0,32 LE96]", "Lstm-+-softmax", false, true);
   double lstm_sm_err = TrainIterations(kTrainerIterations * 2);
   EXPECT_LT(lstm_sm_err, 62.0);
   LOG(INFO) << "********** Expected   < 62 ************\n" ;
   // Check that it works in int mode too.
   TestIntMode(kTrainerIterations);
 }

◆ TEST_F() [22/34]

tesseract::TEST_F	(	LSTMTrainerTest	,
		RecodeTestKor
	)

Definition at line 29 of file lstm_recode_test.cc.

                                        {
   // Trains the "kor-recode" network for kTrainerIterations and requires the
   // returned value to be below 60.
   // A basic single-layer, bi-di 1d LSTM on Korean.
   SetupTrainer("[1,1,0,32 Lbx96 O1c1]", "kor-recode", "kor/kor.unicharset",
                "kor.Arial_Unicode_MS.exp0.lstmf", true, true, 5e-4, false, "kor");
   double kor_recode_err = TrainIterations(kTrainerIterations);
   EXPECT_LT(kor_recode_err, 60);
   LOG(INFO) << "********** Expected  < 60 ************\n" ;
 }

◆ TEST_F() [23/34]

tesseract::TEST_F	(	LSTMTrainerTest	,
		RecodeTestKorBase
	)

Definition at line 19 of file lstm_recode_test.cc.

                                            {
   // Trains the "kor-full" network for 2 * kTrainerIterations and requires
   // the returned value to be below 88. The SetupTrainer call differs from
   // RecodeTestKor only in the network name and in passing false instead of
   // true as the fifth argument.
   // A basic single-layer, bi-di 1d LSTM on Korean.
   SetupTrainer("[1,1,0,32 Lbx96 O1c1]", "kor-full", "kor/kor.unicharset",
                "kor.Arial_Unicode_MS.exp0.lstmf", false, true, 5e-4, false, "kor");
   double kor_full_err = TrainIterations(kTrainerIterations * 2);
   EXPECT_LT(kor_full_err, 88);
 //  EXPECT_GT(kor_full_err, 85);
   LOG(INFO) << "********** Expected  < 88 ************\n" ;
 }

◆ TEST_F() [24/34]

tesseract::TEST_F	(	LSTMTrainerTest	,
		SoftmaxBaselineTest
	)

Definition at line 146 of file lstm_test.cc.

                                              {
   // Trains the network for 2 * kTrainerIterations, requires the returned
   // value to be below 60, then runs TestIntMode twice and requires the second
   // call to return less than 0.01.
   // A basic single-layer, single direction LSTM.
   SetupTrainerEng("[1,1,0,32 Lfx96 O1c1]", "1D-lstm", false, true);
   double lstm_uni_err = TrainIterations(kTrainerIterations * 2);
   EXPECT_LT(lstm_uni_err, 60);
 //  EXPECT_GT(lstm_uni_err, 48);
   LOG(INFO) << "********** Expected  < 60 ************\n" ;
   // Check that it works in int mode too.
   TestIntMode(kTrainerIterations);
   // If we run TestIntMode again, it tests that int_mode networks can
   // serialize and deserialize correctly.
   double delta = TestIntMode(kTrainerIterations);
   // The two tests (both of int mode this time) should be almost identical.
   LOG(INFO) << "Delta in Int mode error rates = " << delta << "\n";
   EXPECT_LT(delta, 0.01);
 }

◆ TEST_F() [25/34]

tesseract::TEST_F	(	LSTMTrainerTest	,
		SoftmaxTest
	)

Definition at line 166 of file lstm_test.cc.

                                      {
   // Trains the network for 2 * kTrainerIterations, requires the returned
   // value to be below 49.0, then runs the int-mode check.
   // LSTM with a built-in softmax can beat the external softmax.
   SetupTrainerEng("[1,1,0,32 LS96]", "Lstm-+-softmax", false, true);
   double lstm_sm_err = TrainIterations(kTrainerIterations * 2);
   EXPECT_LT(lstm_sm_err, 49.0);
   LOG(INFO) << "********** Expected  < 49 ************\n" ;
   // Check that it works in int mode too.
   TestIntMode(kTrainerIterations);
 }

◆ TEST_F() [26/34]

tesseract::TEST_F	(	LSTMTrainerTest	,
		SpeedTest
	)

Definition at line 100 of file lstm_test.cc.

                                    {
   // Trains the network for kTrainerIterations. The value returned by
   // TrainIterations is discarded and nothing is asserted here.
   SetupTrainerEng(
       "[1,30,0,1 Ct5,5,16 Mp2,2 L2xy24 Ct1,1,48 Mp5,1 Ct1,1,32 S3,1 Lbx64 "
       "O1c1]",
       "2-D-2-layer-lstm", false, true);
   TrainIterations(kTrainerIterations);
    LOG(INFO) << "********** *** ************\n" ;
 }

◆ TEST_F() [27/34]

tesseract::TEST_F	(	LSTMTrainerTest	,
		Test2D
	)

Definition at line 74 of file lstm_test.cc.

                                 {
   // Trains the network for kTrainerIterations * 3 / 2, requires the returned
   // value to be below 98, then runs the int-mode check.
   // A 2-layer LSTM with a 2-D feature-extracting LSTM on the bottom.
   SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]",
                   "2-D-2-layer-lstm", false, false);
   double lstm_2d_err = TrainIterations(kTrainerIterations * 3 / 2 );
   EXPECT_LT(lstm_2d_err, 98);
 //  EXPECT_GT(lstm_2d_err, 90);
   LOG(INFO) << "********** Expected  < 98 ************\n" ;
   // Int mode training is dead, so convert the trained network to int and check
   // that its error rate is close to the float version.
   TestIntMode(kTrainerIterations);
 }

◆ TEST_F() [28/34]

tesseract::TEST_F	(	LSTMTrainerTest	,
		TestAdam
	)

Definition at line 89 of file lstm_test.cc.

                                   {
   // Same network spec as Test2D, but with the last SetupTrainerEng argument
   // set to true and a tighter error bound of 70 after kTrainerIterations.
   // NOTE(review): the last argument presumably enables Adam (TestSquashed
   // labels that position /*adam*/) - confirm against SetupTrainerEng.
   // A 2-layer LSTM with a 2-D feature-extracting LSTM on the bottom.
   SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]",
                   "2-D-2-layer-lstm", false, true);
   double lstm_2d_err = TrainIterations(kTrainerIterations);
   EXPECT_LT(lstm_2d_err, 70);
   LOG(INFO) << "********** Expected   < 70 ************\n" ;
   // Also run the int-mode check; its return value is not examined here.
   TestIntMode(kTrainerIterations);
 }

◆ TEST_F() [29/34]

tesseract::TEST_F	(	LSTMTrainerTest	,
		TestLayerAccess
	)

Definition at line 189 of file lstm_test.cc.

                                          {
   // A 2-layer LSTM with a Squashed feature-extracting LSTM on the bottom.
   SetupTrainerEng("[1,32,0,1 Ct5,5,16 Mp2,2 Lfys32 Lbx128 O1c1]", "SQU-lstm",
                   false, false);
   // Number of layers.
   const int kNumLayers = 8;
   // Expected layer names.
   const char* kLayerIds[kNumLayers] = {":0",   ":1:0", ":1:1",   ":2",
                                        ":3:0", ":4:0", ":4:1:0", ":5"};
   const char* kLayerNames[kNumLayers] = {"Input",   "Convolve", "ConvNL",
                                          "Maxpool", "Lfys32",   "Lbx128LTR",
                                          "Lbx128",  "Output"};
   // Expected number of weights.
   const int kNumWeights[kNumLayers] = {0,
                                        0,
                                        16 * (25 + 1),
                                        0,
                                        32 * (4 * (32 + 16 + 1)),
                                        128 * (4 * (128 + 32 + 1)),
                                        128 * (4 * (128 + 32 + 1)),
                                        112 * (2 * 128 + 1)};
  
   GenericVector<STRING> layers = trainer_->EnumerateLayers();
   EXPECT_EQ(kNumLayers, layers.size());
   for (int i = 0; i < kNumLayers && i < layers.size(); ++i) {
     EXPECT_STREQ(kLayerIds[i], layers[i].c_str());
     EXPECT_STREQ(kLayerNames[i],
                  trainer_->GetLayer(layers[i])->name().c_str());
     EXPECT_EQ(kNumWeights[i], trainer_->GetLayer(layers[i])->num_weights());
   }
 }

◆ TEST_F() [30/34]

tesseract::TEST_F	(	LSTMTrainerTest	,
		TestSquashed
	)

Definition at line 18 of file lstm_squashed_test.cc.

                                       {
   // Trains for kTrainerIterations * 3 / 2 iterations and requires the
   // resulting error to be below 80.
   // A 2-layer LSTM with a Squashed feature-extracting LSTM on the bottom, and
   // a small convolution/maxpool below that.
   // Match training conditions to those typically used with this spec:
   // recoding on, adam on.
   SetupTrainerEng("[1,32,0,1 Ct3,3,16 Mp3,3 Lfys48 Lbx96 O1c1]",
                   "SQU-2-layer-lstm", /*recode*/ true, /*adam*/ true);
   double lstm_2d_err = TrainIterations(kTrainerIterations * 3 / 2);
   EXPECT_LT(lstm_2d_err, 80);
   LOG(INFO) << "********** < 80 ************\n" ;
   // Also run the int-mode check; its return value is not examined here.
   TestIntMode(kTrainerIterations);
 }

◆ TEST_F() [31/34]

tesseract::TEST_F	(	NthItemTest	,
		BoringTest
	)

Definition at line 69 of file nthitem_test.cc.

                                 {
   KDVector v;
   // Push the test data onto the KDVector.
   int test_data[] = {8, 8, 8, 8, 8, 7, 7, 7, 7};
   for (size_t i = 0; i < ARRAYSIZE(test_data); ++i) {
     IntKDPair pair(test_data[i], i);
     v.push_back(pair);
   }
   // The 3rd item is 7 but the 4th is 8..
   int index = v.choose_nth_item(3);
   // The result is 7.
   EXPECT_EQ(7, v[index].key);
   index = v.choose_nth_item(4);
   // The result is 8.
   EXPECT_EQ(8, v[index].key);
   // Get the min item.
   index = v.choose_nth_item(0);
   // The result is 7.
   EXPECT_EQ(7, v[index].key);
   // Get the max item.
   index = v.choose_nth_item(v.size() - 1);
   // The result is 8.
   EXPECT_EQ(8, v[index].key);
 }

◆ TEST_F() [32/34]

tesseract::TEST_F	(	NthItemTest	,
		EqualTest
	)

Definition at line 107 of file nthitem_test.cc.

                                {
   KDVector v;
   // Push the test data onto the KDVector.
   PushTestData(&v);
   // Add an extra 8. This makes the median 7.
   IntKDPair pair(8, 13);
   v.push_back(pair);
   // Get the median item.
   int index = v.choose_nth_item(v.size() / 2);
   // The result is 7, it started out at index 4 or 12.
   EXPECT_EQ(7, v[index].key);
   EXPECT_TRUE(v[index].data == 4 || v[index].data == 12);
 }

◆ TEST_F() [33/34]

tesseract::TEST_F	(	NthItemTest	,
		GeneralTest
	)

Definition at line 45 of file nthitem_test.cc.

                                  {
   // Checks choose_nth_item at both ends of the shared test data, and that
   // out-of-range requests (-1 and size()) give the same results as the
   // nearest valid position.
   KDVector v;
   // Push the test data onto the KDVector.
   PushTestData(&v);
   // Get the min item.
   int index = v.choose_nth_item(0);
   // The result is -32767.
   EXPECT_EQ(-32767, v[index].key);
   // Get the max item.
   index = v.choose_nth_item(v.size() - 1);
   // The result is 65536.
   EXPECT_EQ(65536, v[index].key);
   // Invalid items are silently truncated to valid.
   // Get the min item.
   index = v.choose_nth_item(-1);
   // The result is -32767.
   EXPECT_EQ(-32767, v[index].key);
   // Get the max item.
   index = v.choose_nth_item(v.size());
   // The result is 65536.
   EXPECT_EQ(65536, v[index].key);
 }

◆ TEST_F() [34/34]

tesseract::TEST_F	(	NthItemTest	,
		UniqueTest
	)

Definition at line 95 of file nthitem_test.cc.

                                 {
   // Checks the median of the shared test data, where the expected key (6)
   // has a single expected origin (data 11), so both fields are asserted
   // exactly.
   KDVector v;
   // Push the test data onto the KDVector.
   PushTestData(&v);
   // Get the median item.
   int index = v.choose_nth_item(v.size() / 2);
   // The result is 6, it started out at index 11.
   EXPECT_EQ(6, v[index].key);
   EXPECT_EQ(11, v[index].data);
 }

◆ TraceBlockOnReducedPix()

Pix * tesseract::TraceBlockOnReducedPix	(	BLOCK *	block,
		int	gridsize,
		ICOORD	bleft,
		int *	left,
		int *	bottom
	)

Definition at line 254 of file bbgrid.cpp.

                                                                   {
   // Draws the edges of the block's polygon into a reduced-resolution pix and
   // returns that pix. The pix is obtained from GridReducedPix, which is given
   // the block's bounding box together with gridsize, bleft and the
   // left/bottom out-parameters.
   // NOTE(review): *left and *bottom are read below, so GridReducedPix
   // presumably fills them with the grid offset of the pix - confirm.
   // NOTE(review): the returned Pix is presumably owned by the caller - confirm.
   const TBOX& box = block->pdblk.bounding_box();
   Pix* pix = GridReducedPix(box, gridsize, bleft, left, bottom);
   int wpl = pixGetWpl(pix);
   l_uint32* data = pixGetData(pix);
   ICOORDELT_IT it(block->pdblk.poly_block()->points());
   // The loop header has no increment: it.forward() in the body advances the
   // iterator, so each pass handles the edge from the current point to the
   // next one in the list.
   for (it.mark_cycle_pt(); !it.cycled_list();) {
     ICOORD pos = *it.data();
     it.forward();
     ICOORD next_pos = *it.data();
     ICOORD line_vector = next_pos - pos;
     int major, minor;
     ICOORD major_step, minor_step;
     // NOTE(review): setup_render presumably returns the unit step and length
     // along the dominant axis (major) and along the other axis (minor) -
     // confirm against ICOORD::setup_render.
     line_vector.setup_render(&major_step, &minor_step, &major, &minor);
     // Every iteration takes one major_step; minor is accumulated and a
     // minor_step is taken each time the running total reaches major.
     int accumulator = major / 2;
     // next_pos itself is not plotted here: the loop stops on reaching it,
     // and it is the first point plotted for the following edge.
     while (pos != next_pos) {
       // Convert the image position to a cell of the reduced pix.
       int grid_x = (pos.x() - bleft.x()) / gridsize - *left;
       int grid_y = (pos.y() - bleft.y()) / gridsize - *bottom;
       // wpl is the row stride in 32-bit words, so this sets bit grid_x of
       // row grid_y.
       SET_DATA_BIT(data + grid_y * wpl, grid_x);
       pos += major_step;
       accumulator += minor;
       if (accumulator >= major) {
         accumulator -= major;
         pos += minor_step;
       }
     }
   }
   return pix;
 }

◆ TraceOutlineOnReducedPix()

Pix * tesseract::TraceOutlineOnReducedPix	(	C_OUTLINE *	outline,
		int	gridsize,
		ICOORD	bleft,
		int *	left,
		int *	bottom
	)

Definition at line 228 of file bbgrid.cpp.

                                                                     {
   const TBOX& box = outline->bounding_box();
   Pix* pix = GridReducedPix(box, gridsize, bleft, left, bottom);
   int wpl = pixGetWpl(pix);
   l_uint32* data = pixGetData(pix);
   int length = outline->pathlength();
   ICOORD pos = outline->start_pos();
   for (int i = 0; i < length; ++i) {
     int grid_x = (pos.x() - bleft.x()) / gridsize - *left;
     int grid_y = (pos.y() - bleft.y()) / gridsize - *bottom;
     SET_DATA_BIT(data + grid_y * wpl, grid_x);
     pos += outline->step(i);
   }
   return pix;
 }

◆ UnicodeFor()

int tesseract::UnicodeFor	(	const UNICHARSET *	u,
		const WERD_CHOICE *	werd,
		int	pos
	)

Definition at line 303 of file paragraphs.cpp.

310 :

311 const UNICHARSET *u_;

◆ ValidBodyLine()

bool tesseract::ValidBodyLine	(	const GenericVector< RowScratchRegisters > *	rows,
		int	row,
		const ParagraphModel *	model
	)

Definition at line 1303 of file paragraphs.cpp.

                             {
     return NearlyEqual(row_a.rindent_ + row_a.rmargin_,
                        row_b.rindent_ + row_b.rmargin_,
                        Epsilon(row_a.ri_->average_interword_space));
   }
   return NearlyEqual(row_a.lindent_ + row_a.lmargin_,

◆ ValidFirstLine()

bool tesseract::ValidFirstLine	(	const GenericVector< RowScratchRegisters > *	rows,
		int	row,
		const ParagraphModel *	model
	)

Definition at line 1292 of file paragraphs.cpp.

1300 {

1301 if (model != kCrownRight && model != kCrownLeft) {

◆ write_info()

bool tesseract::write_info	(	FILE *	f,
		const FontInfo &	fi
	)

Definition at line 166 of file fontinfo.cpp.

                                              {
   int32_t size = strlen(fi.name);
   return tesseract::Serialize(f, &size) &&
          tesseract::Serialize(f, &fi.name[0], size) &&
          tesseract::Serialize(f, &fi.properties);
 }

◆ write_set()

bool tesseract::write_set	(	FILE *	f,
		const FontSet &	fs
	)

Definition at line 235 of file fontinfo.cpp.

                                            {
   return tesseract::Serialize(f, &fs.size) &&
          tesseract::Serialize(f, &fs.configs[0], fs.size);
 }

◆ write_spacing_info()

bool tesseract::write_spacing_info	(	FILE *	f,
		const FontInfo &	fi
	)

Definition at line 201 of file fontinfo.cpp.

                                                      {
   int32_t vec_size = (fi.spacing_vec == nullptr) ? 0 : fi.spacing_vec->size();
   if (!tesseract::Serialize(f, &vec_size)) return false;
   int16_t x_gap_invalid = -1;
   for (int i = 0; i < vec_size; ++i) {
     FontSpacingInfo *fs = fi.spacing_vec->get(i);
     int32_t kern_size = (fs == nullptr) ? -1 : fs->kerned_x_gaps.size();
     if (fs == nullptr) {
       // Writing two invalid x-gaps.
       if (!tesseract::Serialize(f, &x_gap_invalid, 2) ||
           !tesseract::Serialize(f, &kern_size)) {
         return false;
       }
     } else {
       if (!tesseract::Serialize(f, &fs->x_gap_before) ||
           !tesseract::Serialize(f, &fs->x_gap_after) ||
           !tesseract::Serialize(f, &kern_size)) {
         return false;
       }
     }
     if (kern_size > 0 && (!fs->kerned_unichar_ids.Serialize(f) ||
                           !fs->kerned_x_gaps.Serialize(f))) {
       return false;
     }
   }
   return true;
 }

◆ WriteFile()

bool tesseract::WriteFile	(	const std::string &	output_dir,
		const std::string &	lang,
		const std::string &	suffix,
		const GenericVector< char > &	data,
		FileWriter	writer
	)

Definition at line 36 of file lang_model_helpers.cpp.

                                   {
   if (lang.empty()) return true;
   std::string dirname = output_dir + "/" + lang;
   // Attempt to make the directory, but ignore errors, as it may not be a
   // standard filesystem, and the writer will complain if not successful.
 #if defined(_WIN32)
   _mkdir(dirname.c_str());
 #else
   mkdir(dirname.c_str(), S_IRWXU | S_IRWXG);
 #endif
   std::string filename = dirname + "/" + lang + suffix;
   if (writer == nullptr)
     return SaveDataToFile(data, filename.c_str());
   else
     return (*writer)(data, filename.c_str());
 }

◆ WriteRecoder()

bool tesseract::WriteRecoder	(	const UNICHARSET &	unicharset,
		bool	pass_through,
		const std::string &	output_dir,
		const std::string &	lang,
		FileWriter	writer,
		STRING *	radical_table_data,
		TessdataManager *	traineddata
	)

Definition at line 85 of file lang_model_helpers.cpp.

                                                 {
   UnicharCompress recoder;
   // Where the unicharset is carefully setup already to contain a good
   // compact encoding, use a pass-through recoder that does nothing.
   // For scripts that have a large number of unicodes (Han, Hangul) we want
   // to use the recoder to compress the symbol space by re-encoding each
   // unicode as multiple codes from a smaller 'alphabet' that are related to the
   // shapes in the character. Hangul Jamo is a perfect example of this.
   // See the Hangul Syllables section, sub-section "Equivalence" in:
   // http://www.unicode.org/versions/Unicode10.0.0/ch18.pdf
   if (pass_through) {
     recoder.SetupPassThrough(unicharset);
   } else {
     int null_char =
         unicharset.has_special_codes() ? UNICHAR_BROKEN : unicharset.size();
     tprintf("Null char=%d\n", null_char);
     if (!recoder.ComputeEncoding(unicharset, null_char, radical_table_data)) {
       tprintf("Creation of encoded unicharset failed!!\n");
       return false;
     }
   }
   TFile fp;
   GenericVector<char> recoder_data;
   fp.OpenWrite(&recoder_data);
   if (!recoder.Serialize(&fp)) return false;
   traineddata->OverwriteEntry(TESSDATA_LSTM_RECODER, &recoder_data[0],
                               recoder_data.size());
   STRING encoding = recoder.GetEncodingAsString(unicharset);
   recoder_data.init_to_size(encoding.length(), 0);
   memcpy(&recoder_data[0], &encoding[0], encoding.length());
   STRING suffix;
   suffix.add_str_int(".charset_size=", recoder.code_range());
   suffix += ".txt";
   return WriteFile(output_dir, lang, suffix.c_str(), recoder_data, writer);
 }

◆ WriteShapeTable()

void tesseract::WriteShapeTable	(	const STRING &	file_prefix,
		const ShapeTable &	shape_table
	)

Definition at line 179 of file commontraining.cpp.

                                                                                {
   STRING shape_table_file = file_prefix;
   shape_table_file += kShapeTableFileSuffix;
   FILE* fp = fopen(shape_table_file.c_str(), "wb");
   if (fp != nullptr) {
     if (!shape_table.Serialize(fp)) {
       fprintf(stderr, "Error writing shape table: %s\n",
               shape_table_file.c_str());
     }
     fclose(fp);
   } else {
     fprintf(stderr, "Error creating shape table: %s\n",
             shape_table_file.c_str());
   }
 }

◆ WriteUnicharset()

bool tesseract::WriteUnicharset	(	const UNICHARSET &	unicharset,
		const std::string &	output_dir,
		const std::string &	lang,
		FileWriter	writer,
		TessdataManager *	traineddata
	)

Definition at line 71 of file lang_model_helpers.cpp.

                                                    {
   GenericVector<char> unicharset_data;
   TFile fp;
   fp.OpenWrite(&unicharset_data);
   if (!unicharset.save_to_file(&fp)) return false;
   traineddata->OverwriteEntry(TESSDATA_LSTM_UNICHARSET, &unicharset_data[0],
                               unicharset_data.size());
   return WriteFile(output_dir, lang, ".unicharset", unicharset_data, writer);
 }

◆ ZeroVector()

template<typename T >

void tesseract::ZeroVector	(	int	n,
		T *	vec
	)

inline

Definition at line 202 of file functions.h.

                                       {
   memset(vec, 0, n * sizeof(*vec));
 }

Variable Documentation

◆ _TFNetworkModel_default_instance_

TFNetworkModelDefaultTypeInternal tesseract::_TFNetworkModel_default_instance_

Definition at line 51 of file tfnetwork.pb.h.

◆ case_state_table

const int tesseract::case_state_table[6][4]

Initial value:

= {
    {
     
     
     0, 1, 5, 4},
    {
     0, 3, 2, 4},
    {
     0, -1, 2, -1},
    {
     0, 3, -1, 4},
    {
     0, -1, -1, 4},
    {
     5, -1, 2, -1},
}

Definition at line 44 of file context.cpp.

◆ DotProduct

DotProductFunction tesseract::DotProduct

Definition at line 50 of file simddetect.cpp.

◆ kAdamCorrectionIterations

const int tesseract::kAdamCorrectionIterations = 200000

Definition at line 35 of file weightmatrix.cpp.

◆ kAdamEpsilon

const double tesseract::kAdamEpsilon = 1e-8

Definition at line 37 of file weightmatrix.cpp.

◆ kAdamFlag

const int tesseract::kAdamFlag = 4

Definition at line 165 of file weightmatrix.cpp.

◆ kAdjacentLeaderSearchPadding

const int tesseract::kAdjacentLeaderSearchPadding = 2

Definition at line 116 of file tablefind.cpp.

◆ kAlignedFraction

const double tesseract::kAlignedFraction = 0.03125

Definition at line 38 of file alignedblob.cpp.

◆ kAlignedGapFraction

const double tesseract::kAlignedGapFraction = 0.75

Definition at line 42 of file alignedblob.cpp.

◆ kAllowBlobArea

const double tesseract::kAllowBlobArea = 0.05

Definition at line 57 of file tablefind.cpp.

◆ kAllowBlobHeight

const double tesseract::kAllowBlobHeight = 0.3

Definition at line 55 of file tablefind.cpp.

◆ kAllowBlobWidth

const double tesseract::kAllowBlobWidth = 0.4

Definition at line 56 of file tablefind.cpp.

◆ kAllowTextArea

const double tesseract::kAllowTextArea = 0.8

Definition at line 50 of file tablefind.cpp.

◆ kAllowTextHeight

const double tesseract::kAllowTextHeight = 0.5

Definition at line 48 of file tablefind.cpp.

◆ kAllowTextWidth

const double tesseract::kAllowTextWidth = 0.6

Definition at line 49 of file tablefind.cpp.

◆ kBatchIterations

const int tesseract::kBatchIterations = 100

Definition at line 37 of file lstm_test.h.

◆ kBestCheckpointFraction

const double tesseract::kBestCheckpointFraction = 31.0 / 32.0

Definition at line 69 of file lstmtrainer.cpp.

◆ kBigPartSizeRatio

const double tesseract::kBigPartSizeRatio = 1.75

Definition at line 46 of file colpartitiongrid.cpp.

◆ kBoxClipTolerance

const int tesseract::kBoxClipTolerance = 2

Definition at line 31 of file boxword.cpp.

◆ kBrokenCJKIterationFraction

const double tesseract::kBrokenCJKIterationFraction = 0.125

Definition at line 67 of file strokewidth.cpp.

◆ kBytesPer64BitNumber

const int tesseract::kBytesPer64BitNumber = 20

Max bytes in the decimal representation of int64_t.

Definition at line 1501 of file baseapi.cpp.

◆ kBytesPerBoxFileLine

const int tesseract::kBytesPerBoxFileLine = (kBytesPerNumber + 1) * kNumbersPerBlob + 1

Multiplier for max expected textlength assumes (kBytesPerNumber + space) * kNumbersPerBlob plus the newline. Add to this the original UTF8 characters, and one kMaxBytesPerLine for safety.

Definition at line 1499 of file baseapi.cpp.

◆ kBytesPerNumber

const int tesseract::kBytesPerNumber = 5

The number of bytes taken by each number. Since we use int16_t for ICOORD, assume only 5 digits max.

Definition at line 1493 of file baseapi.cpp.

◆ kCellSplitColumnThreshold

const int tesseract::kCellSplitColumnThreshold = 0

Definition at line 42 of file tablerecog.cpp.

◆ kCellSplitRowThreshold

const int tesseract::kCellSplitRowThreshold = 0

Definition at line 41 of file tablerecog.cpp.

◆ kCertaintyScale

const float tesseract::kCertaintyScale = 7.0f

Definition at line 35 of file linerec.cpp.

◆ kCertOffset

const double tesseract::kCertOffset = -0.085

Definition at line 50 of file lstmrecognizer.cpp.

◆ kCJKAspectRatio

const double tesseract::kCJKAspectRatio = 1.25

Definition at line 61 of file strokewidth.cpp.

◆ kCJKAspectRatioIncrease

const double tesseract::kCJKAspectRatioIncrease = 1.0625

Definition at line 63 of file strokewidth.cpp.

◆ kCJKBrokenDistanceFraction

const double tesseract::kCJKBrokenDistanceFraction = 0.25

Definition at line 57 of file strokewidth.cpp.

◆ kCJKMaxComponents

const int tesseract::kCJKMaxComponents = 8

Definition at line 59 of file strokewidth.cpp.

◆ kCJKRadius

const int tesseract::kCJKRadius = 2

Definition at line 55 of file strokewidth.cpp.

◆ kColumnWidthFactor

const int tesseract::kColumnWidthFactor = 20

Pixel resolution of column width estimates.

Definition at line 41 of file tabfind.h.

◆ kCosMaxSkewAngle

const double tesseract::kCosMaxSkewAngle = 0.866025

Definition at line 60 of file tabfind.cpp.

◆ kCrackSpacing

const int tesseract::kCrackSpacing = 100

Spacing of cracks across the page to break up tall vertical lines.

Definition at line 45 of file linefind.cpp.

◆ kCrownLeft

const ParagraphModel * tesseract::kCrownLeft = reinterpret_cast<ParagraphModel *>(static_cast<uintptr_t>(0xDEAD111F))

Definition at line 69 of file paragraphs.cpp.

◆ kCrownRight

const ParagraphModel * tesseract::kCrownRight = reinterpret_cast<ParagraphModel *>(static_cast<uintptr_t>(0xDEAD888F))

Definition at line 71 of file paragraphs.cpp.

◆ kDefaultResolution

const int tesseract::kDefaultResolution = 300

Definition at line 69 of file pango_font_info.cpp.

◆ kDiacriticXPadRatio

const double tesseract::kDiacriticXPadRatio = 7.0

Definition at line 70 of file strokewidth.cpp.

◆ kDiacriticYPadRatio

const double tesseract::kDiacriticYPadRatio = 1.75

Definition at line 73 of file strokewidth.cpp.

◆ kDictRatio

const double tesseract::kDictRatio = 2.25

Definition at line 48 of file lstmrecognizer.cpp.

◆ kDoNotReverse

const char tesseract::kDoNotReverse[] = "RRP_DO_NO_REVERSE"

Definition at line 49 of file trie.cpp.

◆ kDoubleFlag

const int tesseract::kDoubleFlag = 128

Definition at line 169 of file weightmatrix.cpp.

◆ kErrClip

const double tesseract::kErrClip = 1.0f

Definition at line 71 of file lstm.cpp.

◆ kErrorGraphInterval

const int tesseract::kErrorGraphInterval = 1000

Definition at line 57 of file lstmtrainer.cpp.

◆ kExposureFactor

const int tesseract::kExposureFactor = 16

Definition at line 75 of file degradeimage.cpp.

◆ kFeaturePadding

const int tesseract::kFeaturePadding = 2

Definition at line 36 of file imagedata.h.

◆ kFontMergeDistance

const float tesseract::kFontMergeDistance = 0.025

Definition at line 48 of file mastertrainer.cpp.

◆ kForceReverse

const char tesseract::kForceReverse[] = "RRP_FORCE_REVERSE"

Definition at line 51 of file trie.cpp.

◆ kGoodRowNumberOfColumnsLarge

const double tesseract::kGoodRowNumberOfColumnsLarge = 0.7

Definition at line 60 of file tablerecog.cpp.

◆ kGoodRowNumberOfColumnsSmall

const double tesseract::kGoodRowNumberOfColumnsSmall[] = { 2, 2, 2, 2, 2, 3, 3 }

Definition at line 56 of file tablerecog.cpp.

◆ kGoodRowNumberOfColumnsSmallSize

const int tesseract::kGoodRowNumberOfColumnsSmallSize

Initial value:

=

sizeof(kGoodRowNumberOfColumnsSmall) / sizeof(double) - 1

Definition at line 57 of file tablerecog.cpp.

◆ kGutterMultiple

const int tesseract::kGutterMultiple = 4

Definition at line 35 of file tabvector.cpp.

◆ kGutterToNeighbourRatio

const int tesseract::kGutterToNeighbourRatio = 3

Definition at line 37 of file tabvector.cpp.

◆ kHighConfidence

const double tesseract::kHighConfidence = 0.9375

Definition at line 65 of file lstmtrainer.cpp.

◆ kHistogramBuckets

const int tesseract::kHistogramBuckets = 16

Definition at line 367 of file weightmatrix.cpp.

◆ kHistogramSize

const int tesseract::kHistogramSize = 256

Definition at line 27 of file otsuthr.h.

◆ kHorizontalGapMergeFraction

const double tesseract::kHorizontalGapMergeFraction = 0.5

Definition at line 49 of file colfind.cpp.

◆ kHorizontalSpacing

const double tesseract::kHorizontalSpacing = 0.30

Definition at line 35 of file tablerecog.cpp.

◆ kHorzStrongTextlineAspect

const int tesseract::kHorzStrongTextlineAspect = 5

Definition at line 67 of file colpartition.cpp.

◆ kHorzStrongTextlineCount

const int tesseract::kHorzStrongTextlineCount = 8

Definition at line 63 of file colpartition.cpp.

◆ kHorzStrongTextlineHeight

const int tesseract::kHorzStrongTextlineHeight = 10

Definition at line 65 of file colpartition.cpp.

◆ kImagePadding

const int tesseract::kImagePadding = 4

Definition at line 38 of file imagedata.h.

◆ kImprovementFraction

const double tesseract::kImprovementFraction = 15.0 / 16.0

Definition at line 67 of file lstmtrainer.cpp.

◆ kInfiniteDist

const float tesseract::kInfiniteDist = 999.0f

Definition at line 905 of file mastertrainer.cpp.

◆ kInt8Flag

const int tesseract::kInt8Flag = 1

Definition at line 163 of file weightmatrix.cpp.

◆ kLargeTableProjectionThreshold

const double tesseract::kLargeTableProjectionThreshold = 0.45

Definition at line 106 of file tablefind.cpp.

◆ kLargeTableRowCount

const int tesseract::kLargeTableRowCount = 6

Definition at line 108 of file tablefind.cpp.

◆ kLatinChs

const int tesseract::kLatinChs[]

Initial value:

= {
  0x00a2, 0x0022, 0x0022, 0x0027, 0x0027, 0x00b7, 0x002d, 0
}

Latin chars corresponding to the unicode chars above.

Definition at line 1562 of file baseapi.cpp.

◆ kLearningRateDecay

const double tesseract::kLearningRateDecay = M_SQRT1_2

Definition at line 53 of file lstmtrainer.cpp.

◆ kLeftIndentAlignmentCountTh

const int tesseract::kLeftIndentAlignmentCountTh = 1

Definition at line 85 of file equationdetect.cpp.

◆ kLineCountReciprocal

const double tesseract::kLineCountReciprocal = 4.0

Definition at line 48 of file tabvector.cpp.

◆ kLinedTableMinHorizontalLines

const int tesseract::kLinedTableMinHorizontalLines = 3

Definition at line 45 of file tablerecog.cpp.

◆ kLinedTableMinVerticalLines

const int tesseract::kLinedTableMinVerticalLines = 3

Definition at line 44 of file tablerecog.cpp.

◆ kLineFindGridSize

const int tesseract::kLineFindGridSize = 50

Grid size used by line finder. Not very critical.

Definition at line 47 of file linefind.cpp.

◆ kLineFragmentAspectRatio

const double tesseract::kLineFragmentAspectRatio = 10.0

Definition at line 54 of file tabfind.cpp.

◆ kLineResidueAspectRatio

const double tesseract::kLineResidueAspectRatio = 8.0

Definition at line 94 of file strokewidth.cpp.

◆ kLineResiduePadRatio

const int tesseract::kLineResiduePadRatio = 3

Definition at line 96 of file strokewidth.cpp.

◆ kLineResidueSizeRatio

const double tesseract::kLineResidueSizeRatio = 1.75

Definition at line 98 of file strokewidth.cpp.

◆ kLineTrapLongest

const int tesseract::kLineTrapLongest = 4

Definition at line 87 of file strokewidth.cpp.

◆ kLineTrapShortest

const int tesseract::kLineTrapShortest = 2

Definition at line 89 of file strokewidth.cpp.

◆ kLRM

const char *const tesseract::kLRM = "\u200E"

Left-to-Right Mark.

Definition at line 38 of file unicodes.cpp.

◆ kMarginFactor

const double tesseract::kMarginFactor = 1.1

Definition at line 50 of file tablerecog.cpp.

◆ kMarginOverlapFraction

const double tesseract::kMarginOverlapFraction = 0.25

Definition at line 44 of file colpartitiongrid.cpp.

◆ kMathDigitDensityTh1

const float tesseract::kMathDigitDensityTh1 = 0.25

Definition at line 80 of file equationdetect.cpp.

◆ kMathDigitDensityTh2

const float tesseract::kMathDigitDensityTh2 = 0.1

Definition at line 81 of file equationdetect.cpp.

◆ kMathItalicDensityTh

const float tesseract::kMathItalicDensityTh = 0.5

Definition at line 82 of file equationdetect.cpp.

◆ kMaxAmbigStringSize

const int tesseract::kMaxAmbigStringSize = UNICHAR_LEN * (MAX_AMBIG_SIZE + 1)

Definition at line 41 of file ambigs.cpp.

◆ kMaxBaselineError

const double tesseract::kMaxBaselineError = 0.4375

Definition at line 70 of file colpartition.cpp.

◆ kMaxBlobOverlapFactor

const double tesseract::kMaxBlobOverlapFactor = 4.0

Definition at line 76 of file tablefind.cpp.

◆ kMaxBlobWidth

const int tesseract::kMaxBlobWidth = 500

Definition at line 39 of file tablefind.cpp.

◆ kMaxBoxEdgeDiff

const int16_t tesseract::kMaxBoxEdgeDiff = 2

Definition at line 32 of file recogtraining.cpp.

◆ kMaxBoxesInDataPartition

const int tesseract::kMaxBoxesInDataPartition = 20

Definition at line 65 of file tablefind.cpp.

◆ kMaxBytesPerLine

const int tesseract::kMaxBytesPerLine

Initial value:

= kNumbersPerBlob * (kBytesPer64BitNumber + 1) + 1 +

UNICHAR_LEN

A maximal single box could occupy kNumbersPerBlob numbers at kBytesPer64BitNumber digits (if someone sneaks in a 64 bit value) and a space plus the newline and the maximum length of a UNICHAR. Test against this on each iteration for safety.

Definition at line 1508 of file baseapi.cpp.

◆ kMaxCaptionLines

const int tesseract::kMaxCaptionLines = 7

Definition at line 38 of file colpartitiongrid.cpp.

◆ kMaxCharTopRange

const int tesseract::kMaxCharTopRange = 48

Definition at line 82 of file fixxht.cpp.

◆ kMaxCircleErosions

const int tesseract::kMaxCircleErosions = 8

Definition at line 66 of file pagesegmain.cpp.

◆ kMaxCJKSizeRatio

const int tesseract::kMaxCJKSizeRatio = 5

Definition at line 65 of file strokewidth.cpp.

◆ kMaxColorDistance

const int tesseract::kMaxColorDistance = 900

Definition at line 77 of file colpartition.cpp.

◆ kMaxColumnHeaderDistance

const int tesseract::kMaxColumnHeaderDistance = 4

Definition at line 84 of file tablefind.cpp.

◆ kMaxDiacriticDistanceRatio

const double tesseract::kMaxDiacriticDistanceRatio = 1.25

Definition at line 79 of file strokewidth.cpp.

◆ kMaxDiacriticGapToBaseCharHeight

const double tesseract::kMaxDiacriticGapToBaseCharHeight = 1.0

Definition at line 82 of file strokewidth.cpp.

◆ kMaxDistToPartSizeRatio

const double tesseract::kMaxDistToPartSizeRatio = 1.5

Definition at line 54 of file colfind.cpp.

◆ kMaxFillinMultiple

const int tesseract::kMaxFillinMultiple = 11

Definition at line 44 of file tabvector.cpp.

◆ kMaxGapInTextPartition

const double tesseract::kMaxGapInTextPartition = 4.0

Definition at line 68 of file tablefind.cpp.

◆ kMaxGutterWidthAbsolute

const double tesseract::kMaxGutterWidthAbsolute = 2.00

Definition at line 49 of file tabfind.cpp.

◆ kMaxIncompatibleColumnCount

const int tesseract::kMaxIncompatibleColumnCount = 2

Definition at line 46 of file colfind.cpp.

◆ kMaxInputHeight

const int tesseract::kMaxInputHeight = 48

Definition at line 28 of file input.cpp.

◆ kMaxIntSize

const int tesseract::kMaxIntSize = 22

Max string length of an int.

Definition at line 121 of file baseapi.cpp.

◆ kMaxLargeOverlapsWithMedium

const int tesseract::kMaxLargeOverlapsWithMedium = 12

Definition at line 43 of file ccnontextdetect.cpp.

◆ kMaxLargeOverlapsWithSmall

const int tesseract::kMaxLargeOverlapsWithSmall = 3

Definition at line 34 of file ccnontextdetect.cpp.

◆ kMaxLeaderGapFractionOfMax

const double tesseract::kMaxLeaderGapFractionOfMax = 0.25

Definition at line 53 of file colpartition.cpp.

◆ kMaxLeaderGapFractionOfMin

const double tesseract::kMaxLeaderGapFractionOfMin = 0.5

Definition at line 55 of file colpartition.cpp.

◆ kMaxLigature

const int tesseract::kMaxLigature = 0xfb17

Definition at line 64 of file ligature_table.cpp.

◆ kMaxLineLength

const int tesseract::kMaxLineLength = 1024

Definition at line 318 of file boxchar.cpp.

◆ kMaxLineResidue

const int tesseract::kMaxLineResidue = 6

Definition at line 53 of file linefind.cpp.

◆ kMaxMediumOverlapsWithSmall

const int tesseract::kMaxMediumOverlapsWithSmall = 12

Definition at line 39 of file ccnontextdetect.cpp.

◆ kMaxNeighbourDistFactor

const int tesseract::kMaxNeighbourDistFactor = 4

Definition at line 36 of file colpartitiongrid.cpp.

◆ kMaxNonLineDensity

const double tesseract::kMaxNonLineDensity = 0.25

Definition at line 58 of file linefind.cpp.

◆ kMaxOffsetDist

const int tesseract::kMaxOffsetDist = 32

Definition at line 32 of file intfeaturemap.cpp.

◆ kMaxOutputRegisters

constexpr int tesseract::kMaxOutputRegisters = 8

constexpr

Definition at line 35 of file intsimdmatrixavx2.cpp.

◆ kMaxPadFactor

const int tesseract::kMaxPadFactor = 6

Definition at line 33 of file colpartitiongrid.cpp.

◆ kMaxParagraphEndingLeftSpaceMultiple

const double tesseract::kMaxParagraphEndingLeftSpaceMultiple = 3.0

Definition at line 125 of file tablefind.cpp.

◆ kMaxPartitionSpacing

const double tesseract::kMaxPartitionSpacing = 1.75

Definition at line 61 of file colpartitiongrid.cpp.

◆ kMaxRaggedSearch

const int tesseract::kMaxRaggedSearch = 25

Definition at line 39 of file tabfind.cpp.

◆ kMaxRealDistance

const int tesseract::kMaxRealDistance = 2.0

Definition at line 39 of file detlinefit.cpp.

◆ kMaxRectangularFraction

const double tesseract::kMaxRectangularFraction = 0.75

Definition at line 42 of file imagefind.cpp.

◆ kMaxRectangularGradient

const double tesseract::kMaxRectangularGradient = 0.1

Definition at line 45 of file imagefind.cpp.

◆ kMaxRMSColorNoise

const int tesseract::kMaxRMSColorNoise = 128

Definition at line 74 of file colpartition.cpp.

◆ kMaxRowSize

const double tesseract::kMaxRowSize = 2.5

Definition at line 53 of file tablerecog.cpp.

◆ kMaxSameBlockLineSpacing

const double tesseract::kMaxSameBlockLineSpacing = 3

Definition at line 49 of file colpartition.cpp.

◆ kMaxSizeRatio

const double tesseract::kMaxSizeRatio = 1.5

Definition at line 51 of file colpartition.cpp.

◆ kMaxSkewFactor

const int tesseract::kMaxSkewFactor = 15

Definition at line 64 of file alignedblob.cpp.

◆ kMaxSmallNeighboursPerPix

const double tesseract::kMaxSmallNeighboursPerPix = 1.0 / 32

Definition at line 31 of file ccnontextdetect.cpp.

◆ kMaxSpacingDrift

const double tesseract::kMaxSpacingDrift = 1.0 / 72

Definition at line 43 of file colpartition.cpp.

◆ kMaxStaveHeight

const double tesseract::kMaxStaveHeight = 1.0

Definition at line 60 of file linefind.cpp.

◆ kMaxTableCellXheight

const double tesseract::kMaxTableCellXheight = 2.0

Definition at line 80 of file tablefind.cpp.

◆ kMaxTopSpacingFraction

const double tesseract::kMaxTopSpacingFraction = 0.25

Definition at line 46 of file colpartition.cpp.

◆ kMaxUnicharsPerCluster

const int tesseract::kMaxUnicharsPerCluster = 2000

Definition at line 46 of file mastertrainer.cpp.

◆ kMaxVerticalSearch

const int tesseract::kMaxVerticalSearch = 12

Definition at line 38 of file tabfind.cpp.

◆ kMaxVerticalSpacing

const int tesseract::kMaxVerticalSpacing = 500

Definition at line 37 of file tablefind.cpp.

◆ kMaxWinSize

const int tesseract::kMaxWinSize = 2000

Definition at line 50 of file network.cpp.

◆ kMaxXProjectionGapFactor

const double tesseract::kMaxXProjectionGapFactor = 2.0

Definition at line 135 of file tablefind.cpp.

◆ kMinAlignedGutter

const double tesseract::kMinAlignedGutter = 0.25

Definition at line 50 of file tabvector.cpp.

◆ kMinAlignedTabs

const int tesseract::kMinAlignedTabs = 4

Definition at line 54 of file alignedblob.cpp.

◆ kMinBaselineCoverage

const double tesseract::kMinBaselineCoverage = 0.5

Definition at line 72 of file colpartition.cpp.

◆ kMinBoxesInTextPartition

const int tesseract::kMinBoxesInTextPartition = 10

Definition at line 62 of file tablefind.cpp.

◆ kMinCaptionGapHeightRatio

const double tesseract::kMinCaptionGapHeightRatio = 0.5

Definition at line 42 of file colpartitiongrid.cpp.

◆ kMinCaptionGapRatio

const double tesseract::kMinCaptionGapRatio = 2.0

Definition at line 40 of file colpartitiongrid.cpp.

◆ kMinCertainty

const float tesseract::kMinCertainty = -20.0f

Definition at line 30 of file networkio.cpp.

◆ kMinChainTextValue

const int tesseract::kMinChainTextValue = 3

Definition at line 61 of file colpartition.cpp.

◆ kMinClusteredShapes

const int tesseract::kMinClusteredShapes = 1

Definition at line 44 of file mastertrainer.cpp.

◆ kMinColorDifference

const int tesseract::kMinColorDifference = 16

Definition at line 51 of file imagefind.cpp.

◆ kMinColumnWidth

const double tesseract::kMinColumnWidth = 2.0 / 3

Definition at line 31 of file colpartitionset.cpp.

◆ kMinDiacriticSizeRatio

const double tesseract::kMinDiacriticSizeRatio = 1.0625

Definition at line 76 of file strokewidth.cpp.

◆ kMinDivergenceRate

const double tesseract::kMinDivergenceRate = 50.0

Definition at line 46 of file lstmtrainer.cpp.

◆ kMinEvaluatedTabs

const int tesseract::kMinEvaluatedTabs = 3

Definition at line 56 of file tabfind.cpp.

◆ kMinFilledArea

const double tesseract::kMinFilledArea = 0.35

Definition at line 63 of file tablerecog.cpp.

◆ kMinFractionalLinesInColumn

const double tesseract::kMinFractionalLinesInColumn = 0.125

Definition at line 45 of file tabfind.cpp.

◆ kMinGoodTextPARatio

const double tesseract::kMinGoodTextPARatio = 1.5

Definition at line 59 of file ccnontextdetect.cpp.

◆ kMinGutterFraction

const double tesseract::kMinGutterFraction = 0.5

Definition at line 46 of file tabvector.cpp.

◆ kMinGutterWidthGrid

const double tesseract::kMinGutterWidthGrid = 0.5

Definition at line 51 of file colfind.cpp.

◆ kMinImageFindSize

const int tesseract::kMinImageFindSize = 100

Definition at line 47 of file imagefind.cpp.

◆ kMinLeaderCount

const int tesseract::kMinLeaderCount = 5

Definition at line 57 of file colpartition.cpp.

◆ kMinLigature

const int tesseract::kMinLigature = 0xfb00

Definition at line 63 of file ligature_table.cpp.

◆ kMinLineLengthFraction

const int tesseract::kMinLineLengthFraction = 4

Denominator applied to the resolution to give the minimum length, in pixels, that a line is required to have.

Definition at line 43 of file linefind.cpp.

◆ kMinLinesInColumn

const int tesseract::kMinLinesInColumn = 10

Definition at line 41 of file tabfind.cpp.

◆ kMinMaxGapInTextPartition

const double tesseract::kMinMaxGapInTextPartition = 0.5

Definition at line 72 of file tablefind.cpp.

◆ kMinMusicPixelFraction

const double tesseract::kMinMusicPixelFraction = 0.75

Definition at line 62 of file linefind.cpp.

◆ kMinOverlapWithTable

const double tesseract::kMinOverlapWithTable = 0.6

Definition at line 96 of file tablefind.cpp.

◆ kMinParagraphEndingTextToWhitespaceRatio

const double tesseract::kMinParagraphEndingTextToWhitespaceRatio = 3.0

Definition at line 131 of file tablefind.cpp.

◆ kMinPointsForErrorCount

const int tesseract::kMinPointsForErrorCount = 16

Definition at line 36 of file detlinefit.cpp.

◆ kMinProb

const float tesseract::kMinProb = exp(kMinCertainty)

Definition at line 32 of file networkio.cpp.

◆ kMinRaggedGutter

const double tesseract::kMinRaggedGutter = 1.5

Definition at line 52 of file tabvector.cpp.

◆ kMinRaggedTabs

const int tesseract::kMinRaggedTabs = 5

Definition at line 52 of file alignedblob.cpp.

◆ kMinRampSize

const int tesseract::kMinRampSize = 1000

Definition at line 79 of file degradeimage.cpp.

◆ kMinRectangularFraction

const double tesseract::kMinRectangularFraction = 0.125

Definition at line 40 of file imagefind.cpp.

◆ kMinRectSize

const int tesseract::kMinRectSize = 10

Minimum sensible image size to be worth running tesseract.

Definition at line 104 of file baseapi.cpp.

◆ kMinRowsInTable

const int tesseract::kMinRowsInTable = 3

Definition at line 111 of file tablefind.cpp.

◆ kMinStallIterations

const int tesseract::kMinStallIterations = 10000

Definition at line 48 of file lstmtrainer.cpp.

◆ kMinStartedErrorRate

const int tesseract::kMinStartedErrorRate = 75

Definition at line 61 of file lstmtrainer.cpp.

◆ kMinStrongTextValue

const int tesseract::kMinStrongTextValue = 6

Definition at line 59 of file colpartition.cpp.

◆ kMinTabGradient

const double tesseract::kMinTabGradient = 4.0

Definition at line 60 of file alignedblob.cpp.

◆ kMinThickLineWidth

const int tesseract::kMinThickLineWidth = 12

Definition at line 49 of file linefind.cpp.

◆ kMinVerticalSearch

const int tesseract::kMinVerticalSearch = 3

Definition at line 37 of file tabfind.cpp.

◆ kMinWinSize

const int tesseract::kMinWinSize = 500

Definition at line 49 of file network.cpp.

◆ kMostlyOneDirRatio

const int tesseract::kMostlyOneDirRatio = 3

Definition at line 92 of file strokewidth.cpp.

◆ kNeighbourSearchFactor

const double tesseract::kNeighbourSearchFactor = 2.5

Definition at line 102 of file strokewidth.cpp.

◆ kNoiseOverlapAreaFactor

const double tesseract::kNoiseOverlapAreaFactor = 1.0 / 512

Definition at line 107 of file strokewidth.cpp.

◆ kNoiseOverlapGrowthFactor

const double tesseract::kNoiseOverlapGrowthFactor = 4.0

Definition at line 104 of file strokewidth.cpp.

◆ kNoisePadding

const int tesseract::kNoisePadding = 4

Definition at line 50 of file ccnontextdetect.cpp.

◆ kNumAdjustmentIterations

const int tesseract::kNumAdjustmentIterations = 100

Definition at line 55 of file lstmtrainer.cpp.

◆ kNumbersPerBlob

const int tesseract::kNumbersPerBlob = 5

The 5 numbers output for each box (the usual 4 and a page number.)

Definition at line 1488 of file baseapi.cpp.

◆ kNumEndPoints

const int tesseract::kNumEndPoints = 3

Definition at line 30 of file detlinefit.cpp.

◆ kNumInputGroups

constexpr int tesseract::kNumInputGroups = kNumInputsPerRegister / kNumInputsPerGroup

constexpr

Definition at line 41 of file intsimdmatrixavx2.cpp.

◆ kNumInputsPerGroup

constexpr int tesseract::kNumInputsPerGroup = 4

constexpr

Definition at line 39 of file intsimdmatrixavx2.cpp.

◆ kNumInputsPerRegister

constexpr int tesseract::kNumInputsPerRegister = 32

constexpr

Definition at line 37 of file intsimdmatrixavx2.cpp.

◆ kNumOutputsPerRegister

constexpr int tesseract::kNumOutputsPerRegister = 8

constexpr

Definition at line 33 of file intsimdmatrixavx2.cpp.

◆ kNumPagesPerBatch

const int tesseract::kNumPagesPerBatch = 100

Definition at line 59 of file lstmtrainer.cpp.

◆ kOriginalNoiseMultiple

const int tesseract::kOriginalNoiseMultiple = 8

Definition at line 46 of file ccnontextdetect.cpp.

◆ kParagraphEndingPreviousLineRatio

const double tesseract::kParagraphEndingPreviousLineRatio = 1.3

Definition at line 121 of file tablefind.cpp.

◆ kPDF

const char *const tesseract::kPDF = "\u202C"

Pop Directional Formatting.

Definition at line 41 of file unicodes.cpp.

◆ kPhotoOffsetFraction

const double tesseract::kPhotoOffsetFraction = 0.375

Definition at line 53 of file ccnontextdetect.cpp.

◆ kPrime1

const int tesseract::kPrime1 = 17

Definition at line 36 of file trainingsampleset.cpp.

◆ kPrime2

const int tesseract::kPrime2 = 13

Definition at line 37 of file trainingsampleset.cpp.

◆ kRadicalRadix

const int tesseract::kRadicalRadix = 29

Definition at line 31 of file unicharcompress.cpp.

◆ kRaggedFraction

const double tesseract::kRaggedFraction = 2.5

Definition at line 40 of file alignedblob.cpp.

◆ kRaggedGapFraction

const double tesseract::kRaggedGapFraction = 1.0

Definition at line 44 of file alignedblob.cpp.

◆ kRaggedGutterMultiple

const int tesseract::kRaggedGutterMultiple = 5

Definition at line 51 of file tabfind.cpp.

◆ kRandomizingCenter

const int tesseract::kRandomizingCenter = 128

Definition at line 36 of file trainingsample.cpp.

◆ kRatingEpsilon

const double tesseract::kRatingEpsilon = 1.0 / 32

Definition at line 31 of file errorcounter.cpp.

◆ kRequiredColumns

const double tesseract::kRequiredColumns = 0.7

Definition at line 48 of file tablerecog.cpp.

◆ kReverseIfHasRTL

const char tesseract::kReverseIfHasRTL[] = "RRP_REVERSE_IF_HAS_RTL"

Definition at line 50 of file trie.cpp.

◆ kRGBRMSColors

const int tesseract::kRGBRMSColors = 4

Definition at line 36 of file colpartition.h.

◆ kRLE

const char *const tesseract::kRLE = "\u202A"

Right-to-Left Embedding.

Definition at line 40 of file unicodes.cpp.

◆ kRLM

const char *const tesseract::kRLM = "\u200F"

Right-to-Left Mark.

Definition at line 39 of file unicodes.cpp.

◆ kRMSFitScaling

const double tesseract::kRMSFitScaling = 8.0

Definition at line 49 of file imagefind.cpp.

◆ kRotationRange

const float tesseract::kRotationRange = 0.02f

Definition at line 73 of file degradeimage.cpp.

◆ kSaltnPepper

const int tesseract::kSaltnPepper = 5

Definition at line 77 of file degradeimage.cpp.

◆ kScaleFactor

constexpr double tesseract::kScaleFactor = 256.0

constexpr

Definition at line 36 of file functions.h.

◆ kSeedBlobsCountTh

const int tesseract::kSeedBlobsCountTh = 10

Definition at line 84 of file equationdetect.cpp.

◆ kSideSpaceMargin

const int tesseract::kSideSpaceMargin = 10

Definition at line 101 of file tablefind.cpp.

◆ kSimilarRaggedDist

const int tesseract::kSimilarRaggedDist = 50

Definition at line 42 of file tabvector.cpp.

◆ kSimilarVectorDist

const int tesseract::kSimilarVectorDist = 10

Definition at line 39 of file tabvector.cpp.

◆ ksizeofUniversalAmbigsFile

const int tesseract::ksizeofUniversalAmbigsFile = sizeof(kUniversalAmbigsFile)

Definition at line 19036 of file universalambigs.cpp.

◆ kSizeRatioToReject

const float tesseract::kSizeRatioToReject = 2.0

Definition at line 100 of file strokewidth.cpp.

◆ kSmallTableProjectionThreshold

const double tesseract::kSmallTableProjectionThreshold = 0.35

Definition at line 105 of file tablefind.cpp.

◆ kSmoothDecisionMargin

const int tesseract::kSmoothDecisionMargin = 4

Definition at line 64 of file colpartitiongrid.cpp.

◆ kSplitPartitionSize

const double tesseract::kSplitPartitionSize = 2.0

Definition at line 43 of file tablefind.cpp.

◆ kSquareLimit

const int tesseract::kSquareLimit = 25

Definition at line 34 of file trainingsampleset.cpp.

◆ kStageTransitionThreshold

const double tesseract::kStageTransitionThreshold = 10.0

Definition at line 63 of file lstmtrainer.cpp.

◆ kStateClip

const double tesseract::kStateClip = 100.0

Definition at line 69 of file lstm.cpp.

◆ kStrokeWidthCJK

const double tesseract::kStrokeWidthCJK = 2.0

Definition at line 52 of file strokewidth.cpp.

◆ kStrokeWidthConstantTolerance

const double tesseract::kStrokeWidthConstantTolerance = 2.0

Definition at line 140 of file tablefind.cpp.

◆ kStrokeWidthFractionalTolerance

const double tesseract::kStrokeWidthFractionalTolerance = 0.25

Definition at line 139 of file tablefind.cpp.

◆ kStrokeWidthFractionCJK

const double tesseract::kStrokeWidthFractionCJK = 0.25

Definition at line 51 of file strokewidth.cpp.

◆ kStrokeWidthFractionTolerance

const double tesseract::kStrokeWidthFractionTolerance = 0.125

Allowed proportional change in stroke width to be the same font.

Definition at line 44 of file strokewidth.cpp.

◆ kStrokeWidthTolerance

const double tesseract::kStrokeWidthTolerance = 1.5

Allowed constant change in stroke width to be the same font. Really 1.5 pixels.

Definition at line 49 of file strokewidth.cpp.

◆ kSubTrainerMarginFraction

const double tesseract::kSubTrainerMarginFraction = 3.0 / 128

Definition at line 51 of file lstmtrainer.cpp.

◆ kTableColumnThreshold

const double tesseract::kTableColumnThreshold = 3.0

Definition at line 88 of file tablefind.cpp.

◆ kTableSize

constexpr int tesseract::kTableSize = 4096

constexpr

Definition at line 34 of file functions.h.

◆ kTabRadiusFactor

const int tesseract::kTabRadiusFactor = 5

Definition at line 35 of file tabfind.cpp.

◆ kTargetXScale

const int tesseract::kTargetXScale = 5

Definition at line 71 of file lstmtrainer.cpp.

◆ kTargetYScale

const int tesseract::kTargetYScale = 100

Definition at line 72 of file lstmtrainer.cpp.

◆ kTesseractReject

const char tesseract::kTesseractReject = '~'

Character returned when Tesseract couldn't recognize the input as anything.

Definition at line 106 of file baseapi.cpp.

◆ kTestChar

const int tesseract::kTestChar = -1

Definition at line 32 of file trainingsampleset.cpp.

◆ kThickLengthMultiple

const double tesseract::kThickLengthMultiple = 0.75

Definition at line 56 of file linefind.cpp.

◆ kThinLineFraction

const int tesseract::kThinLineFraction = 20

Denominator applied to the resolution to give the maximum width, in pixels, allowed for thin lines.

Definition at line 41 of file linefind.cpp.

◆ kTinyEnoughTextlineOverlapFraction

const double tesseract::kTinyEnoughTextlineOverlapFraction = 0.25

Definition at line 48 of file colpartitiongrid.cpp.

◆ kTrainerIterations

const int tesseract::kTrainerIterations = 600

Definition at line 35 of file lstm_test.h.

◆ kUnclearDensityTh

const float tesseract::kUnclearDensityTh = 0.25

Definition at line 83 of file equationdetect.cpp.

◆ kUniChs

const int tesseract::kUniChs[]

Initial value:

= {
  0x20ac, 0x201c, 0x201d, 0x2018, 0x2019, 0x2022, 0x2014, 0
}

Conversion table for non-Latin characters. Maps characters from outside the Latin set into the Latin set. TODO(rays): incorporate this translation into unicharset.

Definition at line 1558 of file baseapi.cpp.

◆ kUniversalAmbigsFile

const char tesseract::kUniversalAmbigsFile

Definition at line 24 of file universalambigs.cpp.

◆ kUNLVReject

const char tesseract::kUNLVReject = '~'

Character used by UNLV error counter as a reject.

Definition at line 108 of file baseapi.cpp.

◆ kUNLVSuspect

const char tesseract::kUNLVSuspect = '^'

Character used by UNLV as a suspect marker.

Definition at line 110 of file baseapi.cpp.

◆ kVerticalSpacing

const double tesseract::kVerticalSpacing = -0.2

Definition at line 38 of file tablerecog.cpp.

◆ kVLineAlignment

const int tesseract::kVLineAlignment = 3

Definition at line 46 of file alignedblob.cpp.

◆ kVLineGutter

const int tesseract::kVLineGutter = 1

Definition at line 48 of file alignedblob.cpp.

◆ kVLineMinLength

const int tesseract::kVLineMinLength = 500

Definition at line 56 of file alignedblob.cpp.

◆ kVLineSearchSize

const int tesseract::kVLineSearchSize = 150

Definition at line 50 of file alignedblob.cpp.

◆ kWorstDictCertainty

const float tesseract::kWorstDictCertainty = -25.0f

Definition at line 37 of file linerec.cpp.

◆ kXWinFrameSize

const int tesseract::kXWinFrameSize = 30

Definition at line 52 of file network.cpp.

◆ kYWinFrameSize

const int tesseract::kYWinFrameSize = 80

Definition at line 53 of file network.cpp.

◆ LogisticTable

const double tesseract::LogisticTable

Definition at line 4102 of file functions.cpp.

◆ RTLReversePolicyNames

const char* const tesseract::RTLReversePolicyNames[]

Initial value:

= {
  kDoNotReverse,
  kReverseIfHasRTL,
  kForceReverse
}

Definition at line 53 of file trie.cpp.

◆ TanhTable

const double tesseract::TanhTable

Definition at line 4 of file functions.cpp.

◆ test_data

int tesseract::test_data[] = {8, 1, 2, -4, 7, 9, 65536, 4, 9, 0}

Definition at line 24 of file heap_test.cc.

◆ textord_tabvector_vertical_box_ratio

double tesseract::textord_tabvector_vertical_box_ratio = 0.5

"Fraction of box matches required to declare a line vertical"

Definition at line 58 of file tabvector.cpp.

◆ textord_tabvector_vertical_gap_fraction

double tesseract::textord_tabvector_vertical_gap_fraction = 0.5

"Max fraction of mean blob width allowed for vertical gaps in vertical text"

Definition at line 55 of file tabvector.cpp.

Classes

Typedefs

Enumerations

Functions

Variables

Detailed Description

Include Files and Type Defines

Public Function Prototypes

Include Files and Type Defines

Include Files and Type Defines

Typedef Documentation

◆ BlobGridSearch

◆ char32

◆ ColPartitionGridSearch

◆ ColSegmentGrid

◆ ColSegmentGridSearch

◆ DawgVector

◆ DictFunc

◆ DotProductFunction

◆ FileReader

◆ FileWriter

◆ FillLatticeFunc

◆ IntKDPair

◆ LanguageModelFlagsType

◆ LigHash

◆ NodeChildVector

◆ PainPointHeap

◆ ParamsModelClassifyFunc

◆ ParamsTrainingHypothesisList

◆ PartSetVector

◆ ProbabilityInContextFunc

◆ RecodeHeap

◆ RecodePair

◆ RSCounts

◆ RSMap

◆ SetOfModels

◆ ShapeQueue

◆ SuccessorList

◆ SuccessorListsVector

◆ TestCallback

◆ TruthCallback

◆ UnicharAmbigsVector

◆ UnicharIdVector

◆ WidthCallback

◆ WordGrid

◆ WordRecognizer

◆ WordSearch

Enumeration Type Documentation

◆ AmbigType

◆ CachingStrategy

◆ CharSegmentationType

◆ CMD_EVENTS

◆ ColSegType

◆ ColumnSpanningType

◆ CountTypes

◆ DawgType

◆ ErrorTypes

◆ FactorNames

◆ FlexDimensions

◆ GraphemeNorm

◆ GraphemeNormMode

◆ kParamsTrainingFeatureType

◆ LeftOrRight

◆ LineType

◆ LMPainPointsType

◆ LossType

◆ NeighbourPartitionType

◆ NetworkFlags

◆ NetworkType

◆ NodeContinuation

◆ NormalizationMode

◆ OcrEngineMode

◆ OCRNorm

◆ Orientation

◆ PageIteratorLevel

◆ PageSegMode

◆ ParagraphJustification

◆ PartitionFindResult

◆ ScriptPos

◆ SerializeAmount