tesseract
5.0.0-alpha-619-ge9db
|
Go to the documentation of this file.
20 #ifndef TESSERACT_TRAINING_VALIDATOR_H_
21 #define TESSERACT_TRAINING_VALIDATOR_H_
82 const std::vector<char32>& src,
83 std::vector<std::vector<char32>>*
dest);
133 using IndicPair = std::pair<CharClass, char32>;
151 const std::vector<char32>& src,
152 std::vector<std::vector<char32>>*
dest);
155 std::vector<std::vector<char32>>*
dest);
160 const std::vector<char32>& utf32);
231 std::vector<IndicPair>
codes_;
233 std::vector<std::vector<char32>>
parts_;
246 #endif // TESSERACT_TRAINING_VALIDATOR_H_
void MultiCodePart(unsigned length)
static ViramaScript MostFrequentViramaScript(const std::vector< char32 > &utf32)
static bool IsVedicAccent(char32 unicode)
static const char32 kSinhalaVirama
static bool IsZeroWidthMark(char32 ch)
bool UseMultiCode(unsigned length)
bool IsSubscriptScript() const
void MoveResultsToDest(GraphemeNormMode g_mode, std::vector< std::vector< char32 >> *dest)
std::pair< CharClass, char32 > IndicPair
static const char32 kMaxSinhalaUnicode
static bool IsVirama(char32 unicode)
std::vector< IndicPair > codes_
static const int kIndicCodePageSize
static const char32 kInvalid
static const char32 kMyanmarVirama
static const char32 kRightToLeftMark
static const char32 kZeroWidthNonJoiner
virtual bool ConsumeGraphemeIfValid()=0
static bool ValidateCleanAndSegment(GraphemeNormMode g_mode, bool report_errors, const std::vector< char32 > &src, std::vector< std::vector< char32 >> *dest)
std::vector< char32 > output_
static const char32 kMinIndicUnicode
static const char32 kZeroWidthSpace
bool ValidateCleanAndSegmentInternal(GraphemeNormMode g_mode, const std::vector< char32 > &src, std::vector< std::vector< char32 >> *dest)
static std::unique_ptr< Validator > ScriptValidator(ViramaScript script, bool report_errors)
static const char32 kZeroWidthJoiner
static const char32 kLeftToRightMark
static const char32 kMaxViramaScriptUnicode
virtual CharClass UnicodeToCharClass(char32 ch) const =0
void ComputeClassCodes(const std::vector< char32 > &text)
static const char32 kKhmerVirama
static const char32 kJavaneseVirama
static const char32 kMaxJavaneseUnicode
std::vector< std::vector< char32 > > parts_
Validator(ViramaScript script, bool report_errors)