21 #ifndef TESSERACT_CCUTIL_NORMSTRNGS_H_ 22 #define TESSERACT_CCUTIL_NORMSTRNGS_H_ 59 std::string* normalized);
67 std::vector<std::string>* graphemes);
103 #endif // TESSERACT_CCUTIL_NORMSTRNGS_H_
bool NormalizeUTF8String(UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNorm grapheme_normalize, const char *str8, std::string *normalized)
char32 OCRNormalize(char32 ch)
bool IsInterchangeValid(const char32 ch)
unsigned int SpanUTF8Whitespace(const char *text)
bool IsInterchangeValid7BitAscii(const char32 ch)
bool IsOCREquivalent(char32 ch1, char32 ch2)
bool IsWhitespace(const char32 ch)
bool IsUTF8Whitespace(const char *text)
bool IsValidCodepoint(const char32 ch)
char32 FullwidthToHalfwidth(const char32 ch)
unsigned int SpanUTF8NotWhitespace(const char *text)
bool NormalizeCleanAndSegmentUTF8(UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNormMode g_mode, bool report_errors, const char *str8, std::vector< std::string > *graphemes)