#include "normstrngs.h"
#include <string>
#include <unordered_map>
#include <vector>
#include "errcode.h"
#include "icuerrorcode.h"
#include <tesseract/unichar.h>
#include "unicode/normalizer2.h"
#include "unicode/translit.h"
#include "unicode/uchar.h"
#include "unicode/unorm2.h"
#include "unicode/uscript.h"
Go to the source code of this file.
| Return type | Function |
|---|---|
| `bool` | `tesseract::NormalizeUTF8String (UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNorm grapheme_normalize, const char *str8, std::string *normalized)` |
| `bool` | `tesseract::NormalizeCleanAndSegmentUTF8 (UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNormMode g_mode, bool report_errors, const char *str8, std::vector< std::string > *graphemes)` |
| `char32` | `tesseract::OCRNormalize (char32 ch)` |
| `bool` | `tesseract::IsOCREquivalent (char32 ch1, char32 ch2)` |
| `bool` | `tesseract::IsValidCodepoint (const char32 ch)` |
| `bool` | `tesseract::IsWhitespace (const char32 ch)` |
| `bool` | `tesseract::IsUTF8Whitespace (const char *text)` |
| `unsigned int` | `tesseract::SpanUTF8Whitespace (const char *text)` |
| `unsigned int` | `tesseract::SpanUTF8NotWhitespace (const char *text)` |
| `bool` | `tesseract::IsInterchangeValid (const char32 ch)` |
| `bool` | `tesseract::IsInterchangeValid7BitAscii (const char32 ch)` |
| `char32` | `tesseract::FullwidthToHalfwidth (const char32 ch)` |