tesseract 5.0.0-alpha-619-ge9db
normstrngs.cpp File Reference
#include "normstrngs.h"
#include <string>
#include <unordered_map>
#include <vector>
#include "errcode.h"
#include "icuerrorcode.h"
#include <tesseract/unichar.h>
#include "unicode/normalizer2.h"
#include "unicode/translit.h"
#include "unicode/uchar.h"
#include "unicode/unorm2.h"
#include "unicode/uscript.h"

Go to the source code of this file.

Namespaces

tesseract
 

Functions

bool tesseract::NormalizeUTF8String (UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNorm grapheme_normalize, const char *str8, std::string *normalized)
 
bool tesseract::NormalizeCleanAndSegmentUTF8 (UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNormMode g_mode, bool report_errors, const char *str8, std::vector< std::string > *graphemes)
 
char32 tesseract::OCRNormalize (char32 ch)
 
bool tesseract::IsOCREquivalent (char32 ch1, char32 ch2)
 
bool tesseract::IsValidCodepoint (const char32 ch)
 
bool tesseract::IsWhitespace (const char32 ch)
 
bool tesseract::IsUTF8Whitespace (const char *text)
 
unsigned int tesseract::SpanUTF8Whitespace (const char *text)
 
unsigned int tesseract::SpanUTF8NotWhitespace (const char *text)
 
bool tesseract::IsInterchangeValid (const char32 ch)
 
bool tesseract::IsInterchangeValid7BitAscii (const char32 ch)
 
char32 tesseract::FullwidthToHalfwidth (const char32 ch)