tesseract 4.0.0-1-g2a2b
normstrngs.h File Reference
#include <string>
#include <vector>
#include "validator.h"

Go to the source code of this file.

Namespaces

 tesseract
 

Enumerations

enum tesseract::UnicodeNormMode { tesseract::UnicodeNormMode::kNFD, tesseract::UnicodeNormMode::kNFC, tesseract::UnicodeNormMode::kNFKD, tesseract::UnicodeNormMode::kNFKC }
 
enum tesseract::OCRNorm { tesseract::OCRNorm::kNone, tesseract::OCRNorm::kNormalize }
 
enum tesseract::GraphemeNorm { tesseract::GraphemeNorm::kNone, tesseract::GraphemeNorm::kNormalize }
 

Functions

bool tesseract::NormalizeUTF8String (UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNorm grapheme_normalize, const char *str8, std::string *normalized)
 
bool tesseract::NormalizeCleanAndSegmentUTF8 (UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNormMode g_mode, bool report_errors, const char *str8, std::vector< std::string > *graphemes)
 
char32 tesseract::OCRNormalize (char32 ch)
 
bool tesseract::IsOCREquivalent (char32 ch1, char32 ch2)
 
bool tesseract::IsValidCodepoint (const char32 ch)
 
bool tesseract::IsWhitespace (const char32 ch)
 
bool tesseract::IsUTF8Whitespace (const char *text)
 
unsigned int tesseract::SpanUTF8Whitespace (const char *text)
 
unsigned int tesseract::SpanUTF8NotWhitespace (const char *text)
 
bool tesseract::IsInterchangeValid (const char32 ch)
 
bool tesseract::IsInterchangeValid7BitAscii (const char32 ch)
 
char32 tesseract::FullwidthToHalfwidth (const char32 ch)