tesseract
4.0.0-1-g2a2b
validate_grapheme.h
Go to the documentation of this file.
1
#ifndef TESSERACT_TRAINING_VALIDATE_GRAPHEME_H_
2
#define TESSERACT_TRAINING_VALIDATE_GRAPHEME_H_
3
4
#include "
validator.h
"
5
6
namespace
tesseract
{
7
8
// Subclass of Validator that validates and segments generic unicode into
9
// grapheme clusters, including Latin with diacritics.
10
class
ValidateGrapheme
:
public
Validator
{
11
public
:
12
ValidateGrapheme
(
ViramaScript
script,
bool
report_errors)
13
:
Validator
(script, report_errors) {}
14
~ValidateGrapheme
() {}
15
16
protected
:
17
// Consumes the next Grapheme in codes_[codes_used_++...] and copies it to
18
// parts_ and output_. Returns true if a valid Grapheme was consumed,
19
// otherwise does not increment codes_used_.
20
bool
ConsumeGraphemeIfValid
()
override
;
21
// Returns the CharClass corresponding to the given Unicode ch.
22
CharClass
UnicodeToCharClass
(
char32
ch)
const override
;
23
24
private
:
25
// Helper returns true if the sequence prev_ch,ch is invalid.
26
bool
IsBadlyFormed(
char32
prev_ch,
char32
ch);
27
// Helper returns true if the sequence prev_ch,ch is an invalid Indic vowel.
28
static
bool
IsBadlyFormedIndicVowel(
char32
prev_ch,
char32
ch);
29
// Helper returns true if the sequence prev_ch,ch is invalid Thai.
30
static
bool
IsBadlyFormedThai(
char32
prev_ch,
char32
ch);
31
};
32
33
}
// namespace tesseract
34
35
#endif // TESSERACT_TRAINING_VALIDATE_GRAPHEME_H_
tesseract::Validator::CharClass
CharClass
Definition:
validator.h:112
tesseract::char32
signed int char32
Definition:
unichar.h:52
tesseract::ValidateGrapheme::UnicodeToCharClass
CharClass UnicodeToCharClass(char32 ch) const override
Definition:
validate_grapheme.cpp:48
tesseract::ValidateGrapheme
Definition:
validate_grapheme.h:10
tesseract::ViramaScript
ViramaScript
Definition:
validator.h:53
tesseract::Validator
Definition:
validator.h:72
tesseract::ValidateGrapheme::ConsumeGraphemeIfValid
bool ConsumeGraphemeIfValid() override
Definition:
validate_grapheme.cpp:7
validator.h
tesseract::ValidateGrapheme::~ValidateGrapheme
~ValidateGrapheme()
Definition:
validate_grapheme.h:14
tesseract
Definition:
baseapi.cpp:94
tesseract::ValidateGrapheme::ValidateGrapheme
ValidateGrapheme(ViramaScript script, bool report_errors)
Definition:
validate_grapheme.h:12
tesseract-ocr.master
src
training
validate_grapheme.h
Generated on Mon Oct 29 2018 11:03:50 for tesseract by
1.8.14