tesseract
5.0.0-alpha-619-ge9db
validate_indic.h
Go to the documentation of this file.
1
#ifndef TESSERACT_TRAINING_VALIDATE_INDIC_H_
2
#define TESSERACT_TRAINING_VALIDATE_INDIC_H_
3
4
#include "
validator.h
"
5
6
namespace
tesseract
{
7
8
// Subclass of Validator that validates and segments Indic scripts in the
9
// unicode range 0x900-0xdff (Devanagari-Sinhala).
10
class
ValidateIndic
:
public
Validator
{
11
public
:
12
ValidateIndic
(
ViramaScript
script,
bool
report_errors)
13
:
Validator
(script, report_errors) {}
14
~ValidateIndic
() {}
15
16
protected
:
17
// Returns whether codes matches the pattern for an Indic Grapheme.
18
// Consumes the next Grapheme in codes_[codes_used_++...] and copies it to
19
// parts_ and output_. Returns true if a valid Grapheme was consumed,
20
// otherwise does not increment codes_used_.
21
bool
ConsumeGraphemeIfValid
()
override
;
22
// Returns the CharClass corresponding to the given Unicode ch.
23
Validator::CharClass
UnicodeToCharClass
(
char32
ch)
const override
;
24
25
private
:
26
// Helper consumes/copies a virama and any associated post-virama joiners.
27
bool
ConsumeViramaIfValid(
IndicPair
joiner,
bool
post_matra);
28
// Helper consumes/copies a series of consonants separated by viramas while
29
// valid, but not any vowel or other modifiers.
30
bool
ConsumeConsonantHeadIfValid();
31
// Helper consumes/copies a tail part of a consonant, comprising optional
32
// matra/piece, vowel modifier, vedic mark, terminating virama.
33
bool
ConsumeConsonantTailIfValid();
34
// Helper consumes/copies a vowel and optional modifiers.
35
bool
ConsumeVowelIfValid();
36
37
// Some special unicodes used only for Indic processing.
38
static
const
char32
kYayana = 0xdba;
// Sinhala Ya
39
static
const
char32
kRayana = 0xdbb;
// Sinhala Ra
40
};
41
42
}
// namespace tesseract
43
44
#endif // TESSERACT_TRAINING_VALIDATE_INDIC_H_
tesseract::Validator::IndicPair
std::pair< CharClass, char32 > IndicPair
Definition:
validator.h:148
tesseract::ValidateIndic::ConsumeGraphemeIfValid
bool ConsumeGraphemeIfValid() override
Definition:
validate_indic.cpp:18
tesseract::ViramaScript
ViramaScript
Definition:
validator.h:67
tesseract::Validator::CharClass
CharClass
Definition:
validator.h:126
validator.h
tesseract::char32
signed int char32
Definition:
unichar.h:53
tesseract::ValidateIndic
Definition:
validate_indic.h:10
tesseract
Definition:
baseapi.h:65
tesseract::Validator
Definition:
validator.h:86
tesseract::ValidateIndic::UnicodeToCharClass
Validator::CharClass UnicodeToCharClass(char32 ch) const override
Definition:
validate_indic.cpp:44
tesseract::ValidateIndic::~ValidateIndic
~ValidateIndic()
Definition:
validate_indic.h:14
tesseract::ValidateIndic::ValidateIndic
ValidateIndic(ViramaScript script, bool report_errors)
Definition:
validate_indic.h:12
src
training
validate_indic.h
Generated on Thu Jan 30 2020 14:22:21 for tesseract by Doxygen 1.8.16