5 #include "unicode/uchar.h" 6 #include "unicode/uscript.h" 14 int num_codes =
codes_.size();
36 tprintf(
"Invalid start of Myanmar syllable:0x%x\n",
41 if (ConsumeSubscriptIfPresent())
return true;
42 ConsumeOptionalSignsIfPresent();
62 bool ValidateMyanmar::ConsumeSubscriptIfPresent() {
64 int num_codes =
codes_.size();
77 bool ValidateMyanmar::ConsumeOptionalSignsIfPresent() {
80 const std::vector<char32> kMedials({kMyanmarAsat, kMyanmarMedialYa, 0x103c,
81 0x103d, 0x103e, 0x105e, 0x105f, 0x1060,
83 for (
char32 ch : kMedials) {
86 if (ch == kMyanmarMedialYa &&
94 if (ch == 0x102d || ch == 0x102e || ch == 0x1032) {
99 if (ch == 0x102f || ch == 0x1030 || (0x1056 <= ch && ch <= 0x1059) ||
100 ch == 0x1062 || ch == 0x1067 || ch == 0x1068 ||
101 (0x1071 <= ch && ch <= 0x1074) || (0x1083 <= ch && ch <= 0x1086) ||
102 ch == 0x109c || ch == 0x109d) {
114 const std::vector<char32> kSigns({0x1036, 0x1037});
115 for (
char32 ch : kSigns) {
122 if (ch == 0x1038 || ch == kMyanmarAsat || ch == 0x1063 || ch == 0x1064 ||
123 (0x1069 <= ch && ch <= 0x106d) || (0x1087 <= ch && ch <= 0x108d) ||
124 ch == 0x108f || ch == 0x109a || ch == 0x109b ||
125 (0xaa7b <= ch && ch <= 0xaa7d)) {
136 bool ValidateMyanmar::IsMyanmarLetter(
char32 ch) {
137 return (0x1000 <= ch && ch <= 0x102a) || ch == 0x103f ||
138 (0x1050 <= ch && ch <= 0x1055) || (0x105a <= ch && ch <= 0x105d) ||
139 ch == 0x1061 || ch == 0x1065 || ch == 0x1066 ||
140 (0x106e <= ch && ch <= 0x1070) || (0x1075 <= ch && ch <= 0x1080) ||
141 ch == 0x108e || (0xa9e0 <= ch && ch <= 0xa9ef) ||
142 (0xa9fa <= ch && ch <= 0xa9ff) || (0xaa60 <= ch && ch <= 0xaa73) ||
143 ch == 0xaa7a || ch == 0xaa7e || ch == 0xaa7f;
149 bool ValidateMyanmar::IsMyanmarOther(
char32 ch) {
151 UScriptCode script_code = uscript_getScript(ch, err);
155 return (0x1040 <= ch && ch <= 0x1049) || (0x1090 <= ch && ch <= 0x1099) ||
156 (0x109c <= ch && ch <= 0x109d) || (0xa9f0 <= ch && ch <= 0xa9f9) ||
157 (0xaa74 <= ch && ch <= 0xaa79);
std::vector< IndicPair > codes_
bool ConsumeGraphemeIfValid() override
static const char32 kZeroWidthNonJoiner
static const char32 kMyanmarVirama
Validator::CharClass UnicodeToCharClass(char32 ch) const override
DLLSYM void tprintf(const char *format,...)
bool UseMultiCode(int length)
static const char32 kZeroWidthJoiner