3 #include "unicode/uchar.h"
8 const unsigned num_codes =
codes_.size();
12 int num_codes_in_grapheme = 0;
16 const bool is_combiner =
29 tprintf(
"Two grapheme links in a row:0x%x 0x%x\n", prev_ch, ch);
33 IsBadlyFormed(prev_ch, ch)) {
36 bool prev_is_fwd_combiner =
40 if (num_codes_in_grapheme > 0 && !is_combiner && !prev_is_fwd_combiner)
43 ++num_codes_in_grapheme;
44 prev_prev_ch = prev_ch;
48 if (num_codes_in_grapheme > 0)
MultiCodePart(num_codes_in_grapheme);
60 int char_type = u_charType(ch);
61 if (char_type == U_NON_SPACING_MARK || char_type == U_ENCLOSING_MARK ||
// Tests the adjacent code point pair (prev_ch, ch) against the
// script-specific checks IsBadlyFormedIndicVowel and IsBadlyFormedThai.
// NOTE(review): only the conditions and one diagnostic call are visible at
// this point; the statements each check guards (presumably `return true`) and
// the final return value should be confirmed against the full definition.
69 bool ValidateGrapheme::IsBadlyFormed(
char32 prev_ch,
char32 ch) {
// First check: pairs listed in IsBadlyFormedIndicVowel.
71 if (IsBadlyFormedIndicVowel(prev_ch, ch)) {
// Diagnostic that prints both code points in hexadecimal.
73 tprintf(
"Badly formed Indic vowel sequence:0x%x 0x%x\n", prev_ch, ch);
// Second check: pairs rejected by the Thai ordering rules.
76 if (IsBadlyFormedThai(prev_ch, ch)) {
// Returns true when prev_ch immediately followed by ch is one of the
// hard-coded code point pairs below; every other pair yields false.
// The constants lie in the Unicode Devanagari (0x900-0x97F), Bengali
// (0x980-0x9FF), Telugu (0xC00-0xC7F) and Kannada (0xC80-0xCFF) blocks.
// NOTE(review): these look like the "do not use" vowel letter + vowel sign
// sequences from the Unicode Standard's Indic chapters - confirm the source
// of the table before extending it.
96 bool ValidateGrapheme::IsBadlyFormedIndicVowel(
char32 prev_ch,
char32 ch) {
// Devanagari: a specific first code point (0x905, 0x906, 0x909, 0x90F or
// 0x93E) followed by a sign from the listed set or inclusive range.
97 return ((prev_ch == 0x905 && (ch == 0x946 || ch == 0x93E)) ||
98 (prev_ch == 0x909 && ch == 0x941) ||
99 (prev_ch == 0x90F && (ch >= 0x945 && ch <= 0x947)) ||
100 (prev_ch == 0x905 && (ch >= 0x949 && ch <= 0x94C)) ||
101 (prev_ch == 0x906 && (ch >= 0x949 && ch <= 0x94C)) ||
103 (prev_ch == 0x93E && (ch >= 0x945 && ch <= 0x948)) ||
// Devanagari: 0x94D (virama) followed by any code point in 0x93E..0x94C.
105 (prev_ch == 0x94D && (ch >= 0x93E && ch <= 0x94C)) ||
// Bengali: 0x985 followed by 0x9BE.
107 (prev_ch == 0x985 && ch == 0x9BE) ||
// Telugu: 0xC12 followed by 0xC55 or 0xC4C.
109 (prev_ch == 0xC12 && (ch == 0xC55 || ch == 0xC4C)) ||
// Kannada: 0xC92 followed by 0xCCC.
111 (prev_ch == 0xC92 && ch == 0xCCC));
115 static bool IsThaiConsonant(
char32 ch) {
return 0xe01 <= ch && ch <= 0xe2e; }
// Returns true iff ch lies in the inclusive range 0xe40..0xe44. In the
// Unicode Thai block these are the vowel signs SARA E through
// SARA AI MAIMALAI, which are written before the consonant they belong to.
118 static bool IsThaiBeforeConsonantVowel(
char32 ch) {
119 return 0xe40 <= ch && ch <= 0xe44;
123 static bool IsThaiToneMark(
char32 ch) {
return 0xe48 <= ch && ch <= 0xe4b; }
// Returns true iff ch is in the inclusive range 0xe34..0xe39 or equals 0xe31.
// IsBadlyFormedThai accepts a tone mark after a consonant or after one of
// these code points, which is what "tonable" refers to here.
127 static bool IsThaiTonableVowel(
char32 ch) {
128 return (0xe34 <= ch && ch <= 0xe39) || ch == 0xe31;
137 bool ValidateGrapheme::IsBadlyFormedThai(
char32 prev_ch,
char32 ch) {
139 if (IsThaiToneMark(ch) &&
140 !(IsThaiConsonant(prev_ch) || IsThaiTonableVowel(prev_ch))) {
144 if ((IsThaiTonableVowel(ch) || ch == 0xe47) && !IsThaiConsonant(prev_ch)) {
149 !(IsThaiConsonant(prev_ch) || prev_ch == 0xe38 || prev_ch == 0xe34)) {
156 !(IsThaiConsonant(prev_ch) || prev_ch == 0xe48 || prev_ch == 0xe49)) {
160 if ((ch == 0xe30 || ch == 0xe32 || ch == 0xe33) &&
161 !(IsThaiConsonant(prev_ch) || IsThaiToneMark(prev_ch)) &&
162 !(prev_ch == 0xe32 && ch == 0xe30) &&
163 !(prev_ch == 0xe4d && ch == 0xe32)) {
168 if (IsThaiBeforeConsonantVowel(ch) &&
169 (IsThaiBeforeConsonantVowel(prev_ch) || prev_ch == 0xe31 ||
174 if ((0xe30 <= ch && ch <= 0xe4D) && prev_ch == 0xe24) {