33 #include "unicode/uchar.h"
34 #include "unicode/uscript.h"
42 for (
int unichar_id = 0; unichar_id < unicharset->
size(); ++unichar_id) {
44 const char* unichar_str = unicharset->
id_to_unichar(unichar_id);
57 bool unichar_isalpha =
false;
58 bool unichar_islower =
false;
59 bool unichar_isupper =
false;
60 bool unichar_isdigit =
false;
61 bool unichar_ispunct =
false;
63 for (
char32 u_ch : uni_vector) {
64 if (u_isalpha(u_ch)) unichar_isalpha =
true;
65 if (u_islower(u_ch)) unichar_islower =
true;
66 if (u_isupper(u_ch)) unichar_isupper =
true;
67 if (u_isdigit(u_ch)) unichar_isdigit =
true;
68 if (u_ispunct(u_ch)) unichar_ispunct =
true;
71 unicharset->
set_isalpha(unichar_id, unichar_isalpha);
72 unicharset->
set_islower(unichar_id, unichar_islower);
73 unicharset->
set_isupper(unichar_id, unichar_isupper);
74 unicharset->
set_isdigit(unichar_id, unichar_isdigit);
78 unicharset->
set_script(unichar_id, uscript_getName(
79 uscript_getScript(uni_vector[0], err)));
81 const int num_code_points = uni_vector.size();
84 if (unichar_islower || unichar_isupper) {
85 std::vector<char32> other_case(num_code_points, 0);
86 for (
int i = 0; i < num_code_points; ++i) {
91 other_case[i] = unichar_islower ? u_toupper(uni_vector[i]) :
92 u_tolower(uni_vector[i]);
97 if (other_case_id != INVALID_UNICHAR_ID) {
100 tprintf(
"Other case %s of %s is not in unicharset\n",
101 other_case_uch.c_str(), unichar_str);
106 std::vector<char32> mirrors(num_code_points, 0);
107 for (
int i = 0; i < num_code_points; ++i) {
108 mirrors[i] = u_charMirror(uni_vector[i]);
111 static_cast<UNICHARSET::Direction>(
112 u_charDirection(uni_vector[i])));
117 if (mirror_uch_id != INVALID_UNICHAR_ID) {
118 unicharset->
set_mirror(unichar_id, mirror_uch_id);
119 }
else if (report_errors) {
120 tprintf(
"Mirror %s of %s is not in unicharset\n",
121 mirror_uch.c_str(), unichar_str);
126 if (unichar_id != 0 &&
131 unichar_str, &normed_str) &&
132 !normed_str.empty()) {
133 unicharset->
set_normed(unichar_id, normed_str.c_str());
135 unicharset->
set_normed(unichar_id, unichar_str);
152 tprintf(
"Failed to load script unicharset from:%s\n", filename.c_str());
157 tprintf(
"Warning: properties incomplete for index %d = %s\n", c,
173 xheights_str += script_heights;
191 tprintf(
"Loaded unicharset of size %d from file %s\n", unicharset.
size(),
192 input_unicharset_file.c_str());
195 tprintf(
"Setting unichar properties\n");
197 tprintf(
"Setting script properties\n");
199 if (!output_xheights_file.empty()) {
205 tprintf(
"Writing unicharset to file %s\n", output_unicharset_file.c_str());
206 unicharset.
save_to_file(output_unicharset_file.c_str());