27 #if defined(HAVE_WCHAR_T) || defined(_WIN32) || defined(GOOGLE3)
41 static const char*
const kUnicharsetFileName =
"unicharset";
96 int main(
int argc,
char** argv) {
98 const char* output_directory =
".";
99 STRING unicharset_file_name;
103 setlocale(LC_ALL,
"");
107 printf(
"Usage: %s [-D DIRECTORY] FILE...\n", argv[0]);
113 while ((option =
tessopt(argc, argv,
"D" )) != EOF) {
123 unicharset_file_name = output_directory;
124 unicharset_file_name +=
"/";
125 unicharset_file_name += kUnicharsetFileName;
129 printf(
"Extracting unicharset from %s\n", argv[
tessoptind]);
131 FILE* box_file = fopen(argv[tessoptind],
"rb");
132 if (box_file ==
NULL) {
133 printf(
"Cannot open box file %s\n", argv[tessoptind]);
140 while (
ReadNextBox(&line_number, box_file, &unichar_string, &box)) {
148 printf(
"Wrote unicharset file %s.\n", unicharset_file_name.
string());
151 printf(
"Cannot save unicharset file %s.\n", unicharset_file_name.
string());
int main(int argc, char **argv)
void set_isupper(UNICHAR_ID unichar_id, bool value)
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
int tessopt(inT32 argc, char *argv[], const char *arglist)
bool save_to_file(const char *const filename) const
void set_islower(UNICHAR_ID unichar_id, bool value)
bool ReadNextBox(int *line_number, FILE *box_file, STRING *utf8_str, TBOX *bounding_box)
void set_ispunctuation(UNICHAR_ID unichar_id, bool value)
void set_isdigit(UNICHAR_ID unichar_id, bool value)
UNICHAR_ID wc_to_unichar_id(const UNICHARSET &unicharset, int wc)
static int utf8_step(const char *utf8_str)
void unichar_insert(const char *const unichar_repr)
void set_other_case(UNICHAR_ID unichar_id, UNICHAR_ID other_case)
void set_isalpha(UNICHAR_ID unichar_id, bool value)
void set_properties(UNICHARSET *unicharset, const char *const c_string)
const char * string() const