21 #include <sys/types.h>
39 if (lang.empty())
return true;
44 _mkdir(dirname.c_str());
46 mkdir(dirname.c_str(), S_IRWXU | S_IRWXG);
48 std::string filename = dirname +
"/" + lang + suffix;
49 if (writer ==
nullptr)
52 return (*writer)(data, filename.c_str());
58 if (filename.empty())
return STRING();
61 if (reader ==
nullptr)
64 read_result = (*reader)(filename.c_str(), &data);
65 if (read_result)
return STRING(&data[0], data.
size());
66 tprintf(
"Failed to read data from: %s\n", filename.c_str());
79 unicharset_data.
size());
80 return WriteFile(output_dir, lang,
".unicharset", unicharset_data, writer);
103 tprintf(
"Null char=%d\n", null_char);
104 if (!recoder.
ComputeEncoding(unicharset, null_char, radical_table_data)) {
105 tprintf(
"Creation of encoded unicharset failed!!\n");
112 if (!recoder.
Serialize(&fp))
return false;
114 recoder_data.
size());
117 memcpy(&recoder_data[0], &encoding[0], encoding.
length());
121 return WriteFile(output_dir, lang, suffix.c_str(), recoder_data, writer);
132 trie.add_word_list(words, unicharset, reverse_policy);
133 tprintf(
"Reducing Trie to SquishedDawg\n");
134 std::unique_ptr<SquishedDawg> dawg(trie.trie_to_dawg());
135 if (dawg ==
nullptr || dawg->NumEdges() == 0)
return false;
138 fp.OpenWrite(&dawg_data);
139 if (!dawg->write_squished_dawg(&fp))
return false;
140 traineddata->OverwriteEntry(file_type, &dawg_data[0], dawg_data.
size());
151 TessdataManager* traineddata) {
153 tprintf(
"Must have non-empty puncs list to use language models!!\n");
162 if (!words.
empty() &&
175 if (!numbers.
empty() &&
187 const std::string& lang,
bool pass_through_recoder,
194 if (!version_str.empty()) {
199 if (!
WriteUnicharset(unicharset, output_dir, lang, writer, &traineddata)) {
200 tprintf(
"Error writing unicharset!!\n");
203 tprintf(
"Config file is optional, continuing...\n");
206 std::string config_filename = script_dir +
"/" + lang +
"/" + lang +
".config";
208 if (config_file.
length() > 0) {
212 std::string radical_filename = script_dir +
"/radical-stroke.txt";
214 if (radical_data.
length() == 0) {
215 tprintf(
"Error reading radical code table %s\n", radical_filename.c_str());
218 if (!
WriteRecoder(unicharset, pass_through_recoder, output_dir, lang, writer,
219 &radical_data, &traineddata)) {
220 tprintf(
"Error writing recoder!!\n");
223 if (!WriteDawgs(words, puncs, numbers, lang_is_rtl, unicharset,
225 tprintf(
"Error during conversion of wordlists to DAWGs!!\n");
232 traineddata.
Serialize(&traineddata_data);
233 if (!
WriteFile(output_dir, lang,
".traineddata", traineddata_data, writer)) {
234 tprintf(
"Error writing output traineddata file!!\n");