20 #ifndef TESSERACT_CCUTIL_TESSDATAMANAGER_H_
21 #define TESSERACT_CCUTIL_TESSDATAMANAGER_H_
// File-name suffix strings for tessdata files. Every constant below is a
// NUL-terminated character array with internal linkage.

// Suffix "traineddata".
// NOTE(review): presumably the suffix of the combined data file — confirm.
static const char kTrainedDataSuffix[] = "traineddata";

// Suffixes of individual files. Several of these are listed in the
// kTessdataFileSuffixes table, which is indexed by tessdata type.
static const char kLangConfigFileSuffix[] = "config";
static const char kUnicharsetFileSuffix[] = "unicharset";
static const char kAmbigsFileSuffix[] = "unicharambigs";
static const char kBuiltInTemplatesFileSuffix[] = "inttemp";
static const char kBuiltInCutoffsFileSuffix[] = "pffmtable";
static const char kNormProtoFileSuffix[] = "normproto";

// Dawg-related suffixes.
static const char kPuncDawgFileSuffix[] = "punc-dawg";
static const char kSystemDawgFileSuffix[] = "word-dawg";
static const char kNumberDawgFileSuffix[] = "number-dawg";
static const char kFreqDawgFileSuffix[] = "freq-dawg";
static const char kFixedLengthDawgsFileSuffix[] = "fixed-length-dawgs";

// Cube-related suffixes.
static const char kCubeUnicharsetFileSuffix[] = "cube-unicharset";
static const char kCubeSystemDawgFileSuffix[] = "cube-word-dawg";

// Remaining suffixes.
static const char kShapeTableFileSuffix[] = "shapetable";
static const char kBigramDawgFileSuffix[] = "bigram-dawg";
static const char kUnambigDawgFileSuffix[] = "unambig-dawg";
static const char kParamsModelFileSuffix[] = "params-model";
79 static const char *
const kTessdataFileSuffixes[] = {
80 kLangConfigFileSuffix,
81 kUnicharsetFileSuffix,
83 kBuiltInTemplatesFileSuffix,
84 kBuiltInCutoffsFileSuffix,
87 kSystemDawgFileSuffix,
88 kNumberDawgFileSuffix,
90 kFixedLengthDawgsFileSuffix,
91 kCubeUnicharsetFileSuffix,
92 kCubeSystemDawgFileSuffix,
93 kShapeTableFileSuffix,
94 kBigramDawgFileSuffix,
95 kUnambigDawgFileSuffix,
96 kParamsModelFileSuffix,
103 static const bool kTessdataFileIsText[] = {
// Maximum number of tessdata entries (1000).
// NOTE(review): presumably a sanity limit on the entry count read from a
// data file — confirm at the use sites, which are not visible here.
static const int kMaxNumTessdataEntries = 1000;
137 actual_tessdata_num_entries_ = 0;
139 offset_table_[i] = -1;
// Initializes the manager from the data file named data_file_name.
//   data_file_name: name of the data file to use.
//   debug_level:    verbosity control for debug output.
// Returns a bool status.
// NOTE(review): the definition is not visible here; presumably returns true
// on success and false on failure — confirm against the implementation.
bool Init(const char *data_file_name, int debug_level);
164 tprintf(
"TessdataManager: seek to offset %lld - start of tessdata"
165 "type %d (%s))\n", offset_table_[tessdata_type],
166 tessdata_type, kTessdataFileSuffixes[tessdata_type]);
168 if (offset_table_[tessdata_type] < 0) {
172 static_cast<size_t>(offset_table_[tessdata_type]),
179 int index = tessdata_type + 1;
180 while (index < actual_tessdata_num_entries_ && offset_table_[index] == -1) {
184 tprintf(
"TessdataManager: end offset for type %d is %lld\n",
186 (index == actual_tessdata_num_entries_) ? -1
187 : offset_table_[index]);
189 return (index == actual_tessdata_num_entries_) ? -1 : offset_table_[index] - 1;
193 if (data_file_ !=
NULL) {
206 const char *language_data_path_prefix,
215 const char *output_filename);
223 char **component_filenames,
224 int num_new_components);
243 static void CopyFile(FILE *input_file, FILE *output_file,
244 bool newline_end,
inT64 num_bytes_to_copy);
// Returns a FILE pointer for the file identified by language_data_path_prefix
// and file_suffix.
//   language_data_path_prefix: leading part of the file path.
//   file_suffix:               trailing part of the file name.
//   text_file:                 NOTE(review): presumably selects text rather
//                              than binary open mode — confirm.
// NOTE(review): the definition is not visible here; how the prefix and suffix
// are combined, and what is returned on failure, should be confirmed there.
static FILE *GetFilePtr(const char *language_data_path_prefix,
                        const char *file_suffix, bool text_file);
288 inT32 actual_tessdata_num_entries_;
299 #endif // TESSERACT_CCUTIL_TESSDATAMANAGER_H_
FILE * GetDataFilePtr() const
static bool TessdataTypeFromFileName(const char *filename, TessdataType *type, bool *text_file)
static bool TessdataTypeFromFileSuffix(const char *suffix, TessdataType *type, bool *text_file)
inT64 GetEndOffset(TessdataType tessdata_type) const
bool OverwriteComponents(const char *new_traineddata_filename, char **component_filenames, int num_new_components)
bool ExtractToFile(const char *filename)
static bool CombineDataFiles(const char *language_data_path_prefix, const char *output_filename)
const STRING & GetDataFileName() const
static bool WriteMetadata(inT64 *offset_table, const char *language_data_path_prefix, FILE *output_file)
bool SeekToStart(TessdataType tessdata_type)
bool Init(const char *data_file_name, int debug_level)
static void CopyFile(FILE *input_file, FILE *output_file, bool newline_end, inT64 num_bytes_to_copy)