21 #include "config_auto.h" 52 data_file_name_ = data_file_name;
57 if (reader_ ==
nullptr) {
60 if (!(*reader_)(data_file_name, &data))
return false;
69 data_file_name_ = name;
74 swap_ = num_entries > kMaxNumTessdataEntries;
76 if (swap_)
ReverseN(&num_entries,
sizeof(num_entries));
77 if (num_entries > kMaxNumTessdataEntries)
return false;
80 if (!fp.
DeSerialize(&offset_table[0], num_entries))
return false;
82 if (offset_table[i] >= 0) {
83 int64_t entry_size = size - offset_table[i];
85 while (j < num_entries && offset_table[j] == -1) ++j;
86 if (j < num_entries) entry_size = offset_table[j] - offset_table[i];
88 if (!fp.
DeSerialize(&entries_[i][0], entry_size))
return false;
103 memcpy(&entries_[type][0], data, size);
112 if (writer ==
nullptr)
115 return (*writer)(data, filename);
123 int64_t offset =
sizeof(int32_t) +
sizeof(offset_table);
125 if (entries_[i].empty()) {
126 offset_table[i] = -1;
128 offset_table[i] = offset;
129 offset += entries_[i].
size();
139 if (!entries_[i].empty()) {
140 fp.
Serialize(&entries_[i][0], entries_[i].size());
158 if (!entries_[i].empty()) {
159 tprintf(
"%d:%s:size=%d, offset=%d\n", i, kTessdataFileSuffixes[i],
160 entries_[i].size(), offset);
161 offset += entries_[i].
size();
169 if (!is_loaded_ && !
Init(data_file_name_.
string()))
return false;
178 if (entries_[type].empty())
return false;
179 fp->
Open(&entries_[type][0], entries_[type].size());
197 const char *language_data_path_prefix,
198 const char *output_filename) {
203 STRING filename = language_data_path_prefix;
204 filename += kTessdataFileSuffixes[i];
205 FILE *fp = fopen(filename.
string(),
"rb");
219 "Error: traineddata file must contain at least (a unicharset file" 220 "and inttemp) OR an lstm file.\n");
224 return SaveFile(output_filename,
nullptr);
228 const char *new_traineddata_filename,
229 char **component_filenames,
230 int num_new_components) {
232 for (
int i = 0; i < num_new_components; ++i) {
236 tprintf(
"Failed to read component file:%s\n", component_filenames[i]);
243 return SaveFile(new_traineddata_filename,
nullptr);
250 if (entries_[type].empty())
return false;
257 if (strcmp(kTessdataFileSuffixes[i], suffix) == 0) {
262 tprintf(
"TessdataManager can't determine which tessdata" 263 " component is represented by %s\n", suffix);
270 const char *suffix = strrchr(filename,
'.');
271 if (suffix ==
nullptr || *(++suffix) ==
'\0')
return false;
static bool TessdataTypeFromFileName(const char *filename, TessdataType *type)
void SetVersionString(const std::string &v_str)
void resize_no_init(int size)
bool IsLSTMAvailable() const
bool CombineDataFiles(const char *language_data_path_prefix, const char *output_filename)
bool GetComponent(TessdataType type, TFile *fp)
void OpenWrite(GenericVector< char > *data)
bool SaveDataToFile(const GenericVector< char > &data, const STRING &filename)
void OverwriteEntry(TessdataType type, const char *data, int size)
const char * string() const
bool LoadMemBuffer(const char *name, const char *data, int size)
bool DeSerialize(char *data, size_t count=1)
void LoadFileLater(const char *data_file_name)
constexpr size_t countof(T const (&)[N]) noexcept
void ReverseN(void *ptr, int num_bytes)
bool OverwriteComponents(const char *new_traineddata_filename, char **component_filenames, int num_new_components)
void init_to_size(int size, const T &t)
bool Init(const char *data_file_name)
void set_swap(bool value)
bool Serialize(const char *data, size_t count=1)
DLLSYM void tprintf(const char *format,...)
bool(* FileReader)(const STRING &filename, GenericVector< char > *data)
bool Open(const STRING &filename, FileReader reader)
static bool TessdataTypeFromFileSuffix(const char *suffix, TessdataType *type)
void Serialize(GenericVector< char > *data) const
bool SaveFile(const STRING &filename, FileWriter writer) const
bool LoadDataFromFile(const char *filename, GenericVector< char > *data)
std::string VersionString() const
bool ExtractToFile(const char *filename)
bool IsBaseAvailable() const
bool(* FileWriter)(const GenericVector< char > &data, const STRING &filename)