#include <tessdatamanager.h>
|
| TessdataManager () |
|
| TessdataManager (FileReader reader) |
|
| ~TessdataManager ()=default |
|
bool | swap () const |
|
bool | is_loaded () const |
|
void | LoadFileLater (const char *data_file_name) |
|
bool | Init (const char *data_file_name) |
|
bool | LoadMemBuffer (const char *name, const char *data, int size) |
|
void | OverwriteEntry (TessdataType type, const char *data, int size) |
|
bool | SaveFile (const STRING &filename, FileWriter writer) const |
|
void | Serialize (GenericVector< char > *data) const |
|
void | Clear () |
|
void | Directory () const |
|
bool | IsComponentAvailable (TessdataType type) const |
|
bool | GetComponent (TessdataType type, TFile *fp) |
|
bool | GetComponent (TessdataType type, TFile *fp) const |
|
std::string | VersionString () const |
|
void | SetVersionString (const std::string &v_str) |
|
bool | IsBaseAvailable () const |
|
bool | IsLSTMAvailable () const |
|
const STRING & | GetDataFileName () const |
|
bool | CombineDataFiles (const char *language_data_path_prefix, const char *output_filename) |
|
bool | OverwriteComponents (const char *new_traineddata_filename, char **component_filenames, int num_new_components) |
|
bool | ExtractToFile (const char *filename) |
|
Definition at line 126 of file tessdatamanager.h.
◆ TessdataManager() [1/2]
tesseract::TessdataManager::TessdataManager()
Definition at line 42 of file tessdatamanager.cpp.
42 : reader_(
nullptr), is_loaded_(
false), swap_(
false) {
◆ TessdataManager() [2/2]
tesseract::TessdataManager::TessdataManager(FileReader reader) [explicit]
◆ ~TessdataManager()
tesseract::TessdataManager::~TessdataManager() [default]
◆ Clear()
void tesseract::TessdataManager::Clear()
◆ CombineDataFiles()
bool tesseract::TessdataManager::CombineDataFiles(const char *language_data_path_prefix, const char *output_filename)
Reads all the standard Tesseract config and data files for a language at the given path and bundles them into a single binary traineddata file. Returns true if the combined traineddata file was successfully written.
Definition at line 244 of file tessdatamanager.cpp.
248 for (
auto filesuffix : kTessdataFileSuffixes) {
251 STRING filename = language_data_path_prefix;
252 filename += filesuffix;
253 FILE *fp = fopen(filename.
c_str(),
"rb");
267 "Error: traineddata file must contain at least (a unicharset file"
268 "and inttemp) OR an lstm file.\n");
272 return SaveFile(output_filename,
nullptr);
◆ Directory()
void tesseract::TessdataManager::Directory() const
Definition at line 202 of file tessdatamanager.cpp.
206 if (!entries_[i].empty()) {
207 tprintf(
"%d:%s:size=%d, offset=%d\n", i, kTessdataFileSuffixes[i],
208 entries_[i].size(), offset);
209 offset += entries_[i].
size();
◆ ExtractToFile()
bool tesseract::TessdataManager::ExtractToFile(const char *filename)
Extracts the tessdata component implied by the name of the input file from the combined traineddata loaded into the TessdataManager, and writes the extracted component to the file indicated by that file name. E.g., if the filename given is somepath/somelang.unicharset, the unicharset will be extracted from the data loaded into the TessdataManager and written to somepath/somelang.unicharset.
Returns: true if the component was successfully extracted; false if the component was not present in the traineddata loaded into TessdataManager.
Definition at line 295 of file tessdatamanager.cpp.
298 tesseract::TessdataManager::TessdataTypeFromFileName(filename, &
type));
299 if (entries_[
type].empty())
return false;
◆ GetComponent() [1/2]
◆ GetComponent() [2/2]
bool tesseract::TessdataManager::GetComponent(TessdataType type, TFile *fp) const
◆ GetDataFileName()
const STRING& tesseract::TessdataManager::GetDataFileName() const [inline]
◆ Init()
bool tesseract::TessdataManager::Init(const char *data_file_name)
Opens and reads the given data file right now.
Returns: true on success.
Definition at line 97 of file tessdatamanager.cpp.
99 if (reader_ ==
nullptr) {
100 #if defined(HAVE_LIBARCHIVE)
101 if (LoadArchiveFile(data_file_name))
return true;
105 if (!(*reader_)(data_file_name, &data))
return false;
◆ is_loaded()
bool tesseract::TessdataManager::is_loaded() const [inline]
◆ IsBaseAvailable()
bool tesseract::TessdataManager::IsBaseAvailable() const [inline]
◆ IsComponentAvailable()
bool tesseract::TessdataManager::IsComponentAvailable(TessdataType type) const [inline]
◆ IsLSTMAvailable()
bool tesseract::TessdataManager::IsLSTMAvailable() const [inline]
◆ LoadFileLater()
void tesseract::TessdataManager::LoadFileLater(const char *data_file_name)
◆ LoadMemBuffer()
bool tesseract::TessdataManager::LoadMemBuffer(const char *name, const char *data, int size)
Definition at line 111 of file tessdatamanager.cpp.
115 data_file_name_ = name;
118 uint32_t num_entries;
119 if (!fp.DeSerialize(&num_entries))
return false;
120 swap_ = num_entries > kMaxNumTessdataEntries;
122 if (swap_)
ReverseN(&num_entries,
sizeof(num_entries));
123 if (num_entries > kMaxNumTessdataEntries)
return false;
126 if (!fp.DeSerialize(&offset_table[0], num_entries))
return false;
128 if (offset_table[i] >= 0) {
129 int64_t entry_size = size - offset_table[i];
131 while (j < num_entries && offset_table[j] == -1) ++j;
132 if (j < num_entries) entry_size = offset_table[j] - offset_table[i];
134 if (!fp.DeSerialize(&entries_[i][0], entry_size))
return false;
◆ OverwriteComponents()
bool tesseract::TessdataManager::OverwriteComponents(const char *new_traineddata_filename, char **component_filenames, int num_new_components)
Gets the individual components from the data_file_ with which the class was initialized. Overwrites the components specified by component_filenames. Writes the updated traineddata file to new_traineddata_filename.
Definition at line 275 of file tessdatamanager.cpp.
281 for (
int i = 0; i < num_new_components; ++i) {
283 if (TessdataTypeFromFileName(component_filenames[i], &
type)) {
285 tprintf(
"Failed to read component file:%s\n", component_filenames[i]);
292 return SaveFile(new_traineddata_filename,
nullptr);
◆ OverwriteEntry()
void tesseract::TessdataManager::OverwriteEntry(TessdataType type, const char *data, int size)
◆ SaveFile()
bool tesseract::TessdataManager::SaveFile(const STRING &filename, FileWriter writer) const
◆ Serialize()
void tesseract::TessdataManager::Serialize(GenericVector< char > *data) const
Definition at line 166 of file tessdatamanager.cpp.
171 int64_t offset =
sizeof(int32_t) +
sizeof(offset_table);
173 if (entries_[i].empty()) {
174 offset_table[i] = -1;
176 offset_table[i] = offset;
177 offset += entries_[i].
size();
184 fp.Serialize(&num_entries);
185 fp.Serialize(&offset_table[0],
countof(offset_table));
186 for (
const auto& entry : entries_) {
187 if (!entry.empty()) {
188 fp.Serialize(&entry[0], entry.size());
◆ SetVersionString()
void tesseract::TessdataManager::SetVersionString(const std::string &v_str)
◆ swap()
bool tesseract::TessdataManager::swap() const [inline]
◆ VersionString()
std::string tesseract::TessdataManager::VersionString() const
The documentation for this class was generated from the following files: