#include <tessdatamanager.h>
|
| TessdataManager () |
|
| TessdataManager (FileReader reader) |
|
| ~TessdataManager ()=default |
|
bool | swap () const |
|
bool | is_loaded () const |
|
void | LoadFileLater (const char *data_file_name) |
|
bool | Init (const char *data_file_name) |
|
bool | LoadMemBuffer (const char *name, const char *data, int size) |
|
void | OverwriteEntry (TessdataType type, const char *data, int size) |
|
bool | SaveFile (const STRING &filename, FileWriter writer) const |
|
void | Serialize (GenericVector< char > *data) const |
|
void | Clear () |
|
void | Directory () const |
|
bool | IsComponentAvailable (TessdataType type) const |
|
bool | GetComponent (TessdataType type, TFile *fp) |
|
bool | GetComponent (TessdataType type, TFile *fp) const |
|
std::string | VersionString () const |
|
void | SetVersionString (const std::string &v_str) |
|
bool | IsBaseAvailable () const |
|
bool | IsLSTMAvailable () const |
|
const STRING & | GetDataFileName () const |
|
bool | CombineDataFiles (const char *language_data_path_prefix, const char *output_filename) |
|
bool | OverwriteComponents (const char *new_traineddata_filename, char **component_filenames, int num_new_components) |
|
bool | ExtractToFile (const char *filename) |
|
Definition at line 126 of file tessdatamanager.h.
◆ TessdataManager() [1/2]
tesseract::TessdataManager::TessdataManager |
( |
| ) |
|
Definition at line 37 of file tessdatamanager.cpp.
37 : reader_(
nullptr), is_loaded_(
false), swap_(
false) {
void SetVersionString(const std::string &v_str)
◆ TessdataManager() [2/2]
tesseract::TessdataManager::TessdataManager |
( |
FileReader |
reader | ) |
|
|
explicit |
◆ ~TessdataManager()
tesseract::TessdataManager::~TessdataManager |
( |
| ) |
|
|
default |
◆ Clear()
void tesseract::TessdataManager::Clear |
( |
| ) |
|
◆ CombineDataFiles()
bool tesseract::TessdataManager::CombineDataFiles |
( |
const char * |
language_data_path_prefix, |
|
|
const char * |
output_filename |
|
) |
| |
Reads all the standard tesseract config and data files for a language at the given path and bundles them up into one binary data file. Returns true if the combined traineddata file was successfully written.
Definition at line 196 of file tessdatamanager.cpp.
203 STRING filename = language_data_path_prefix;
204 filename += kTessdataFileSuffixes[i];
205 FILE *fp = fopen(filename.
string(),
"rb");
219 "Error: traineddata file must contain at least (a unicharset file" 220 "and inttemp) OR an lstm file.\n");
224 return SaveFile(output_filename,
nullptr);
bool IsLSTMAvailable() const
const char * string() const
DLLSYM void tprintf(const char *format,...)
static bool TessdataTypeFromFileSuffix(const char *suffix, TessdataType *type)
bool SaveFile(const STRING &filename, FileWriter writer) const
bool LoadDataFromFile(const char *filename, GenericVector< char > *data)
bool IsBaseAvailable() const
◆ Directory()
void tesseract::TessdataManager::Directory |
( |
| ) |
const |
Definition at line 154 of file tessdatamanager.cpp.
158 if (!entries_[i].empty()) {
159 tprintf(
"%d:%s:size=%d, offset=%d\n", i, kTessdataFileSuffixes[i],
160 entries_[i].size(), offset);
161 offset += entries_[i].
size();
DLLSYM void tprintf(const char *format,...)
std::string VersionString() const
◆ ExtractToFile()
bool tesseract::TessdataManager::ExtractToFile |
( |
const char * |
filename | ) |
|
Extracts the tessdata component implied by the name of the input file from the combined traineddata loaded into TessdataManager. Writes the extracted component to the file indicated by the file name. E.g. if the filename given is somepath/somelang.unicharset, unicharset will be extracted from the data loaded into the TessdataManager and will be written to somepath/somelang.unicharset.
Returns: true if the component was successfully extracted, false if the component was not present in the traineddata loaded into TessdataManager.
Definition at line 246 of file tessdatamanager.cpp.
250 if (entries_[type].empty())
return false;
static bool TessdataTypeFromFileName(const char *filename, TessdataType *type)
bool SaveDataToFile(const GenericVector< char > &data, const STRING &filename)
◆ GetComponent() [1/2]
Definition at line 168 of file tessdatamanager.cpp.
169 if (!is_loaded_ && !
Init(data_file_name_.
string()))
return false;
171 return const_this->GetComponent(type, fp);
const char * string() const
bool Init(const char *data_file_name)
◆ GetComponent() [2/2]
bool tesseract::TessdataManager::GetComponent |
( |
TessdataType |
type, |
|
|
TFile * |
fp |
|
) |
| const |
Definition at line 176 of file tessdatamanager.cpp.
178 if (entries_[type].empty())
return false;
179 fp->Open(&entries_[type][0], entries_[type].size());
◆ GetDataFileName()
const STRING& tesseract::TessdataManager::GetDataFileName |
( |
| ) |
const |
|
inline |
◆ Init()
bool tesseract::TessdataManager::Init |
( |
const char * |
data_file_name | ) |
|
Opens and reads the given data file right now.
Returns: true on success.
Definition at line 55 of file tessdatamanager.cpp.
57 if (reader_ ==
nullptr) {
60 if (!(*reader_)(data_file_name, &data))
return false;
bool LoadMemBuffer(const char *name, const char *data, int size)
bool LoadDataFromFile(const char *filename, GenericVector< char > *data)
◆ is_loaded()
bool tesseract::TessdataManager::is_loaded |
( |
| ) |
const |
|
inline |
◆ IsBaseAvailable()
bool tesseract::TessdataManager::IsBaseAvailable |
( |
| ) |
const |
|
inline |
◆ IsComponentAvailable()
bool tesseract::TessdataManager::IsComponentAvailable |
( |
TessdataType |
type | ) |
const |
|
inline |
◆ IsLSTMAvailable()
bool tesseract::TessdataManager::IsLSTMAvailable |
( |
| ) |
const |
|
inline |
◆ LoadFileLater()
void tesseract::TessdataManager::LoadFileLater |
( |
const char * |
data_file_name | ) |
|
◆ LoadMemBuffer()
bool tesseract::TessdataManager::LoadMemBuffer |
( |
const char * |
name, |
|
|
const char * |
data, |
|
|
int |
size |
|
) |
| |
Definition at line 66 of file tessdatamanager.cpp.
69 data_file_name_ = name;
73 if (!fp.DeSerialize(&num_entries))
return false;
74 swap_ = num_entries > kMaxNumTessdataEntries;
76 if (swap_)
ReverseN(&num_entries,
sizeof(num_entries));
77 if (num_entries > kMaxNumTessdataEntries)
return false;
80 if (!fp.DeSerialize(&offset_table[0], num_entries))
return false;
82 if (offset_table[i] >= 0) {
83 int64_t entry_size = size - offset_table[i];
85 while (j < num_entries && offset_table[j] == -1) ++j;
86 if (j < num_entries) entry_size = offset_table[j] - offset_table[i];
88 if (!fp.DeSerialize(&entries_[i][0], entry_size))
return false;
void SetVersionString(const std::string &v_str)
void resize_no_init(int size)
void ReverseN(void *ptr, int num_bytes)
◆ OverwriteComponents()
bool tesseract::TessdataManager::OverwriteComponents |
( |
const char * |
new_traineddata_filename, |
|
|
char ** |
component_filenames, |
|
|
int |
num_new_components |
|
) |
| |
Gets the individual components from the data_file_ with which the class was initialized. Overwrites the components specified by component_filenames. Writes the updated traineddata file to new_traineddata_filename.
Definition at line 227 of file tessdatamanager.cpp.
232 for (
int i = 0; i < num_new_components; ++i) {
236 tprintf(
"Failed to read component file:%s\n", component_filenames[i]);
243 return SaveFile(new_traineddata_filename,
nullptr);
static bool TessdataTypeFromFileName(const char *filename, TessdataType *type)
DLLSYM void tprintf(const char *format,...)
bool SaveFile(const STRING &filename, FileWriter writer) const
bool LoadDataFromFile(const char *filename, GenericVector< char > *data)
◆ OverwriteEntry()
void tesseract::TessdataManager::OverwriteEntry |
( |
TessdataType |
type, |
|
|
const char * |
data, |
|
|
int |
size |
|
) |
| |
Definition at line 99 of file tessdatamanager.cpp.
103 memcpy(&entries_[type][0], data, size);
void resize_no_init(int size)
◆ SaveFile()
bool tesseract::TessdataManager::SaveFile |
( |
const STRING & |
filename, |
|
|
FileWriter |
writer |
|
) |
| const |
Definition at line 107 of file tessdatamanager.cpp.
112 if (writer ==
nullptr)
115 return (*writer)(data, filename);
bool SaveDataToFile(const GenericVector< char > &data, const STRING &filename)
void Serialize(GenericVector< char > *data) const
◆ Serialize()
void tesseract::TessdataManager::Serialize |
( |
GenericVector< char > * |
data | ) |
const |
Definition at line 119 of file tessdatamanager.cpp.
123 int64_t offset =
sizeof(int32_t) +
sizeof(offset_table);
125 if (entries_[i].empty()) {
126 offset_table[i] = -1;
128 offset_table[i] = offset;
129 offset += entries_[i].
size();
136 fp.Serialize(&num_entries);
137 fp.Serialize(&offset_table[0],
countof(offset_table));
139 if (!entries_[i].empty()) {
140 fp.Serialize(&entries_[i][0], entries_[i].size());
constexpr size_t countof(T const (&)[N]) noexcept
void init_to_size(int size, const T &t)
◆ SetVersionString()
void tesseract::TessdataManager::SetVersionString |
( |
const std::string & |
v_str | ) |
|
◆ swap()
bool tesseract::TessdataManager::swap |
( |
| ) |
const |
|
inline |
◆ TessdataTypeFromFileName()
bool tesseract::TessdataManager::TessdataTypeFromFileName |
( |
const char * |
filename, |
|
|
TessdataType * |
type |
|
) |
| |
|
static |
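Tries to determine the tessdata component type from the file suffix of filename (the text after the last '.'). Returns true on success; returns false if filename has no '.' or nothing follows it.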
Definition at line 267 of file tessdatamanager.cpp.
270 const char *suffix = strrchr(filename,
'.');
271 if (suffix ==
nullptr || *(++suffix) ==
'\0')
return false;
static bool TessdataTypeFromFileSuffix(const char *suffix, TessdataType *type)
◆ TessdataTypeFromFileSuffix()
bool tesseract::TessdataManager::TessdataTypeFromFileSuffix |
( |
const char * |
suffix, |
|
|
TessdataType * |
type |
|
) |
| |
|
static |
Fills type with the TessdataType of the tessdata component represented by the given file suffix. E.g. the suffix of tessdata/eng.unicharset, "unicharset", -> TESSDATA_UNICHARSET.
Returns: true if the tessdata component type could be determined from the given suffix.
Definition at line 254 of file tessdatamanager.cpp.
257 if (strcmp(kTessdataFileSuffixes[i], suffix) == 0) {
262 tprintf(
"TessdataManager can't determine which tessdata" 263 " component is represented by %s\n", suffix);
DLLSYM void tprintf(const char *format,...)
◆ VersionString()
std::string tesseract::TessdataManager::VersionString |
( |
| ) |
const |
The documentation for this class was generated from the following files: