30 class_strings_ =
NULL;
31 unicharset_map_ =
NULL;
35 memset(hash_bin_size_, 0,
sizeof(hash_bin_size_));
39 if (class_strings_ !=
NULL) {
40 for (
int cls = 0; cls < class_cnt_; cls++) {
41 if (class_strings_[cls] !=
NULL) {
42 delete class_strings_[cls];
45 delete []class_strings_;
46 class_strings_ =
NULL;
48 delete []unicharset_map_;
57 if (char_set ==
NULL) {
62 bool cube_unicharset_exists;
63 if (!(cube_unicharset_exists =
66 fprintf(stderr,
"Cube ERROR (CharSet::Create): could not find "
67 "either cube or tesseract unicharset\n");
72 fprintf(stderr,
"Cube ERROR (CharSet::Create): could not load "
81 if (cube_unicharset_exists) {
84 loaded = loaded && char_set->LoadSupportedCharList(
86 char_set->unicharset_ = &char_set->cube_unicharset_;
88 loaded = char_set->LoadSupportedCharList(charset_fp,
NULL);
89 char_set->unicharset_ = tess_unicharset;
96 char_set->init_ =
true;
101 bool CharSet::LoadSupportedCharList(FILE *fp,
UNICHARSET *tess_unicharset) {
107 memset(hash_bin_size_, 0,
sizeof(hash_bin_size_));
109 if (fgets(str_line,
sizeof(str_line), fp) ==
NULL) {
110 fprintf(stderr,
"Cube ERROR (CharSet::InitMemory): could not "
111 "read char count.\n");
114 class_cnt_ = atoi(str_line);
115 if (class_cnt_ < 2) {
116 fprintf(stderr,
"Cube ERROR (CharSet::InitMemory): invalid "
117 "class count: %d\n", class_cnt_);
121 class_strings_ =
new string_32*[class_cnt_];
122 if (class_strings_ ==
NULL) {
123 fprintf(stderr,
"Cube ERROR (CharSet::InitMemory): could not "
124 "allocate memory for class strings.\n");
128 if (tess_unicharset) {
129 unicharset_map_ =
new int[class_cnt_];
130 if (unicharset_map_ ==
NULL) {
131 fprintf(stderr,
"Cube ERROR (CharSet::InitMemory): could not "
132 "allocate memory for unicharset map.\n");
138 for (
int class_id = 0; class_id < class_cnt_; class_id++) {
140 if (fgets(str_line,
sizeof(str_line), fp) ==
NULL) {
141 fprintf(stderr,
"Cube ERROR (CharSet::ReadAndHashStrings): "
142 "could not read class string with class_id=%d.\n", class_id);
146 char *p = strchr(str_line,
' ');
152 if (strcmp(str_line,
"NULL") == 0) {
153 strcpy(str_line,
" ");
156 class_strings_[class_id] =
new string_32(str32);
157 if (class_strings_[class_id] ==
NULL) {
158 fprintf(stderr,
"Cube ERROR (CharSet::ReadAndHashStrings): could not "
159 "allocate memory for class string with class_id=%d.\n", class_id);
164 int hash_val = Hash(reinterpret_cast<const char_32 *>(str32.c_str()));
165 if (hash_bin_size_[hash_val] >= kMaxHashSize) {
166 fprintf(stderr,
"Cube ERROR (CharSet::LoadSupportedCharList): hash "
170 hash_bins_[hash_val][hash_bin_size_[hash_val]++] = class_id;
172 if (tess_unicharset !=
NULL) {
175 if (tess_id == INVALID_UNICHAR_ID) {
180 unicharset_map_[class_id] = tess_id;
FILE * GetDataFilePtr() const
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
basic_string< char_32 > string_32
bool load_from_file(const char *const filename, bool skip_fragments)
static void UTF8ToUTF32(const char *utf8_str, string_32 *str32)
void unichar_insert(const char *const unichar_repr)
bool SeekToStart(TessdataType tessdata_type)
static CharSet * Create(TessdataManager *tessdata_manager, UNICHARSET *tess_unicharset)