// Include guard for this header, followed by two size constants:
//   MAX_AMBIG_SIZE (10)             - appears elsewhere in this listing as the
//                                     bound MAX_AMBIG_SIZE+1 of UNICHAR_ID arrays.
//   kUnigramAmbigsBufferSize (1000) - its use is not visible in this listing.
// NOTE(review): the directives below sit on one line with listing numbers
// mixed in; this is residue of the listing and is left as found.
21 #ifndef TESSERACT_CCUTIL_AMBIGS_H_ 22 #define TESSERACT_CCUTIL_AMBIGS_H_ 30 #define MAX_AMBIG_SIZE 10 36 static const int kUnigramAmbigsBufferSize = 1000;
// NUL-terminated string holding a single space character.
// NOTE(review): by its name this separates the parts of an ambiguity n-gram;
// the code that uses it is not visible here - confirm.
37 static const char kAmbigNgramSeparator[] = {
' ',
'\0' };
// Delimiter character set: a tab and a space.
// NOTE(review): presumably the token delimiters for lines of an ambiguity
// file; the code that uses it is not visible here - confirm.
38 static const char kAmbigDelimiters[] =
"\t ";
// printf-style message for a malformed ambiguity specification.
// Takes one int argument (%d), the line number, and ends with a newline.
39 static const char kIllegalMsg[] =
40 "Illegal ambiguity specification on line %d\n";
// printf-style message for an unrecognised unichar in an ambiguity
// specification. Takes one string argument (%s) and ends with a newline.
41 static const char kIllegalUnicharMsg[] =
42 "Illegal unichar %s in ambiguity specification\n";
// Fragment of a three-way comparison on val1/val2; the enclosing function
// header and the code that produces val1 and val2 are not visible here.
// Visible ordering rules, checked in this order:
//   val1 is INVALID_UNICHAR_ID -> -1
//   val2 is INVALID_UNICHAR_ID ->  1
//   val1 < val2                -> -1
67 if (val1 == INVALID_UNICHAR_ID)
return -1;
68 if (val2 == INVALID_UNICHAR_ID)
return 1;
69 if (val1 < val2)
return -1;
// NOTE(review): the listing skips lines here. This check returns 0 when val1
// is INVALID_UNICHAR_ID; presumably it is reached only when val1 == val2, so
// both sequences ended together - confirm against the full source.
72 if (val1 == INVALID_UNICHAR_ID)
return 0;
// Linear scan of uid_vec from index 0: returns the index of the first element
// equal to uid. The value returned when no element matches is not visible in
// this listing.
80 for (
int i = 0; i < uid_vec.
size(); ++i)
81 if (uid_vec[i] == uid)
return i;
// Closing brace and condition of a do/while loop whose body is not visible
// here. The loop repeats until the element of dst read at index i equals
// INVALID_UNICHAR_ID; i is incremented after each read.
// NOTE(review): presumably the body copies src[i] into dst[i], so the
// terminator itself is copied before the loop stops - confirm.
92 }
while (dst[i++] != INVALID_UNICHAR_ID);
// Fragment of a routine that prints a sequence of ids ending in
// INVALID_UNICHAR_ID; the function header is not visible here.
// Prints "[Empty]" when the first element is already the terminator.
101 if (*ptr == INVALID_UNICHAR_ID)
tprintf(
"[Empty]");
// First pass over the sequence; the loop body is not visible in this listing.
102 while (*ptr != INVALID_UNICHAR_ID) {
// Second pass: prints each id as a decimal number followed by a space,
// advancing ptr until the terminator is reached.
107 while (*ptr != INVALID_UNICHAR_ID)
tprintf(
"%d ", *ptr++);
// Return early with a non-zero result. How result is computed, and what
// follows when it is zero, is not visible in this listing.
126 if (result != 0)
return result;
// Calls delete_data_pointers() on each of the three ambiguity tables.
// NOTE(review): the enclosing function is not visible (presumably the
// destructor), and delete_data_pointers() presumably deletes the objects the
// tables point to - confirm against the container's definition.
147 replace_ambigs_.delete_data_pointers();
148 dang_ambigs_.delete_data_pointers();
149 one_to_one_definite_ambigs_.delete_data_pointers();
// Declaration only; the definition is not part of this listing.
//   unicharset              - character set, passed by const reference.
//   use_ambigs_for_adaption - boolean flag; its effect is not visible here.
// NOTE(review): by name this prepares the ambiguity tables for the given
// unicharset - confirm in the implementation.
156 void InitUnicharAmbigs(
const UNICHARSET& unicharset,
157 bool use_ambigs_for_adaption);
// Declaration only; the definition is not part of this listing.
//   encoder_set             - character set, passed by const reference.
//   ambigs_file             - file object to read from (non-const pointer).
//   debug_level             - integer debug level.
//   use_ambigs_for_adaption - boolean flag; its effect is not visible here.
//   unicharset              - non-const pointer, so the callee may modify it.
// NOTE(review): the roles of encoder_set and unicharset are inferred from
// their names and constness only - confirm in the implementation.
174 void LoadUnicharAmbigs(
const UNICHARSET& encoder_set,
175 TFile *ambigs_file,
int debug_level,
176 bool use_ambigs_for_adaption,
UNICHARSET *unicharset);
// Accessor body (function header not visible in this listing).
// Returns nullptr when the one-to-one definite ambiguity table is empty;
// otherwise returns the entry indexed by unichar_id. No range check on
// unichar_id is visible here.
181 if (one_to_one_definite_ambigs_.empty())
return nullptr;
182 return one_to_one_definite_ambigs_[unichar_id];
// Accessor body (function header not visible in this listing).
// Returns nullptr when ambigs_for_adaption_ is empty; otherwise returns the
// entry indexed by unichar_id. No range check on unichar_id is visible here.
192 if (ambigs_for_adaption_.empty())
return nullptr;
193 return ambigs_for_adaption_[unichar_id];
// Accessor body (function header not visible in this listing).
// Returns nullptr when reverse_ambigs_for_adaption_ is empty; otherwise
// returns the entry indexed by unichar_id. No range check on unichar_id is
// visible here.
201 if (reverse_ambigs_for_adaption_.empty())
return nullptr;
202 return reverse_ambigs_for_adaption_[unichar_id];
// Declaration only; returns bool. The definition is not part of this listing.
// NOTE(review): the listing numbering skips entries inside this parameter
// list, so some parameters are probably missing from what is shown here.
// NOTE(review): the pointer parameters (test_ambig_part_size,
// replacement_ambig_part_size, replacement_string, type) look like outputs
// filled in by the parser, and the bool presumably reports whether the line
// parsed successfully - confirm in the implementation.
206 bool ParseAmbiguityLine(
int line_num,
int version,
int debug_level,
208 int *test_ambig_part_size,
210 int *replacement_ambig_part_size,
211 char *replacement_string,
int *type);
// Fragment of a parameter list: the function name, return type and any
// remaining parameters are not visible in this listing.
// NOTE(review): unlike the parser declaration, the sizes, replacement string
// and type are passed by value or as const here, so they look like inputs -
// confirm against the full declaration.
213 int test_ambig_part_size,
UNICHAR_ID *test_unichar_ids,
214 int replacement_ambig_part_size,
215 const char *replacement_string,
int type,
227 #endif // TESSERACT_CCUTIL_AMBIGS_H_
#define ELISTIZEH(CLASSNAME)
static int compare_ambig_specs(const void *spec1, const void *spec2)
const UnicharIdVector * AmbigsForAdaption(UNICHAR_ID unichar_id) const
static int compare(const UNICHAR_ID *ptr1, const UNICHAR_ID *ptr2)
const UnicharIdVector * OneToOneDefiniteAmbigs(UNICHAR_ID unichar_id) const
static int find_in(const UnicharIdVector &uid_vec, const UNICHAR_ID uid)
static void print(const UNICHAR_ID array[], const UNICHARSET &unicharset)
static int copy(const UNICHAR_ID src[], UNICHAR_ID dst[])
const UnicharAmbigsVector & dang_ambigs() const
DLLSYM void tprintf(const char *format,...)
UNICHAR_ID correct_ngram_id
const UnicharIdVector * ReverseAmbigsForAdaption(UNICHAR_ID unichar_id) const
UNICHAR_ID correct_fragments[MAX_AMBIG_SIZE+1]
const char * id_to_unichar(UNICHAR_ID id) const
UNICHAR_ID wrong_ngram[MAX_AMBIG_SIZE+1]
const UnicharAmbigsVector & replace_ambigs() const