40 void WordListLangModel::Cleanup() {
49 bool WordListLangModel::Init() {
75 if (Init() ==
false) {
86 if (tess_lm_edge ==
NULL) {
89 edge_ref = tess_lm_edge->
EndEdge();
100 if (edge_array ==
NULL) {
106 edge_array + (*edge_cnt));
123 vector<WERD_CHOICE *> *word_variants) {
124 int str_len = str32.length();
126 if (word_so_far->
length() > 0) {
127 word_variants->push_back(
new WERD_CHOICE(*word_so_far));
131 for (
int len = 1; len <= str_len; len++) {
133 string_32 str_pref32 = str32.substr(0, len);
134 int class_id = char_set.
ClassID(reinterpret_cast<const char_32 *>(
135 str_pref32.c_str()));
139 string_32 new_prefix_str32 = prefix_str32 + str_pref32;
142 WordVariants(char_set, new_prefix_str32, word_so_far, new_str32,
155 vector<WERD_CHOICE *> *word_variants) {
156 for (
int i = 0; i < word_variants->size(); i++) {
157 delete (*word_variants)[i];
159 word_variants->clear();
162 WordVariants(char_set, prefix_str32, &word_so_far, str32, word_variants);
167 if (!init_ && !Init()) {
173 if (str32.length() < 1) {
181 if (char_32_ptr ==
NULL) {
185 vector<WERD_CHOICE *> word_variants;
187 char_32_ptr, &word_variants);
189 if (word_variants.size() > 0) {
191 int shortest_word = 0;
192 for (
int word = 1; word < word_variants.size(); word++) {
193 if (word_variants[shortest_word]->length() >
194 word_variants[word]->length()) {
195 shortest_word = word;
201 for (
int i = 0; i < word_variants.size(); i++) {
delete word_variants[i]; }
void append_unichar_id(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty)
bool AddString32(const char_32 *char_32_ptr)
bool IsValidSequence(const char_32 *sequence, bool eow_flag, LangModEdge **edges)
basic_string< char_32 > string_32
WordListLangModel(CubeRecoContext *cntxt)
bool add_word_to_dawg(const WERD_CHOICE &word, const GenericVector< bool > *repetitions)
static int CreateChildren(CubeRecoContext *cntxt, const Dawg *edges, NODE_REF edge_reg, LangModEdge **lm_edges)
static void UTF8ToUTF32(const char *utf8_str, string_32 *str32)
int ClassID(const char_32 *str) const
void remove_last_unichar_id()
const UNICHARSET * TessUnicharset() const
LangModEdge ** GetEdges(CharAltList *alt_list, LangModEdge *edge, int *edge_cnt)
static void WordVariants(const CharSet &char_set, const UNICHARSET *uchset, string_32 str32, vector< WERD_CHOICE * > *word_variants)
NODE_REF next_node(EDGE_REF edge_ref) const
CharSet * CharacterSet() const
bool AddString(const char *char_ptr)