138 if (cube_alt_list ==
NULL || cube_alt_list->AltCount() <= 0)
142 char_32 *cube_best_str32 = cube_alt_list->Alt(0);
145 string cube_best_str;
146 int cube_best_cost = cube_alt_list->AltCost(0);
147 int cube_best_bigram_cost = 0;
148 bool cube_best_bigram_cost_valid =
true;
150 cube_best_bigram_cost = cube_cntxt_->
Bigrams()->
153 cube_best_bigram_cost_valid =
false;
161 *agreement = (tess_str.compare(cube_best_str) == 0);
165 string cube_next_best_str;
167 if (cube_alt_list->AltCount() > 1) {
168 cube_next_best_str32 = cube_alt_list->Alt(1);
169 if (cube_next_best_str32 ==
NULL ||
173 cube_next_best_cost = cube_alt_list->AltCost(1);
178 for (tess_rank = 0; tess_rank < cube_alt_list->AltCount(); tess_rank++) {
181 if (alt_str == tess_str)
187 int tess_cost = cube_obj->WordCost(tess_str.c_str());
189 int tess_bigram_cost = 0;
190 int tess_bigram_cost_valid =
true;
192 tess_bigram_cost = cube_cntxt_->
Bigrams()->
195 tess_bigram_cost_valid =
false;
198 features->push_back(tess_confidence);
200 features->push_back(tess_cost);
202 features->push_back(tess_rank);
204 features->push_back(tess_str.length());
206 features->push_back(
ValidWord(tess_str));
207 if (tess_bigram_cost_valid) {
209 features->push_back(tess_bigram_cost);
212 features->push_back(cube_best_cost);
214 features->push_back(cube_next_best_cost);
216 features->push_back(cube_best_str.length());
218 features->push_back(
ValidWord(cube_best_str));
219 if (cube_best_bigram_cost_valid) {
221 features->push_back(cube_best_bigram_cost);
224 int compare_nocase_punc = CompareStrings(cube_best_str,
225 tess_str,
false,
true);
226 features->push_back(compare_nocase_punc == 0);
228 int compare_case_nopunc = CompareStrings(cube_best_str,
229 tess_str,
true,
false);
230 features->push_back(compare_case_nopunc == 0);
232 int compare_nocase_nopunc = CompareStrings(cube_best_str,
233 tess_str,
true,
true);
234 features->push_back(compare_nocase_nopunc == 0);
basic_string< char_32 > string_32
bool ValidWord(const string &str)
static void UTF8ToUTF32(const char *utf8_str, string_32 *str32)
CharBigrams * Bigrams() const
static int StrLen(const char_32 *str)
static void UTF32ToUTF8(const char_32 *utf32_str, string *str)
CharSet * CharacterSet() const