42 5, 10, 16, 16, 16, 16, 16, 16, 16, 16,
50 if (
code == null_char) {
59 if (depth > 0 &&
prev !=
nullptr) {
61 prev->
Print(null_char, unicharset, depth - 1);
69 int null_char,
bool simple_text,
Dict* dict)
75 space_delimited_(true),
76 is_simple_text_(simple_text),
77 null_char_(null_char) {
83 double cert_offset,
double worst_dict_cert,
84 const UNICHARSET* charset,
int lstm_choice_mode) {
86 int width = output.
Width();
89 for (
int t = 0; t < width; ++t) {
90 ComputeTopN(output.
f(t), output.
NumFeatures(), kBeamWidths[0]);
91 DecodeStep(output.
f(t), t, dict_ratio, cert_offset, worst_dict_cert,
93 if (lstm_choice_mode) {
94 SaveMostCertainChoices(output.
f(t), output.
NumFeatures(), charset, t);
99 double dict_ratio,
double cert_offset,
100 double worst_dict_cert,
103 int width = output.
dim1();
104 for (
int t = 0; t < width; ++t) {
105 ComputeTopN(output[t], output.
dim2(), kBeamWidths[0]);
106 DecodeStep(output[t], t, dict_ratio, cert_offset, worst_dict_cert, charset);
110 void RecodeBeamSearch::SaveMostCertainChoices(
const float* outputs,
114 std::vector<std::pair<const char*, float>> choices;
116 for (
int i = 0; i < num_outputs; ++i) {
117 if (outputs[i] >= 0.01f) {
119 if (i + 2 >= num_outputs) {
129 while (choices.size() > pos && choices[pos].second > outputs[i]) {
132 choices.insert(choices.begin() + pos,
133 std::pair<const char*, float>(
character, outputs[i]));
145 ExtractBestPaths(&best_nodes,
nullptr);
148 int width = best_nodes.
size();
150 int label = best_nodes[t]->code;
151 if (label != null_char_) {
155 while (++t < width && !is_simple_text_ && best_nodes[t]->code == label) {
168 ExtractBestPaths(&best_nodes,
nullptr);
169 ExtractPathAsUnicharIds(best_nodes, unichar_ids, certs, ratings, xcoords);
171 DebugPath(unicharset, best_nodes);
172 DebugUnicharPath(unicharset, best_nodes, *unichar_ids, *certs, *ratings,
179 float scale_factor,
bool debug,
182 int lstm_choice_mode) {
190 std::deque<std::pair<int,int>> best_choices;
191 ExtractBestPaths(&best_nodes, &second_nodes);
193 DebugPath(unicharset, best_nodes);
194 ExtractPathAsUnicharIds(second_nodes, &unichar_ids, &certs, &ratings,
196 tprintf(
"\nSecond choice path:\n");
197 DebugUnicharPath(unicharset, second_nodes, unichar_ids, certs, ratings,
204 if (lstm_choice_mode == 2) {
205 ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings,
206 &xcoords, &best_choices);
207 if (best_choices.size() > 0) {
208 current_char = best_choices.front().first;
209 timestepEnd = best_choices.front().second;
210 best_choices.pop_front();
213 ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings,
216 int num_ids = unichar_ids.
size();
218 DebugUnicharPath(unicharset, best_nodes, unichar_ids, certs, ratings,
223 float prev_space_cert = 0.0f;
224 for (
int word_start = 0; word_start < num_ids; word_start = word_end) {
225 for (word_end = word_start + 1; word_end < num_ids; ++word_end) {
230 int index = xcoords[word_end];
231 if (best_nodes[index]->start_of_word)
break;
237 float space_cert = 0.0f;
238 if (word_end < num_ids && unichar_ids[word_end] ==
UNICHAR_SPACE)
239 space_cert = certs[word_end];
241 word_start > 0 && unichar_ids[word_start - 1] ==
UNICHAR_SPACE;
243 WERD_RES* word_res = InitializeWord(
244 leading_space, line_box, word_start, word_end,
245 std::min(space_cert, prev_space_cert), unicharset, xcoords, scale_factor);
246 if (lstm_choice_mode == 1) {
247 for (
size_t i = timestepEnd; i < xcoords[word_end]; i++) {
250 timestepEnd = xcoords[word_end];
251 }
else if (lstm_choice_mode == 2) {
253 std::vector<std::pair<const char*, float>> choice_pairs;
254 for (
size_t i = timestepEnd; i < xcoords[word_end]; i++) {
255 for (std::pair<const char*, float> choice :
timesteps[i]) {
256 if (std::strcmp(choice.first,
"") != 0) {
257 sum += choice.second;
258 choice_pairs.push_back(choice);
261 if ((best_choices.size() > 0 && i == best_choices.front().second - 1)
262 || i == xcoords[word_end]-1) {
263 std::map<const char*, float> summed_propabilities;
264 for (
auto it = choice_pairs.begin(); it != choice_pairs.end(); ++it) {
265 summed_propabilities[it->first] += it->second;
267 std::vector<std::pair<const char*, float>> accumulated_timestep;
268 accumulated_timestep.push_back(std::pair<const char*,float>
270 (current_char), 2.0));
272 for (
auto it = summed_propabilities.begin();
273 it != summed_propabilities.end(); ++it) {
277 while (accumulated_timestep.size() > pos
278 && accumulated_timestep[pos].second > it->second) {
281 accumulated_timestep.insert(accumulated_timestep.begin() + pos,
282 std::pair<const char*,float>(it->first,
285 if (best_choices.size() > 0) {
286 current_char = best_choices.front().first;
287 best_choices.pop_front();
289 choice_pairs.clear();
290 word_res->
timesteps.push_back(accumulated_timestep);
294 timestepEnd = xcoords[word_end];
296 for (
int i = word_start; i < word_end; ++i) {
297 BLOB_CHOICE_LIST* choices =
new BLOB_CHOICE_LIST;
298 BLOB_CHOICE_IT bc_it(choices);
300 unichar_ids[i], ratings[i], certs[i], -1, 1.0f,
302 int col = i - word_start;
304 bc_it.add_after_then_move(choice);
307 int index = xcoords[word_end - 1];
310 prev_space_cert = space_cert;
311 if (word_end < num_ids && unichar_ids[word_end] ==
UNICHAR_SPACE)
318 for (
int p = 0; p < beam_size_; ++p) {
319 for (
int d = 0; d < 2; ++d) {
320 for (
int c = 0; c <
NC_COUNT; ++c) {
323 if (beam_[p]->beams_[index].empty())
continue;
325 tprintf(
"Position %d: %s+%s beam\n", p, d ?
"Dict" :
"Non-Dict",
327 DebugBeamPos(unicharset, beam_[p]->beams_[index]);
334 void RecodeBeamSearch::DebugBeamPos(
const UNICHARSET& unicharset,
339 int heap_size = heap.
size();
340 for (
int i = 0; i < heap_size; ++i) {
343 if (null_best ==
nullptr || null_best->
score < node->
score) null_best = node;
345 if (unichar_bests[node->
unichar_id] ==
nullptr ||
351 for (
int u = 0; u < unichar_bests.
size(); ++u) {
352 if (unichar_bests[u] !=
nullptr) {
353 const RecodeNode& node = *unichar_bests[u];
354 node.Print(null_char_, unicharset, 1);
357 if (null_best !=
nullptr) {
358 null_best->
Print(null_char_, unicharset, 1);
365 void RecodeBeamSearch::ExtractPathAsUnicharIds(
369 std::deque<std::pair<int, int>>* best_choices) {
376 int width = best_nodes.
size();
378 double certainty = 0.0;
380 while (t < width && best_nodes[t]->unichar_id == INVALID_UNICHAR_ID) {
381 double cert = best_nodes[t++]->certainty;
382 if (cert < certainty) certainty = cert;
386 int unichar_id = best_nodes[t]->unichar_id;
388 best_nodes[t]->permuter !=
NO_PERM) {
391 if (certainty < certs->back()) certs->
back() = certainty;
392 ratings->
back() += rating;
398 if (best_choices !=
nullptr) {
399 best_choices->push_back(std::pair<int, int>(unichar_id, t));
402 double cert = best_nodes[t++]->certainty;
406 best_nodes[t - 1]->permuter ==
NO_PERM)) {
410 }
while (t < width && best_nodes[t]->duplicate);
413 }
else if (!certs->
empty()) {
414 if (certainty < certs->back()) certs->
back() = certainty;
415 ratings->
back() += rating;
423 WERD_RES* RecodeBeamSearch::InitializeWord(
bool leading_space,
424 const TBOX& line_box,
int word_start,
425 int word_end,
float space_certainty,
428 float scale_factor) {
431 C_BLOB_IT b_it(&blobs);
432 for (
int i = word_start; i < word_end; ++i) {
433 int min_half_width = xcoords[i + 1] - xcoords[i];
434 if (i > 0 && xcoords[i] - xcoords[i - 1] < min_half_width)
435 min_half_width = xcoords[i] - xcoords[i - 1];
436 if (min_half_width < 1) min_half_width = 1;
438 TBOX box(xcoords[i] - min_half_width, 0, xcoords[i] + min_half_width,
440 box.
scale(scale_factor);
442 box.set_top(line_box.
top());
446 WERD* word =
new WERD(&blobs, leading_space,
nullptr);
449 word_res->
uch_set = unicharset;
458 void RecodeBeamSearch::ComputeTopN(
const float* outputs,
int num_outputs,
464 for (
int i = 0; i < num_outputs; ++i) {
465 if (top_heap_.size() < top_n || outputs[i] > top_heap_.PeekTop().key) {
466 TopPair entry(outputs[i], i);
467 top_heap_.Push(&entry);
468 if (top_heap_.size() > top_n) top_heap_.Pop(&entry);
471 while (!top_heap_.empty()) {
473 top_heap_.Pop(&entry);
474 if (top_heap_.size() > 1) {
475 top_n_flags_[entry.data] =
TN_TOPN;
477 top_n_flags_[entry.data] =
TN_TOP2;
478 if (top_heap_.empty())
479 top_code_ = entry.data;
481 second_code_ = entry.data;
484 top_n_flags_[null_char_] =
TN_TOP2;
490 void RecodeBeamSearch::DecodeStep(
const float* outputs,
int t,
491 double dict_ratio,
double cert_offset,
492 double worst_dict_cert,
495 RecodeBeam* step = beam_[t];
501 dict_ratio, cert_offset, worst_dict_cert, step);
502 if (dict_ !=
nullptr) {
504 TN_TOP2, dict_ratio, cert_offset, worst_dict_cert, step);
507 RecodeBeam* prev = beam_[t - 1];
510 for (
int i = prev->beams_[beam_index].size() - 1; i >= 0; --i) {
512 ExtractPath(&prev->beams_[beam_index].get(i).data, &path);
513 tprintf(
"Step %d: Dawg beam %d:\n", t, i);
514 DebugPath(charset, path);
517 for (
int i = prev->beams_[beam_index].size() - 1; i >= 0; --i) {
519 ExtractPath(&prev->beams_[beam_index].get(i).data, &path);
520 tprintf(
"Step %d: Non-Dawg beam %d:\n", t, i);
521 DebugPath(charset, path);
529 for (
int tn = 0; tn <
TN_COUNT && total_beam == 0; ++tn) {
531 for (
int index = 0; index <
kNumBeams; ++index) {
535 for (
int i = prev->beams_[index].size() - 1; i >= 0; --i) {
536 ContinueContext(&prev->beams_[index].get(i).data, index, outputs,
537 top_n, dict_ratio, cert_offset, worst_dict_cert,
541 for (
int index = 0; index <
kNumBeams; ++index) {
543 total_beam += step->beams_[index].size();
548 for (
int c = 0; c <
NC_COUNT; ++c) {
549 if (step->best_initial_dawgs_[c].code >= 0) {
550 int index =
BeamIndex(
true, static_cast<NodeContinuation>(c), 0);
552 PushHeapIfBetter(kBeamWidths[0], &step->best_initial_dawgs_[c],
563 void RecodeBeamSearch::ContinueContext(
const RecodeNode* prev,
int index,
564 const float* outputs,
567 double worst_dict_cert,
569 RecodedCharID prefix;
570 RecodedCharID full_code;
571 const RecodeNode* previous = prev;
575 for (
int p = length - 1; p >= 0; --p, previous = previous->prev) {
576 while (previous !=
nullptr &&
577 (previous->duplicate || previous->code == null_char_)) {
578 previous = previous->prev;
580 if (previous !=
nullptr) {
581 prefix.Set(p, previous->code);
582 full_code.Set(p, previous->code);
585 if (prev !=
nullptr && !is_simple_text_) {
586 if (top_n_flags_[prev->code] == top_n_flag) {
590 PushDupOrNoDawgIfBetter(length,
true, prev->code, prev->unichar_id,
591 cert, worst_dict_cert, dict_ratio, use_dawgs,
595 prev->code != null_char_) {
597 outputs[null_char_]) +
599 PushDupOrNoDawgIfBetter(length,
true, prev->code, prev->unichar_id,
600 cert, worst_dict_cert, dict_ratio, use_dawgs,
605 if (prev->code != null_char_ && length > 0 &&
606 top_n_flags_[null_char_] == top_n_flag) {
611 PushDupOrNoDawgIfBetter(length,
false, null_char_, INVALID_UNICHAR_ID,
612 cert, worst_dict_cert, dict_ratio, use_dawgs,
617 if (final_codes !=
nullptr) {
618 for (
int i = 0; i < final_codes->
size(); ++i) {
619 int code = (*final_codes)[i];
620 if (top_n_flags_[code] != top_n_flag)
continue;
621 if (prev !=
nullptr && prev->code == code && !is_simple_text_)
continue;
624 full_code.Set(length, code);
627 if (length == 0 && code == null_char_) unichar_id = INVALID_UNICHAR_ID;
628 ContinueUnichar(code, unichar_id, cert, worst_dict_cert, dict_ratio,
630 if (top_n_flag ==
TN_TOP2 && code != null_char_) {
631 float prob = outputs[code] + outputs[null_char_];
633 prev->code != null_char_ &&
634 ((prev->code == top_code_ && code == second_code_) ||
635 (code == top_code_ && prev->code == second_code_))) {
636 prob += outputs[prev->code];
639 ContinueUnichar(code, unichar_id, cert, worst_dict_cert, dict_ratio,
645 if (next_codes !=
nullptr) {
646 for (
int i = 0; i < next_codes->
size(); ++i) {
647 int code = (*next_codes)[i];
648 if (top_n_flags_[code] != top_n_flag)
continue;
649 if (prev !=
nullptr && prev->code == code && !is_simple_text_)
continue;
651 PushDupOrNoDawgIfBetter(length + 1,
false, code, INVALID_UNICHAR_ID, cert,
652 worst_dict_cert, dict_ratio, use_dawgs,
654 if (top_n_flag ==
TN_TOP2 && code != null_char_) {
655 float prob = outputs[code] + outputs[null_char_];
657 prev->code != null_char_ &&
658 ((prev->code == top_code_ && code == second_code_) ||
659 (code == top_code_ && prev->code == second_code_))) {
660 prob += outputs[prev->code];
663 PushDupOrNoDawgIfBetter(length + 1,
false, code, INVALID_UNICHAR_ID,
664 cert, worst_dict_cert, dict_ratio, use_dawgs,
672 void RecodeBeamSearch::ContinueUnichar(
int code,
int unichar_id,
float cert,
673 float worst_dict_cert,
float dict_ratio,
675 const RecodeNode* prev,
678 if (cert > worst_dict_cert) {
679 ContinueDawg(code, unichar_id, cert, cont, prev, step);
683 PushHeapIfBetter(kBeamWidths[0], code, unichar_id,
TOP_CHOICE_PERM,
false,
684 false,
false,
false, cert * dict_ratio, prev,
nullptr,
686 if (dict_ !=
nullptr &&
692 float dawg_cert = cert;
706 dawg_cert *= dict_ratio;
707 PushInitialDawgIfBetter(code, unichar_id, permuter,
false,
false,
708 dawg_cert, cont, prev, step);
716 void RecodeBeamSearch::ContinueDawg(
int code,
int unichar_id,
float cert,
718 const RecodeNode* prev, RecodeBeam* step) {
721 if (unichar_id == INVALID_UNICHAR_ID) {
722 PushHeapIfBetter(kBeamWidths[0], code, unichar_id,
NO_PERM,
false,
false,
723 false,
false, cert, prev,
nullptr, dawg_heap);
728 if (prev !=
nullptr) score += prev->score;
729 if (dawg_heap->size() >= kBeamWidths[0] &&
730 score <= dawg_heap->PeekTop().data.score &&
731 nodawg_heap->size() >= kBeamWidths[0] &&
732 score <= nodawg_heap->PeekTop().data.score) {
735 const RecodeNode* uni_prev = prev;
738 while (uni_prev !=
nullptr &&
739 (uni_prev->unichar_id == INVALID_UNICHAR_ID || uni_prev->duplicate))
740 uni_prev = uni_prev->prev;
742 if (uni_prev !=
nullptr && uni_prev->end_of_word) {
745 PushInitialDawgIfBetter(code, unichar_id, uni_prev->permuter,
false,
746 false, cert, cont, prev, step);
747 PushHeapIfBetter(kBeamWidths[0], code, unichar_id, uni_prev->permuter,
748 false,
false,
false,
false, cert, prev,
nullptr,
752 }
else if (uni_prev !=
nullptr && uni_prev->start_of_dawg &&
758 DawgPositionVector initial_dawgs;
759 DawgPositionVector* updated_dawgs =
new DawgPositionVector;
760 DawgArgs dawg_args(&initial_dawgs, updated_dawgs,
NO_PERM);
761 bool word_start =
false;
762 if (uni_prev ==
nullptr) {
766 }
else if (uni_prev->dawgs !=
nullptr) {
768 dawg_args.active_dawgs = uni_prev->dawgs;
769 word_start = uni_prev->start_of_dawg;
777 PushHeapIfBetter(kBeamWidths[0], code, unichar_id, permuter,
false,
778 word_start, dawg_args.valid_end,
false, cert, prev,
779 dawg_args.updated_dawgs, dawg_heap);
780 if (dawg_args.valid_end && !space_delimited_) {
784 PushInitialDawgIfBetter(code, unichar_id, permuter, word_start,
true,
785 cert, cont, prev, step);
786 PushHeapIfBetter(kBeamWidths[0], code, unichar_id, permuter,
false,
787 word_start,
true,
false, cert, prev,
nullptr, nodawg_heap);
790 delete updated_dawgs;
797 void RecodeBeamSearch::PushInitialDawgIfBetter(
int code,
int unichar_id,
799 bool start,
bool end,
float cert,
801 const RecodeNode* prev,
803 RecodeNode* best_initial_dawg = &step->best_initial_dawgs_[cont];
805 if (prev !=
nullptr) score += prev->score;
806 if (best_initial_dawg->code < 0 || score > best_initial_dawg->score) {
807 DawgPositionVector* initial_dawgs =
new DawgPositionVector;
809 RecodeNode node(code, unichar_id, permuter,
true, start, end,
false, cert,
810 score, prev, initial_dawgs,
811 ComputeCodeHash(code,
false, prev));
812 *best_initial_dawg = node;
820 void RecodeBeamSearch::PushDupOrNoDawgIfBetter(
821 int length,
bool dup,
int code,
int unichar_id,
float cert,
822 float worst_dict_cert,
float dict_ratio,
bool use_dawgs,
824 int index =
BeamIndex(use_dawgs, cont, length);
826 if (cert > worst_dict_cert) {
827 PushHeapIfBetter(kBeamWidths[length], code, unichar_id,
828 prev ? prev->permuter :
NO_PERM,
false,
false,
false,
829 dup, cert, prev,
nullptr, &step->beams_[index]);
834 PushHeapIfBetter(kBeamWidths[length], code, unichar_id,
836 false, dup, cert, prev,
nullptr, &step->beams_[index]);
844 void RecodeBeamSearch::PushHeapIfBetter(
int max_size,
int code,
int unichar_id,
846 bool word_start,
bool end,
bool dup,
847 float cert,
const RecodeNode* prev,
848 DawgPositionVector* d,
851 if (prev !=
nullptr) score += prev->score;
852 if (heap->size() < max_size || score > heap->PeekTop().data.score) {
853 uint64_t hash = ComputeCodeHash(code, dup, prev);
854 RecodeNode node(code, unichar_id, permuter, dawg_start, word_start, end,
855 dup, cert, score, prev, d, hash);
856 if (UpdateHeapIfMatched(&node, heap))
return;
860 if (heap->size() > max_size) heap->Pop(&entry);
868 void RecodeBeamSearch::PushHeapIfBetter(
int max_size, RecodeNode* node,
870 if (heap->size() < max_size || node->score > heap->PeekTop().data.score) {
871 if (UpdateHeapIfMatched(node, heap)) {
877 if (heap->size() > max_size) heap->Pop(&entry);
883 bool RecodeBeamSearch::UpdateHeapIfMatched(RecodeNode* new_node,
889 for (
int i = 0; i < nodes->
size(); ++i) {
890 RecodeNode& node = (*nodes)[i].data;
891 if (node.code == new_node->code && node.code_hash == new_node->code_hash &&
892 node.permuter == new_node->permuter &&
893 node.start_of_dawg == new_node->start_of_dawg) {
894 if (new_node->score > node.score) {
898 (*nodes)[i].key = node.score;
899 heap->Reshuffle(&(*nodes)[i]);
908 uint64_t RecodeBeamSearch::ComputeCodeHash(
int code,
bool dup,
909 const RecodeNode* prev)
const {
910 uint64_t hash = prev ==
nullptr ? 0 : prev->code_hash;
911 if (!dup && code != null_char_) {
913 uint64_t carry = (((hash >> 32) * num_classes) >> 32);
925 void RecodeBeamSearch::ExtractBestPaths(
929 const RecodeNode* best_node =
nullptr;
930 const RecodeNode* second_best_node =
nullptr;
931 const RecodeBeam* last_beam = beam_[beam_size_ - 1];
932 for (
int c = 0; c <
NC_COUNT; ++c) {
935 for (
int is_dawg = 0; is_dawg < 2; ++is_dawg) {
936 int beam_index =
BeamIndex(is_dawg, cont, 0);
937 int heap_size = last_beam->beams_[beam_index].size();
938 for (
int h = 0; h < heap_size; ++h) {
939 const RecodeNode* node = &last_beam->beams_[beam_index].get(h).data;
943 const RecodeNode* dawg_node = node;
944 while (dawg_node !=
nullptr &&
945 (dawg_node->unichar_id == INVALID_UNICHAR_ID ||
946 dawg_node->duplicate))
947 dawg_node = dawg_node->prev;
948 if (dawg_node ==
nullptr || (!dawg_node->end_of_word &&
954 if (best_node ==
nullptr || node->score > best_node->score) {
955 second_best_node = best_node;
957 }
else if (second_best_node ==
nullptr ||
958 node->score > second_best_node->score) {
959 second_best_node = node;
964 if (second_nodes !=
nullptr) ExtractPath(second_best_node, second_nodes);
965 ExtractPath(best_node, best_nodes);
970 void RecodeBeamSearch::ExtractPath(
973 while (node !=
nullptr) {
981 void RecodeBeamSearch::DebugPath(
984 for (
int c = 0; c < path.
size(); ++c) {
985 const RecodeNode& node = *path[c];
987 node.Print(null_char_, *unicharset, 1);
992 void RecodeBeamSearch::DebugUnicharPath(
997 int num_ids = unichar_ids.
size();
998 double total_rating = 0.0;
999 for (
int c = 0; c < num_ids; ++c) {
1000 int coord = xcoords[c];
1001 tprintf(
"%d %d=%s r=%g, c=%g, s=%d, e=%d, perm=%d\n", coord, unichar_ids[c],
1003 certs[c], path[coord]->start_of_word, path[coord]->end_of_word,
1004 path[coord]->permuter);
1005 total_rating += ratings[c];
1007 tprintf(
"Path total rating = %g\n", total_rating);
void ExtractBestPathAsLabels(GenericVector< int > *labels, GenericVector< int > *xcoords) const
static const int kMaxCodeLen
static int BeamIndex(bool is_dawg, NodeContinuation cont, int length)
const Pair & get(int index) const
int DecodeUnichar(const RecodedCharID &code) const
void FakeWordFromRatings(PermuterType permuter)
bool IsSpaceDelimited(UNICHAR_ID unichar_id) const
void Decode(const NetworkIO &output, double dict_ratio, double cert_offset, double worst_dict_cert, const UNICHARSET *charset, int lstm_choice_mode=0)
GenericHeap< RecodePair > RecodeHeap
static int LengthFromBeamsIndex(int index)
const char * string() const
void set_matrix_cell(int col, int row)
static NodeContinuation ContinuationFromBeamsIndex(int index)
void scale(const float f)
const GenericVector< int > * GetFinalCodes(const RecodedCharID &code) const
void Print(int null_char, const UNICHARSET &unicharset, int depth) const
std::vector< std::vector< std::pair< const char *, float > > > timesteps
void DebugBeams(const UNICHARSET &unicharset) const
static bool IsDawgFromBeamsIndex(int index)
const GenericVector< int > * GetNextCodes(const RecodedCharID &code) const
void init_to_size(int size, const T &t)
STRING debug_str(UNICHAR_ID id) const
bool IsSpaceDelimitedLang() const
Returns true if the language is space-delimited (i.e. not Chinese/Japanese or Thai).
KDPairInc< double, RecodeNode > RecodePair
DLLSYM void tprintf(const char *format,...)
static float ProbToCertainty(float prob)
void put(ICOORD pos, const T &thing)
int def_letter_is_okay(void *void_dawg_args, const UNICHARSET &unicharset, UNICHAR_ID unichar_id, bool word_end) const
const char * id_to_unichar_ext(UNICHAR_ID id) const
RecodeBeamSearch(const UnicharCompress &recoder, int null_char, bool simple_text, Dict *dict)
static const float kMinCertainty
const UNICHARSET * uch_set
void ExtractBestPathAsUnicharIds(bool debug, const UNICHARSET *unicharset, GenericVector< int > *unichar_ids, GenericVector< float > *certs, GenericVector< float > *ratings, GenericVector< int > *xcoords) const
const UNICHARSET & getUnicharset() const
std::vector< std::vector< std::pair< const char *, float > > > timesteps
static const int kNumBeams
void default_dawgs(DawgPositionVector *anylength_dawgs, bool suppress_patterns) const
void ExtractBestPathAsWords(const TBOX &line_box, float scale_factor, bool debug, const UNICHARSET *unicharset, PointerVector< WERD_RES > *words, int lstm_choice_mode=0)
const char * kNodeContNames[]
static C_BLOB * FakeBlob(const TBOX &box)