tessapi/4.0.0/a01019_source.html

 // File:        recodebeam.cpp
 // Description: Beam search to decode from the re-encoded CJK as a sequence of
 //              smaller numbers in place of a single large code.
 // Author:      Ray Smith
 // Created:     Fri Mar 13 09:39:01 PDT 2015
 //
 // (C) Copyright 2015, Google Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 // http://www.apache.org/licenses/LICENSE-2.0
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //

 #include "recodebeam.h"
 #include "networkio.h"
 #include "pageres.h"
 #include "unicharcompress.h"
 #include <deque>
 #include <map>
 #include <set>
 #include <vector>

 #include <algorithm>

 namespace tesseract {

 // Clipping value for certainty inside Tesseract. Reflects the minimum value
 // of certainty that will be returned by ExtractBestPathAsUnicharIds.
 // Supposedly on a uniform scale that can be compared across languages and
 // engines.
 const float RecodeBeamSearch::kMinCertainty = -20.0f;

 // The beam width at each code position.
 const int RecodeBeamSearch::kBeamWidths[RecodedCharID::kMaxCodeLen + 1] = {
     5, 10, 16, 16, 16, 16, 16, 16, 16, 16,
 };

 const char* kNodeContNames[] = {"Anything", "OnlyDup", "NoDup"};

 // Prints debug details of the node.
 void RecodeNode::Print(int null_char, const UNICHARSET& unicharset,
                        int depth) const {
   if (code == null_char) {
     tprintf("null_char");
   } else {
     tprintf("label=%d, uid=%d=%s", code, unichar_id,
             unicharset.debug_str(unichar_id).string());
   }
   tprintf(" score=%g, c=%g,%s%s%s perm=%d, hash=%lx", score, certainty,
           start_of_dawg ? " DawgStart" : "", start_of_word ? " Start" : "",
           end_of_word ? " End" : "", permuter, code_hash);
   if (depth > 0 && prev != nullptr) {
     tprintf(" prev:");
     prev->Print(null_char, unicharset, depth - 1);
   } else {
     tprintf("\n");
   }
 }

 // Borrows the pointer, which is expected to survive until *this is deleted.
 RecodeBeamSearch::RecodeBeamSearch(const UnicharCompress& recoder,
                                    int null_char, bool simple_text, Dict* dict)
     : recoder_(recoder),
       beam_size_(0),
       top_code_(-1),
       second_code_(-1),
       dict_(dict),
       space_delimited_(true),
       is_simple_text_(simple_text),
       null_char_(null_char) {
   if (dict_ != nullptr && !dict_->IsSpaceDelimitedLang()) space_delimited_ = false;
 }

 // Decodes the set of network outputs, storing the lattice internally.
 void RecodeBeamSearch::Decode(const NetworkIO& output, double dict_ratio,
                               double cert_offset, double worst_dict_cert,
                               const UNICHARSET* charset, int lstm_choice_mode) {
   beam_size_ = 0;
   int width = output.Width();
   if (lstm_choice_mode)
     timesteps.clear();
   for (int t = 0; t < width; ++t) {
     ComputeTopN(output.f(t), output.NumFeatures(), kBeamWidths[0]);
     DecodeStep(output.f(t), t, dict_ratio, cert_offset, worst_dict_cert,
                charset);
     if (lstm_choice_mode) {
       SaveMostCertainChoices(output.f(t), output.NumFeatures(), charset, t);
     }
   }
 }
 void RecodeBeamSearch::Decode(const GENERIC_2D_ARRAY<float>& output,
                               double dict_ratio, double cert_offset,
                               double worst_dict_cert,
                               const UNICHARSET* charset) {
   beam_size_ = 0;
   int width = output.dim1();
   for (int t = 0; t < width; ++t) {
     ComputeTopN(output[t], output.dim2(), kBeamWidths[0]);
     DecodeStep(output[t], t, dict_ratio, cert_offset, worst_dict_cert, charset);
   }
 }

 void RecodeBeamSearch::SaveMostCertainChoices(const float* outputs,
                                              int num_outputs,
                                              const UNICHARSET* charset,
                                              int xCoord) {
   std::vector<std::pair<const char*, float>> choices;
   int pos = 0;
   for (int i = 0; i < num_outputs; ++i) {
     if (outputs[i] >= 0.01f) {
       const char* character;
       if (i + 2 >= num_outputs) {
         character = "";
       } else if (i > 0) {
         character = charset->id_to_unichar_ext(i + 2);
       } else {
         character = charset->id_to_unichar_ext(i);
       }
       pos = 0;
       //order the possible choices within one timestep
       //beginning with the most likely
       while (choices.size() > pos && choices[pos].second > outputs[i]) {
         pos++;
       }
       choices.insert(choices.begin() + pos,
                     std::pair<const char*, float>(character, outputs[i]));
     }
   }
   timesteps.push_back(choices);
 }

 // Returns the best path as labels/scores/xcoords similar to simple CTC.
 void RecodeBeamSearch::ExtractBestPathAsLabels(
     GenericVector<int>* labels, GenericVector<int>* xcoords) const {
   labels->truncate(0);
   xcoords->truncate(0);
   GenericVector<const RecodeNode*> best_nodes;
   ExtractBestPaths(&best_nodes, nullptr);
   // Now just run CTC on the best nodes.
   int t = 0;
   int width = best_nodes.size();
   while (t < width) {
     int label = best_nodes[t]->code;
     if (label != null_char_) {
       labels->push_back(label);
       xcoords->push_back(t);
     }
     while (++t < width && !is_simple_text_ && best_nodes[t]->code == label) {
     }
   }
   xcoords->push_back(width);
 }

 // Returns the best path as unichar-ids/certs/ratings/xcoords skipping
 // duplicates, nulls and intermediate parts.
 void RecodeBeamSearch::ExtractBestPathAsUnicharIds(
     bool debug, const UNICHARSET* unicharset, GenericVector<int>* unichar_ids,
     GenericVector<float>* certs, GenericVector<float>* ratings,
     GenericVector<int>* xcoords) const {
   GenericVector<const RecodeNode*> best_nodes;
   ExtractBestPaths(&best_nodes, nullptr);
   ExtractPathAsUnicharIds(best_nodes, unichar_ids, certs, ratings, xcoords);
   if (debug) {
     DebugPath(unicharset, best_nodes);
     DebugUnicharPath(unicharset, best_nodes, *unichar_ids, *certs, *ratings,
                      *xcoords);
   }
 }

 // Returns the best path as a set of WERD_RES.
 void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX& line_box,
                                               float scale_factor, bool debug,
                                               const UNICHARSET* unicharset,
                                               PointerVector<WERD_RES>* words,
                                               int lstm_choice_mode) {
   words->truncate(0);
   GenericVector<int> unichar_ids;
   GenericVector<float> certs;
   GenericVector<float> ratings;
   GenericVector<int> xcoords;
   GenericVector<const RecodeNode*> best_nodes;
   GenericVector<const RecodeNode*> second_nodes;
   std::deque<std::pair<int,int>> best_choices;
   ExtractBestPaths(&best_nodes, &second_nodes);
   if (debug) {
     DebugPath(unicharset, best_nodes);
     ExtractPathAsUnicharIds(second_nodes, &unichar_ids, &certs, &ratings,
                             &xcoords);
     tprintf("\nSecond choice path:\n");
     DebugUnicharPath(unicharset, second_nodes, unichar_ids, certs, ratings,
                      xcoords);
   }
   int current_char;
   int timestepEnd = 0;
   //if lstm choice mode is required in granularity level 2 it stores the x
   //Coordinates of every chosen character to match the alternative choices to it
   if (lstm_choice_mode == 2) {
     ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings,
                             &xcoords, &best_choices);
     if (best_choices.size() > 0) {
       current_char = best_choices.front().first;
       timestepEnd = best_choices.front().second;
       best_choices.pop_front();
     }
   } else {
     ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings,
                             &xcoords);
   }
   int num_ids = unichar_ids.size();
   if (debug) {
     DebugUnicharPath(unicharset, best_nodes, unichar_ids, certs, ratings,
                      xcoords);
   }
   // Convert labels to unichar-ids.
   int word_end = 0;
   float prev_space_cert = 0.0f;
   for (int word_start = 0; word_start < num_ids; word_start = word_end) {
     for (word_end = word_start + 1; word_end < num_ids; ++word_end) {
       // A word is terminated when a space character or start_of_word flag is
       // hit. We also want to force a separate word for every non
       // space-delimited character when not in a dictionary context.
       if (unichar_ids[word_end] == UNICHAR_SPACE) break;
       int index = xcoords[word_end];
       if (best_nodes[index]->start_of_word) break;
       if (best_nodes[index]->permuter == TOP_CHOICE_PERM &&
           (!unicharset->IsSpaceDelimited(unichar_ids[word_end]) ||
            !unicharset->IsSpaceDelimited(unichar_ids[word_end - 1])))
         break;
     }
     float space_cert = 0.0f;
     if (word_end < num_ids && unichar_ids[word_end] == UNICHAR_SPACE)
       space_cert = certs[word_end];
     bool leading_space =
         word_start > 0 && unichar_ids[word_start - 1] == UNICHAR_SPACE;
     // Create a WERD_RES for the output word.
     WERD_RES* word_res = InitializeWord(
         leading_space, line_box, word_start, word_end,
         std::min(space_cert, prev_space_cert), unicharset, xcoords, scale_factor);
     if (lstm_choice_mode == 1) {
       for (size_t i = timestepEnd; i < xcoords[word_end]; i++) {
         word_res->timesteps.push_back(timesteps[i]);
       }
       timestepEnd = xcoords[word_end];
     } else if (lstm_choice_mode == 2) {
       float sum = 0;
       std::vector<std::pair<const char*, float>> choice_pairs;
       for (size_t i = timestepEnd; i < xcoords[word_end]; i++) {
         for (std::pair<const char*, float> choice : timesteps[i]) {
           if (std::strcmp(choice.first, "") != 0) {
             sum += choice.second;
             choice_pairs.push_back(choice);
           }
         }
         if ((best_choices.size() > 0 && i == best_choices.front().second - 1)
             || i == xcoords[word_end]-1) {
           std::map<const char*, float> summed_propabilities;
           for (auto it = choice_pairs.begin(); it != choice_pairs.end(); ++it) {
             summed_propabilities[it->first] += it->second;
           }
           std::vector<std::pair<const char*, float>> accumulated_timestep;
           accumulated_timestep.push_back(std::pair<const char*,float>
                                         (unicharset->id_to_unichar_ext
                                         (current_char), 2.0));
           int pos;
           for (auto it = summed_propabilities.begin();
                it != summed_propabilities.end(); ++it) {
             if(sum == 0) break;
             it->second/=sum;
             pos = 0;
             while (accumulated_timestep.size() > pos
                    && accumulated_timestep[pos].second > it->second) {
               pos++;
             }
             accumulated_timestep.insert(accumulated_timestep.begin() + pos,
                                         std::pair<const char*,float>(it->first,
                                         it->second));
           }
           if (best_choices.size() > 0) {
             current_char = best_choices.front().first;
             best_choices.pop_front();
           }
           choice_pairs.clear();
           word_res->timesteps.push_back(accumulated_timestep);
           sum = 0;
         }
       }
       timestepEnd = xcoords[word_end];
     }
     for (int i = word_start; i < word_end; ++i) {
       BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST;
       BLOB_CHOICE_IT bc_it(choices);
       BLOB_CHOICE* choice = new BLOB_CHOICE(
           unichar_ids[i], ratings[i], certs[i], -1, 1.0f,
           static_cast<float>(INT16_MAX), 0.0f, BCC_STATIC_CLASSIFIER);
       int col = i - word_start;
       choice->set_matrix_cell(col, col);
       bc_it.add_after_then_move(choice);
       word_res->ratings->put(col, col, choices);
     }
     int index = xcoords[word_end - 1];
     word_res->FakeWordFromRatings(best_nodes[index]->permuter);
     words->push_back(word_res);
     prev_space_cert = space_cert;
     if (word_end < num_ids && unichar_ids[word_end] == UNICHAR_SPACE)
       ++word_end;
   }
 }

 // Generates debug output of the content of the beams after a Decode.
 void RecodeBeamSearch::DebugBeams(const UNICHARSET& unicharset) const {
   for (int p = 0; p < beam_size_; ++p) {
     for (int d = 0; d < 2; ++d) {
       for (int c = 0; c < NC_COUNT; ++c) {
         NodeContinuation cont = static_cast<NodeContinuation>(c);
         int index = BeamIndex(d, cont, 0);
         if (beam_[p]->beams_[index].empty()) continue;
         // Print all the best scoring nodes for each unichar found.
         tprintf("Position %d: %s+%s beam\n", p, d ? "Dict" : "Non-Dict",
                 kNodeContNames[c]);
         DebugBeamPos(unicharset, beam_[p]->beams_[index]);
       }
     }
   }
 }

 // Generates debug output of the content of a single beam position.
 void RecodeBeamSearch::DebugBeamPos(const UNICHARSET& unicharset,
                                     const RecodeHeap& heap) const {
   GenericVector<const RecodeNode*> unichar_bests;
   unichar_bests.init_to_size(unicharset.size(), nullptr);
   const RecodeNode* null_best = nullptr;
   int heap_size = heap.size();
   for (int i = 0; i < heap_size; ++i) {
     const RecodeNode* node = &heap.get(i).data;
     if (node->unichar_id == INVALID_UNICHAR_ID) {
       if (null_best == nullptr || null_best->score < node->score) null_best = node;
     } else {
       if (unichar_bests[node->unichar_id] == nullptr ||
           unichar_bests[node->unichar_id]->score < node->score) {
         unichar_bests[node->unichar_id] = node;
       }
     }
   }
   for (int u = 0; u < unichar_bests.size(); ++u) {
     if (unichar_bests[u] != nullptr) {
       const RecodeNode& node = *unichar_bests[u];
       node.Print(null_char_, unicharset, 1);
     }
   }
   if (null_best != nullptr) {
     null_best->Print(null_char_, unicharset, 1);
   }
 }

 // Returns the given best_nodes as unichar-ids/certs/ratings/xcoords skipping
 // duplicates, nulls and intermediate parts.
 /* static */
 void RecodeBeamSearch::ExtractPathAsUnicharIds(
     const GenericVector<const RecodeNode*>& best_nodes,
     GenericVector<int>* unichar_ids, GenericVector<float>* certs,
     GenericVector<float>* ratings, GenericVector<int>* xcoords,
     std::deque<std::pair<int, int>>* best_choices) {
   unichar_ids->truncate(0);
   certs->truncate(0);
   ratings->truncate(0);
   xcoords->truncate(0);
   // Backtrack extracting only valid, non-duplicate unichar-ids.
   int t = 0;
   int width = best_nodes.size();
   while (t < width) {
     double certainty = 0.0;
     double rating = 0.0;
     while (t < width && best_nodes[t]->unichar_id == INVALID_UNICHAR_ID) {
       double cert = best_nodes[t++]->certainty;
       if (cert < certainty) certainty = cert;
       rating -= cert;
     }
     if (t < width) {
       int unichar_id = best_nodes[t]->unichar_id;
       if (unichar_id == UNICHAR_SPACE && !certs->empty() &&
           best_nodes[t]->permuter != NO_PERM) {
         // All the rating and certainty go on the previous character except
         // for the space itself.
         if (certainty < certs->back()) certs->back() = certainty;
         ratings->back() += rating;
         certainty = 0.0;
         rating = 0.0;
       }
       unichar_ids->push_back(unichar_id);
       xcoords->push_back(t);
       if (best_choices != nullptr) {
         best_choices->push_back(std::pair<int, int>(unichar_id, t));
       }
       do {
         double cert = best_nodes[t++]->certainty;
         // Special-case NO-PERM space to forget the certainty of the previous
         // nulls. See long comment in ContinueContext.
         if (cert < certainty || (unichar_id == UNICHAR_SPACE &&
                                  best_nodes[t - 1]->permuter == NO_PERM)) {
           certainty = cert;
         }
         rating -= cert;
       } while (t < width && best_nodes[t]->duplicate);
       certs->push_back(certainty);
       ratings->push_back(rating);
     } else if (!certs->empty()) {
       if (certainty < certs->back()) certs->back() = certainty;
       ratings->back() += rating;
     }
   }
   xcoords->push_back(width);
 }

 // Sets up a word with the ratings matrix and fake blobs with boxes in the
 // right places.
 WERD_RES* RecodeBeamSearch::InitializeWord(bool leading_space,
                                            const TBOX& line_box, int word_start,
                                            int word_end, float space_certainty,
                                            const UNICHARSET* unicharset,
                                            const GenericVector<int>& xcoords,
                                            float scale_factor) {
   // Make a fake blob for each non-zero label.
   C_BLOB_LIST blobs;
   C_BLOB_IT b_it(&blobs);
   for (int i = word_start; i < word_end; ++i) {
     int min_half_width = xcoords[i + 1] - xcoords[i];
     if (i > 0 && xcoords[i] - xcoords[i - 1] < min_half_width)
       min_half_width = xcoords[i] - xcoords[i - 1];
     if (min_half_width < 1) min_half_width = 1;
     // Make a fake blob.
     TBOX box(xcoords[i] - min_half_width, 0, xcoords[i] + min_half_width,
              line_box.height());
     box.scale(scale_factor);
     box.move(ICOORD(line_box.left(), line_box.bottom()));
     box.set_top(line_box.top());
     b_it.add_after_then_move(C_BLOB::FakeBlob(box));
   }
   // Make a fake word from the blobs.
   WERD* word = new WERD(&blobs, leading_space, nullptr);
   // Make a WERD_RES from the word.
   WERD_RES* word_res = new WERD_RES(word);
   word_res->uch_set = unicharset;
   word_res->combination = true;  // Give it ownership of the word.
   word_res->space_certainty = space_certainty;
   word_res->ratings = new MATRIX(word_end - word_start, 1);
   return word_res;
 }

 // Fills top_n_flags_ with bools that are true iff the corresponding output
 // is one of the top_n.
 void RecodeBeamSearch::ComputeTopN(const float* outputs, int num_outputs,
                                    int top_n) {
   top_n_flags_.init_to_size(num_outputs, TN_ALSO_RAN);
   top_code_ = -1;
   second_code_ = -1;
   top_heap_.clear();
   for (int i = 0; i < num_outputs; ++i) {
     if (top_heap_.size() < top_n || outputs[i] > top_heap_.PeekTop().key) {
       TopPair entry(outputs[i], i);
       top_heap_.Push(&entry);
       if (top_heap_.size() > top_n) top_heap_.Pop(&entry);
     }
   }
   while (!top_heap_.empty()) {
     TopPair entry;
     top_heap_.Pop(&entry);
     if (top_heap_.size() > 1) {
       top_n_flags_[entry.data] = TN_TOPN;
     } else {
       top_n_flags_[entry.data] = TN_TOP2;
       if (top_heap_.empty())
         top_code_ = entry.data;
       else
         second_code_ = entry.data;
     }
   }
   top_n_flags_[null_char_] = TN_TOP2;
 }

 // Adds the computation for the current time-step to the beam. Call at each
 // time-step in sequence from left to right. outputs is the activation vector
 // for the current timestep.
 void RecodeBeamSearch::DecodeStep(const float* outputs, int t,
                                   double dict_ratio, double cert_offset,
                                   double worst_dict_cert,
                                   const UNICHARSET* charset, bool debug) {
   if (t == beam_.size()) beam_.push_back(new RecodeBeam);
   RecodeBeam* step = beam_[t];
   beam_size_ = t + 1;
   step->Clear();
   if (t == 0) {
     // The first step can only use singles and initials.
     ContinueContext(nullptr, BeamIndex(false, NC_ANYTHING, 0), outputs, TN_TOP2,
                     dict_ratio, cert_offset, worst_dict_cert, step);
     if (dict_ != nullptr) {
       ContinueContext(nullptr, BeamIndex(true, NC_ANYTHING, 0), outputs,
                       TN_TOP2, dict_ratio, cert_offset, worst_dict_cert, step);
     }
   } else {
     RecodeBeam* prev = beam_[t - 1];
     if (debug) {
       int beam_index = BeamIndex(true, NC_ANYTHING, 0);
       for (int i = prev->beams_[beam_index].size() - 1; i >= 0; --i) {
         GenericVector<const RecodeNode*> path;
         ExtractPath(&prev->beams_[beam_index].get(i).data, &path);
         tprintf("Step %d: Dawg beam %d:\n", t, i);
         DebugPath(charset, path);
       }
       beam_index = BeamIndex(false, NC_ANYTHING, 0);
       for (int i = prev->beams_[beam_index].size() - 1; i >= 0; --i) {
         GenericVector<const RecodeNode*> path;
         ExtractPath(&prev->beams_[beam_index].get(i).data, &path);
         tprintf("Step %d: Non-Dawg beam %d:\n", t, i);
         DebugPath(charset, path);
       }
     }
     int total_beam = 0;
     // Work through the scores by group (top-2, top-n, the rest) while the beam
     // is empty. This enables extending the context using only the top-n results
     // first, which may have an empty intersection with the valid codes, so we
     // fall back to the rest if the beam is empty.
     for (int tn = 0; tn < TN_COUNT && total_beam == 0; ++tn) {
       TopNState top_n = static_cast<TopNState>(tn);
       for (int index = 0; index < kNumBeams; ++index) {
         // Working backwards through the heaps doesn't guarantee that we see the
         // best first, but it comes before a lot of the worst, so it is slightly
         // more efficient than going forwards.
         for (int i = prev->beams_[index].size() - 1; i >= 0; --i) {
           ContinueContext(&prev->beams_[index].get(i).data, index, outputs,
                           top_n, dict_ratio, cert_offset, worst_dict_cert,
                           step);
         }
       }
       for (int index = 0; index < kNumBeams; ++index) {
         if (ContinuationFromBeamsIndex(index) == NC_ANYTHING)
           total_beam += step->beams_[index].size();
       }
     }
     // Special case for the best initial dawg. Push it on the heap if good
     // enough, but there is only one, so it doesn't blow up the beam.
     for (int c = 0; c < NC_COUNT; ++c) {
       if (step->best_initial_dawgs_[c].code >= 0) {
         int index = BeamIndex(true, static_cast<NodeContinuation>(c), 0);
         RecodeHeap* dawg_heap = &step->beams_[index];
         PushHeapIfBetter(kBeamWidths[0], &step->best_initial_dawgs_[c],
                          dawg_heap);
       }
     }
   }
 }

 // Adds to the appropriate beams the legal (according to recoder)
 // continuations of context prev, which is of the given length, using the
 // given network outputs to provide scores to the choices. Uses only those
 // choices for which top_n_flags[index] == top_n_flag.
 void RecodeBeamSearch::ContinueContext(const RecodeNode* prev, int index,
                                        const float* outputs,
                                        TopNState top_n_flag, double dict_ratio,
                                        double cert_offset,
                                        double worst_dict_cert,
                                        RecodeBeam* step) {
   RecodedCharID prefix;
   RecodedCharID full_code;
   const RecodeNode* previous = prev;
   int length = LengthFromBeamsIndex(index);
   bool use_dawgs = IsDawgFromBeamsIndex(index);
   NodeContinuation prev_cont = ContinuationFromBeamsIndex(index);
   for (int p = length - 1; p >= 0; --p, previous = previous->prev) {
     while (previous != nullptr &&
            (previous->duplicate || previous->code == null_char_)) {
       previous = previous->prev;
     }
     if (previous != nullptr) {
       prefix.Set(p, previous->code);
       full_code.Set(p, previous->code);
     }
   }
   if (prev != nullptr && !is_simple_text_) {
     if (top_n_flags_[prev->code] == top_n_flag) {
       if (prev_cont != NC_NO_DUP) {
         float cert =
             NetworkIO::ProbToCertainty(outputs[prev->code]) + cert_offset;
         PushDupOrNoDawgIfBetter(length, true, prev->code, prev->unichar_id,
                                 cert, worst_dict_cert, dict_ratio, use_dawgs,
                                 NC_ANYTHING, prev, step);
       }
       if (prev_cont == NC_ANYTHING && top_n_flag == TN_TOP2 &&
           prev->code != null_char_) {
         float cert = NetworkIO::ProbToCertainty(outputs[prev->code] +
                                                 outputs[null_char_]) +
                      cert_offset;
         PushDupOrNoDawgIfBetter(length, true, prev->code, prev->unichar_id,
                                 cert, worst_dict_cert, dict_ratio, use_dawgs,
                                 NC_NO_DUP, prev, step);
       }
     }
     if (prev_cont == NC_ONLY_DUP) return;
     if (prev->code != null_char_ && length > 0 &&
         top_n_flags_[null_char_] == top_n_flag) {
       // Allow nulls within multi code sequences, as the nulls within are not
       // explicitly included in the code sequence.
       float cert =
           NetworkIO::ProbToCertainty(outputs[null_char_]) + cert_offset;
       PushDupOrNoDawgIfBetter(length, false, null_char_, INVALID_UNICHAR_ID,
                               cert, worst_dict_cert, dict_ratio, use_dawgs,
                               NC_ANYTHING, prev, step);
     }
   }
   const GenericVector<int>* final_codes = recoder_.GetFinalCodes(prefix);
   if (final_codes != nullptr) {
     for (int i = 0; i < final_codes->size(); ++i) {
       int code = (*final_codes)[i];
       if (top_n_flags_[code] != top_n_flag) continue;
       if (prev != nullptr && prev->code == code && !is_simple_text_) continue;
       float cert = NetworkIO::ProbToCertainty(outputs[code]) + cert_offset;
       if (cert < kMinCertainty && code != null_char_) continue;
       full_code.Set(length, code);
       int unichar_id = recoder_.DecodeUnichar(full_code);
       // Map the null char to INVALID.
       if (length == 0 && code == null_char_) unichar_id = INVALID_UNICHAR_ID;
       ContinueUnichar(code, unichar_id, cert, worst_dict_cert, dict_ratio,
                       use_dawgs, NC_ANYTHING, prev, step);
       if (top_n_flag == TN_TOP2 && code != null_char_) {
         float prob = outputs[code] + outputs[null_char_];
         if (prev != nullptr && prev_cont == NC_ANYTHING &&
             prev->code != null_char_ &&
             ((prev->code == top_code_ && code == second_code_) ||
              (code == top_code_ && prev->code == second_code_))) {
           prob += outputs[prev->code];
         }
         float cert = NetworkIO::ProbToCertainty(prob) + cert_offset;
         ContinueUnichar(code, unichar_id, cert, worst_dict_cert, dict_ratio,
                         use_dawgs, NC_ONLY_DUP, prev, step);
       }
     }
   }
   const GenericVector<int>* next_codes = recoder_.GetNextCodes(prefix);
   if (next_codes != nullptr) {
     for (int i = 0; i < next_codes->size(); ++i) {
       int code = (*next_codes)[i];
       if (top_n_flags_[code] != top_n_flag) continue;
       if (prev != nullptr && prev->code == code && !is_simple_text_) continue;
       float cert = NetworkIO::ProbToCertainty(outputs[code]) + cert_offset;
       PushDupOrNoDawgIfBetter(length + 1, false, code, INVALID_UNICHAR_ID, cert,
                               worst_dict_cert, dict_ratio, use_dawgs,
                               NC_ANYTHING, prev, step);
       if (top_n_flag == TN_TOP2 && code != null_char_) {
         float prob = outputs[code] + outputs[null_char_];
         if (prev != nullptr && prev_cont == NC_ANYTHING &&
             prev->code != null_char_ &&
             ((prev->code == top_code_ && code == second_code_) ||
              (code == top_code_ && prev->code == second_code_))) {
           prob += outputs[prev->code];
         }
         float cert = NetworkIO::ProbToCertainty(prob) + cert_offset;
         PushDupOrNoDawgIfBetter(length + 1, false, code, INVALID_UNICHAR_ID,
                                 cert, worst_dict_cert, dict_ratio, use_dawgs,
                                 NC_ONLY_DUP, prev, step);
       }
     }
   }
 }

 // Continues for a new unichar, using dawg or non-dawg as per flag.
 void RecodeBeamSearch::ContinueUnichar(int code, int unichar_id, float cert,
                                        float worst_dict_cert, float dict_ratio,
                                        bool use_dawgs, NodeContinuation cont,
                                        const RecodeNode* prev,
                                        RecodeBeam* step) {
   if (use_dawgs) {
     if (cert > worst_dict_cert) {
       ContinueDawg(code, unichar_id, cert, cont, prev, step);
     }
   } else {
     RecodeHeap* nodawg_heap = &step->beams_[BeamIndex(false, cont, 0)];
     PushHeapIfBetter(kBeamWidths[0], code, unichar_id, TOP_CHOICE_PERM, false,
                      false, false, false, cert * dict_ratio, prev, nullptr,
                      nodawg_heap);
     if (dict_ != nullptr &&
         ((unichar_id == UNICHAR_SPACE && cert > worst_dict_cert) ||
          !dict_->getUnicharset().IsSpaceDelimited(unichar_id))) {
       // Any top choice position that can start a new word, ie a space or
       // any non-space-delimited character, should also be considered
       // by the dawg search, so push initial dawg to the dawg heap.
       float dawg_cert = cert;
       PermuterType permuter = TOP_CHOICE_PERM;
       // Since we use the space either side of a dictionary word in the
       // certainty of the word, (to properly handle weak spaces) and the
       // space is coming from a non-dict word, we need special conditions
       // to avoid degrading the certainty of the dict word that follows.
       // With a space we don't multiply the certainty by dict_ratio, and we
       // flag the space with NO_PERM to indicate that we should not use the
       // predecessor nulls to generate the confidence for the space, as they
       // have already been multiplied by dict_ratio, and we can't go back to
       // insert more entries in any previous heaps.
       if (unichar_id == UNICHAR_SPACE)
         permuter = NO_PERM;
       else
         dawg_cert *= dict_ratio;
       PushInitialDawgIfBetter(code, unichar_id, permuter, false, false,
                               dawg_cert, cont, prev, step);
     }
   }
 }

 // Adds a RecodeNode composed of the tuple (code, unichar_id, cert, prev,
 // appropriate-dawg-args, cert) to the given heap (dawg_beam_) if unichar_id
 // is a valid continuation of whatever is in prev.
 void RecodeBeamSearch::ContinueDawg(int code, int unichar_id, float cert,
                                     NodeContinuation cont,
                                     const RecodeNode* prev, RecodeBeam* step) {
   RecodeHeap* dawg_heap = &step->beams_[BeamIndex(true, cont, 0)];
   RecodeHeap* nodawg_heap = &step->beams_[BeamIndex(false, cont, 0)];
   if (unichar_id == INVALID_UNICHAR_ID) {
     PushHeapIfBetter(kBeamWidths[0], code, unichar_id, NO_PERM, false, false,
                      false, false, cert, prev, nullptr, dawg_heap);
     return;
   }
   // Avoid dictionary probe if score a total loss.
   float score = cert;
   if (prev != nullptr) score += prev->score;
   if (dawg_heap->size() >= kBeamWidths[0] &&
       score <= dawg_heap->PeekTop().data.score &&
       nodawg_heap->size() >= kBeamWidths[0] &&
       score <= nodawg_heap->PeekTop().data.score) {
     return;
   }
   const RecodeNode* uni_prev = prev;
   // Prev may be a partial code, null_char, or duplicate, so scan back to the
   // last valid unichar_id.
   while (uni_prev != nullptr &&
          (uni_prev->unichar_id == INVALID_UNICHAR_ID || uni_prev->duplicate))
     uni_prev = uni_prev->prev;
   if (unichar_id == UNICHAR_SPACE) {
     if (uni_prev != nullptr && uni_prev->end_of_word) {
       // Space is good. Push initial state, to the dawg beam and a regular
       // space to the top choice beam.
       PushInitialDawgIfBetter(code, unichar_id, uni_prev->permuter, false,
                               false, cert, cont, prev, step);
       PushHeapIfBetter(kBeamWidths[0], code, unichar_id, uni_prev->permuter,
                        false, false, false, false, cert, prev, nullptr,
                        nodawg_heap);
     }
     return;
   } else if (uni_prev != nullptr && uni_prev->start_of_dawg &&
              uni_prev->unichar_id != UNICHAR_SPACE &&
              dict_->getUnicharset().IsSpaceDelimited(uni_prev->unichar_id) &&
              dict_->getUnicharset().IsSpaceDelimited(unichar_id)) {
     return;  // Can't break words between space delimited chars.
   }
   DawgPositionVector initial_dawgs;
   DawgPositionVector* updated_dawgs = new DawgPositionVector;
   DawgArgs dawg_args(&initial_dawgs, updated_dawgs, NO_PERM);
   bool word_start = false;
   if (uni_prev == nullptr) {
     // Starting from beginning of line.
     dict_->default_dawgs(&initial_dawgs, false);
     word_start = true;
   } else if (uni_prev->dawgs != nullptr) {
     // Continuing a previous dict word.
     dawg_args.active_dawgs = uni_prev->dawgs;
     word_start = uni_prev->start_of_dawg;
   } else {
     return;  // Can't continue if not a dict word.
   }
   PermuterType permuter = static_cast<PermuterType>(
       dict_->def_letter_is_okay(&dawg_args,
                                 dict_->getUnicharset(), unichar_id, false));
   if (permuter != NO_PERM) {
     PushHeapIfBetter(kBeamWidths[0], code, unichar_id, permuter, false,
                      word_start, dawg_args.valid_end, false, cert, prev,
                      dawg_args.updated_dawgs, dawg_heap);
     if (dawg_args.valid_end && !space_delimited_) {
       // We can start another word right away, so push initial state as well,
       // to the dawg beam, and the regular character to the top choice beam,
       // since non-dict words can start here too.
       PushInitialDawgIfBetter(code, unichar_id, permuter, word_start, true,
                               cert, cont, prev, step);
       PushHeapIfBetter(kBeamWidths[0], code, unichar_id, permuter, false,
                        word_start, true, false, cert, prev, nullptr, nodawg_heap);
     }
   } else {
     delete updated_dawgs;
   }
 }

 // Adds a RecodeNode composed of the tuple (code, unichar_id,
 // initial-dawg-state, prev, cert) to the given heap if/ there is room or if
 // better than the current worst element if already full.
 void RecodeBeamSearch::PushInitialDawgIfBetter(int code, int unichar_id,
                                                PermuterType permuter,
                                                bool start, bool end, float cert,
                                                NodeContinuation cont,
                                                const RecodeNode* prev,
                                                RecodeBeam* step) {
   RecodeNode* best_initial_dawg = &step->best_initial_dawgs_[cont];
   float score = cert;
   if (prev != nullptr) score += prev->score;
   if (best_initial_dawg->code < 0 || score > best_initial_dawg->score) {
     DawgPositionVector* initial_dawgs = new DawgPositionVector;
     dict_->default_dawgs(initial_dawgs, false);
     RecodeNode node(code, unichar_id, permuter, true, start, end, false, cert,
                     score, prev, initial_dawgs,
                     ComputeCodeHash(code, false, prev));
     *best_initial_dawg = node;
   }
 }

 // Adds a RecodeNode composed of the tuple (code, unichar_id, permuter,
 // false, false, false, false, cert, prev, nullptr) to heap if there is room
 // or if better than the current worst element if already full.
 /* static */
 void RecodeBeamSearch::PushDupOrNoDawgIfBetter(
     int length, bool dup, int code, int unichar_id, float cert,
     float worst_dict_cert, float dict_ratio, bool use_dawgs,
     NodeContinuation cont, const RecodeNode* prev, RecodeBeam* step) {
   int index = BeamIndex(use_dawgs, cont, length);
   if (use_dawgs) {
     if (cert > worst_dict_cert) {
       PushHeapIfBetter(kBeamWidths[length], code, unichar_id,
                        prev ? prev->permuter : NO_PERM, false, false, false,
                        dup, cert, prev, nullptr, &step->beams_[index]);
     }
   } else {
     cert *= dict_ratio;
     if (cert >= kMinCertainty || code == null_char_) {
       PushHeapIfBetter(kBeamWidths[length], code, unichar_id,
                        prev ? prev->permuter : TOP_CHOICE_PERM, false, false,
                        false, dup, cert, prev, nullptr, &step->beams_[index]);
     }
   }
 }

 // Adds a RecodeNode composed of the tuple (code, unichar_id, permuter,
 // dawg_start, word_start, end, dup, cert, prev, d) to heap if there is room
 // or if better than the current worst element if already full.
 void RecodeBeamSearch::PushHeapIfBetter(int max_size, int code, int unichar_id,
                                         PermuterType permuter, bool dawg_start,
                                         bool word_start, bool end, bool dup,
                                         float cert, const RecodeNode* prev,
                                         DawgPositionVector* d,
                                         RecodeHeap* heap) {
   float score = cert;
   if (prev != nullptr) score += prev->score;
   if (heap->size() < max_size || score > heap->PeekTop().data.score) {
     uint64_t hash = ComputeCodeHash(code, dup, prev);
     RecodeNode node(code, unichar_id, permuter, dawg_start, word_start, end,
                     dup, cert, score, prev, d, hash);
     if (UpdateHeapIfMatched(&node, heap)) return;
     RecodePair entry(score, node);
     heap->Push(&entry);
     ASSERT_HOST(entry.data.dawgs == nullptr);
     if (heap->size() > max_size) heap->Pop(&entry);
   } else {
     delete d;
   }
 }

 // Adds a RecodeNode to heap if there is room
 // or if better than the current worst element if already full.
 void RecodeBeamSearch::PushHeapIfBetter(int max_size, RecodeNode* node,
                                         RecodeHeap* heap) {
   if (heap->size() < max_size || node->score > heap->PeekTop().data.score) {
     if (UpdateHeapIfMatched(node, heap)) {
       return;
     }
     RecodePair entry(node->score, *node);
     heap->Push(&entry);
     ASSERT_HOST(entry.data.dawgs == nullptr);
     if (heap->size() > max_size) heap->Pop(&entry);
   }
 }

 // Searches the heap for a matching entry, and updates the score with
 // reshuffle if needed. Returns true if there was a match.
 bool RecodeBeamSearch::UpdateHeapIfMatched(RecodeNode* new_node,
                                            RecodeHeap* heap) {
   // TODO(rays) consider hash map instead of linear search.
   // It might not be faster because the hash map would have to be updated
   // every time a heap reshuffle happens, and that would be a lot of overhead.
   GenericVector<RecodePair>* nodes = heap->heap();
   for (int i = 0; i < nodes->size(); ++i) {
     RecodeNode& node = (*nodes)[i].data;
     if (node.code == new_node->code && node.code_hash == new_node->code_hash &&
         node.permuter == new_node->permuter &&
         node.start_of_dawg == new_node->start_of_dawg) {
       if (new_node->score > node.score) {
         // The new one is better. Update the entire node in the heap and
         // reshuffle.
         node = *new_node;
         (*nodes)[i].key = node.score;
         heap->Reshuffle(&(*nodes)[i]);
       }
       return true;
     }
   }
   return false;
 }

 // Computes and returns the code-hash for the given code and prev.
 uint64_t RecodeBeamSearch::ComputeCodeHash(int code, bool dup,
                                            const RecodeNode* prev) const {
   uint64_t hash = prev == nullptr ? 0 : prev->code_hash;
   if (!dup && code != null_char_) {
     int num_classes = recoder_.code_range();
     uint64_t carry = (((hash >> 32) * num_classes) >> 32);
     hash *= num_classes;
     hash += carry;
     hash += code;
   }
   return hash;
 }

 // Backtracks to extract the best path through the lattice that was built
 // during Decode. On return the best_nodes vector essentially contains the set
 // of code, score pairs that make the optimal path with the constraint that
 // the recoder can decode the code sequence back to a sequence of unichar-ids.
 void RecodeBeamSearch::ExtractBestPaths(
     GenericVector<const RecodeNode*>* best_nodes,
     GenericVector<const RecodeNode*>* second_nodes) const {
   // Scan both beams to extract the best and second best paths.
   const RecodeNode* best_node = nullptr;
   const RecodeNode* second_best_node = nullptr;
   const RecodeBeam* last_beam = beam_[beam_size_ - 1];
   for (int c = 0; c < NC_COUNT; ++c) {
     if (c == NC_ONLY_DUP) continue;
     NodeContinuation cont = static_cast<NodeContinuation>(c);
     for (int is_dawg = 0; is_dawg < 2; ++is_dawg) {
       int beam_index = BeamIndex(is_dawg, cont, 0);
       int heap_size = last_beam->beams_[beam_index].size();
       for (int h = 0; h < heap_size; ++h) {
         const RecodeNode* node = &last_beam->beams_[beam_index].get(h).data;
         if (is_dawg) {
           // dawg_node may be a null_char, or duplicate, so scan back to the
           // last valid unichar_id.
           const RecodeNode* dawg_node = node;
           while (dawg_node != nullptr &&
                  (dawg_node->unichar_id == INVALID_UNICHAR_ID ||
                   dawg_node->duplicate))
             dawg_node = dawg_node->prev;
           if (dawg_node == nullptr || (!dawg_node->end_of_word &&
                                     dawg_node->unichar_id != UNICHAR_SPACE)) {
             // Dawg node is not valid.
             continue;
           }
         }
         if (best_node == nullptr || node->score > best_node->score) {
           second_best_node = best_node;
           best_node = node;
         } else if (second_best_node == nullptr ||
                    node->score > second_best_node->score) {
           second_best_node = node;
         }
       }
     }
   }
   if (second_nodes != nullptr) ExtractPath(second_best_node, second_nodes);
   ExtractPath(best_node, best_nodes);
 }

 // Helper backtracks through the lattice from the given node, storing the
 // path and reversing it.
 void RecodeBeamSearch::ExtractPath(
     const RecodeNode* node, GenericVector<const RecodeNode*>* path) const {
   path->truncate(0);
   while (node != nullptr) {
     path->push_back(node);
     node = node->prev;
   }
   path->reverse();
 }

 // Helper prints debug information on the given lattice path.
 void RecodeBeamSearch::DebugPath(
     const UNICHARSET* unicharset,
     const GenericVector<const RecodeNode*>& path) const {
   for (int c = 0; c < path.size(); ++c) {
     const RecodeNode& node = *path[c];
     tprintf("%d ", c);
     node.Print(null_char_, *unicharset, 1);
   }
 }

 // Helper prints debug information on the given unichar path.
 void RecodeBeamSearch::DebugUnicharPath(
     const UNICHARSET* unicharset, const GenericVector<const RecodeNode*>& path,
     const GenericVector<int>& unichar_ids, const GenericVector<float>& certs,
     const GenericVector<float>& ratings,
     const GenericVector<int>& xcoords) const {
   int num_ids = unichar_ids.size();
   double total_rating = 0.0;
   for (int c = 0; c < num_ids; ++c) {
     int coord = xcoords[c];
     tprintf("%d %d=%s r=%g, c=%g, s=%d, e=%d, perm=%d\n", coord, unichar_ids[c],
             unicharset->debug_str(unichar_ids[c]).string(), ratings[c],
             certs[c], path[coord]->start_of_word, path[coord]->end_of_word,
             path[coord]->permuter);
     total_rating += ratings[c];
   }
   tprintf("Path total rating = %g\n", total_rating);
 }

 }  // namespace tesseract.
tesseract::RecodeNode::unichar_id
int unichar_id
Definition: recodebeam.h:143

tesseract::RecodeBeamSearch::ExtractBestPathAsLabels
void ExtractBestPathAsLabels(GenericVector< int > *labels, GenericVector< int > *xcoords) const
Definition: recodebeam.cpp:140

character
Definition: mfoutline.h:55

WERD_RES::space_certainty
float space_certainty
Definition: pageres.h:316

tesseract::RecodedCharID::kMaxCodeLen
static const int kMaxCodeLen
Definition: unicharcompress.h:37

tesseract::RecodeBeamSearch::BeamIndex
static int BeamIndex(bool is_dawg, NodeContinuation cont, int length)
Definition: recodebeam.h:237

GenericVector::size
int size() const
Definition: genericvector.h:71

tesseract::GenericHeap::get
const Pair & get(int index) const
Definition: genericheap.h:87

tesseract::UnicharCompress::DecodeUnichar
int DecodeUnichar(const RecodedCharID &code) const
Definition: unicharcompress.cpp:293

WERD_RES::FakeWordFromRatings
void FakeWordFromRatings(PermuterType permuter)
Definition: pageres.cpp:904

BLOB_CHOICE
Definition: ratngs.h:49

tesseract::NC_ANYTHING
Definition: recodebeam.h:73

pageres.h

UNICHARSET::IsSpaceDelimited
bool IsSpaceDelimited(UNICHAR_ID unichar_id) const
Definition: unicharset.h:647

tesseract::RecodeBeamSearch::Decode
void Decode(const NetworkIO &output, double dict_ratio, double cert_offset, double worst_dict_cert, const UNICHARSET *charset, int lstm_choice_mode=0)
Definition: recodebeam.cpp:82

tesseract::RecodeHeap
GenericHeap< RecodePair > RecodeHeap
Definition: recodebeam.h:176

tesseract::NC_NO_DUP
Definition: recodebeam.h:77

recodebeam.h

tesseract::RecodeBeamSearch::LengthFromBeamsIndex
static int LengthFromBeamsIndex(int index)
Definition: recodebeam.h:229

STRING::string
const char * string() const
Definition: strngs.cpp:196

BLOB_CHOICE::set_matrix_cell
void set_matrix_cell(int col, int row)
Definition: ratngs.h:157

GENERIC_2D_ARRAY< float >

tesseract::KDPair::data
Data data
Definition: kdpair.h:45

TBOX
Definition: rect.h:34

tesseract::TN_ALSO_RAN
Definition: recodebeam.h:87

tesseract::TN_TOPN
Definition: recodebeam.h:86

networkio.h

NO_PERM
Definition: ratngs.h:243

tesseract::PointerVector::truncate
void truncate(int size)
Definition: genericvector.h:497

WERD_RES
Definition: pageres.h:169

tesseract::RecodeBeamSearch::ContinuationFromBeamsIndex
static NodeContinuation ContinuationFromBeamsIndex(int index)
Definition: recodebeam.h:230

TBOX::scale
void scale(const float f)
Definition: rect.h:175

tesseract::PointerVector< WERD_RES >

tesseract::TopNState
TopNState
Definition: recodebeam.h:84

tesseract::UnicharCompress::GetFinalCodes
const GenericVector< int > * GetFinalCodes(const RecodedCharID &code) const
Definition: unicharcompress.h:179

tesseract::RecodeNode::permuter
PermuterType permuter
Definition: recodebeam.h:147

tesseract::RecodeNode::Print
void Print(int null_char, const UNICHARSET &unicharset, int depth) const
Definition: recodebeam.cpp:48

tesseract::RecodeNode::score
float score
Definition: recodebeam.h:165

UNICHARSET
Definition: unicharset.h:146

GenericVector::back
T & back() const
Definition: genericvector.h:730

tesseract::RecodeBeamSearch::timesteps
std::vector< std::vector< std::pair< const char *, float > > > timesteps
Definition: recodebeam.h:216

tesseract::NC_ONLY_DUP
Definition: recodebeam.h:74

UNICHARSET::size
int size() const
Definition: unicharset.h:336

tesseract::NC_COUNT
Definition: recodebeam.h:80

tesseract::RecodeBeamSearch::DebugBeams
void DebugBeams(const UNICHARSET &unicharset) const
Definition: recodebeam.cpp:317

TBOX::left
int16_t left() const
Definition: rect.h:72

tesseract::RecodeBeamSearch::IsDawgFromBeamsIndex
static bool IsDawgFromBeamsIndex(int index)
Definition: recodebeam.h:233

tesseract::NetworkIO
Definition: networkio.h:39

GenericVector::reverse
void reverse()
Definition: genericvector.h:215

TBOX::top
int16_t top() const
Definition: rect.h:58

GenericVector< int >

tesseract::NodeContinuation
NodeContinuation
Definition: recodebeam.h:72

tesseract::UnicharCompress::GetNextCodes
const GenericVector< int > * GetNextCodes(const RecodedCharID &code) const
Definition: unicharcompress.h:173

ICOORD
integer coordinate
Definition: points.h:32

GenericVector::init_to_size
void init_to_size(int size, const T &t)
Definition: genericvector.h:708

tesseract::RecodeNode::code
int code
Definition: recodebeam.h:141

tesseract::RecodeNode
Definition: recodebeam.h:92

tesseract::UnicharCompress
Definition: unicharcompress.h:128

UNICHARSET::debug_str
STRING debug_str(UNICHAR_ID id) const
Definition: unicharset.cpp:342

tesseract::Dict
Definition: dict.h:88

tesseract::RecodeNode::start_of_word
bool start_of_word
Definition: recodebeam.h:152

tesseract::TN_COUNT
Definition: recodebeam.h:88

tesseract::GenericHeap::size
int size() const
Definition: genericheap.h:71

TOP_CHOICE_PERM
Definition: ratngs.h:245

tesseract::Dict::IsSpaceDelimitedLang
bool IsSpaceDelimitedLang() const
Returns true if the language is space-delimited (not CJ, or T).
Definition: dict.cpp:857

tesseract::RecodePair
KDPairInc< double, RecodeNode > RecodePair
Definition: recodebeam.h:175

WERD
Definition: werd.h:59

tesseract::GenericHeap< RecodePair >

GenericVector::empty
bool empty() const
Definition: genericvector.h:90

tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37

tesseract::NetworkIO::ProbToCertainty
static float ProbToCertainty(float prob)
Definition: networkio.cpp:573

GENERIC_2D_ARRAY::put
void put(ICOORD pos, const T &thing)
Definition: matrix.h:220

tesseract::RecodeNode::end_of_word
bool end_of_word
Definition: recodebeam.h:156

tesseract::Dict::def_letter_is_okay
int def_letter_is_okay(void *void_dawg_args, const UNICHARSET &unicharset, UNICHAR_ID unichar_id, bool word_end) const
Definition: dict.cpp:367

UNICHARSET::id_to_unichar_ext
const char * id_to_unichar_ext(UNICHAR_ID id) const
Definition: unicharset.cpp:298

tesseract
Definition: baseapi.cpp:94

GenericVector::push_back
int push_back(T object)
Definition: genericvector.h:799

GENERIC_2D_ARRAY::dim1
int dim1() const
Definition: matrix.h:206

tesseract::RecodeBeamSearch::RecodeBeamSearch
RecodeBeamSearch(const UnicharCompress &recoder, int null_char, bool simple_text, Dict *dict)
Definition: recodebeam.cpp:68

tesseract::NetworkIO::f
float * f(int t)
Definition: networkio.h:115

tesseract::RecodeNode::prev
const RecodeNode * prev
Definition: recodebeam.h:167

WERD_RES::combination
bool combination
Definition: pageres.h:334

unicharcompress.h

tesseract::RecodeBeamSearch::kMinCertainty
static const float kMinCertainty
Definition: recodebeam.h:222

tesseract::RecodeNode::certainty
float certainty
Definition: recodebeam.h:163

WERD_RES::ratings
MATRIX * ratings
Definition: pageres.h:231

WERD_RES::uch_set
const UNICHARSET * uch_set
Definition: pageres.h:206

tesseract::RecodeBeamSearch::ExtractBestPathAsUnicharIds
void ExtractBestPathAsUnicharIds(bool debug, const UNICHARSET *unicharset, GenericVector< int > *unichar_ids, GenericVector< float > *certs, GenericVector< float > *ratings, GenericVector< int > *xcoords) const
Definition: recodebeam.cpp:163

tesseract::Dict::getUnicharset
const UNICHARSET & getUnicharset() const
Definition: dict.h:98

tesseract::UnicharCompress::code_range
int code_range() const
Definition: unicharcompress.h:161

GENERIC_2D_ARRAY::dim2
int dim2() const
Definition: matrix.h:207

BCC_STATIC_CLASSIFIER
Definition: ratngs.h:42

WERD_RES::timesteps
std::vector< std::vector< std::pair< const char *, float > > > timesteps
Definition: pageres.h:224

tesseract::RecodeBeamSearch::kNumBeams
static const int kNumBeams
Definition: recodebeam.h:227

GenericVector::truncate
void truncate(int size)
Definition: genericvector.h:136

tesseract::Dict::default_dawgs
void default_dawgs(DawgPositionVector *anylength_dawgs, bool suppress_patterns) const
Definition: dict.cpp:586

tesseract::RecodeNode::start_of_dawg
bool start_of_dawg
Definition: recodebeam.h:150

tesseract::RecodeBeamSearch::ExtractBestPathAsWords
void ExtractBestPathAsWords(const TBOX &line_box, float scale_factor, bool debug, const UNICHARSET *unicharset, PointerVector< WERD_RES > *words, int lstm_choice_mode=0)
Definition: recodebeam.cpp:178

MATRIX
Definition: matrix.h:575

tesseract::kNodeContNames
const char * kNodeContNames[]
Definition: recodebeam.cpp:45

tesseract::RecodeNode::code_hash
uint64_t code_hash
Definition: recodebeam.h:172

tesseract::TN_TOP2
Definition: recodebeam.h:85

C_BLOB::FakeBlob
static C_BLOB * FakeBlob(const TBOX &box)
Definition: stepblob.cpp:243

TBOX::bottom
int16_t bottom() const
Definition: rect.h:65

tesseract::NetworkIO::Width
int Width() const
Definition: networkio.h:107

TBOX::height
int16_t height() const
Definition: rect.h:108

PermuterType
PermuterType
Definition: ratngs.h:242

UNICHAR_SPACE
Definition: unicharset.h:35

tesseract::NetworkIO::NumFeatures
int NumFeatures() const
Definition: networkio.h:111

ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:84