tesseract  5.0.0-alpha-619-ge9db
recodebeam.h
Go to the documentation of this file.
1 // File: recodebeam.h
3 // Description: Beam search to decode from the re-encoded CJK as a sequence of
4 // smaller numbers in place of a single large code.
5 // Author: Ray Smith
6 //
7 // (C) Copyright 2015, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #ifndef THIRD_PARTY_TESSERACT_LSTM_RECODEBEAM_H_
21 #define THIRD_PARTY_TESSERACT_LSTM_RECODEBEAM_H_
22 
23 #include "dawg.h"
24 #include "dict.h"
25 #include "genericheap.h"
26 #include "kdpair.h"
27 #include "networkio.h"
28 #include "ratngs.h"
29 #include "unicharcompress.h"
30 #include <deque>
31 #include <set>
32 #include <tuple>
33 #include <vector>
34 #include <unordered_set>
35 
36 namespace tesseract {
37 
38 // Enum describing what can follow the current node.
39 // Consider the following softmax outputs:
40 // Timestep 0 1 2 3 4 5 6 7 8
41 // X-score 0.01 0.55 0.98 0.42 0.01 0.01 0.40 0.95 0.01
42 // Y-score 0.00 0.01 0.01 0.01 0.01 0.97 0.59 0.04 0.01
43 // Null-score 0.99 0.44 0.01 0.57 0.98 0.02 0.01 0.01 0.98
44 // Then the correct CTC decoding (in which adjacent equal classes are folded,
45 // and then all nulls are dropped) is clearly XYX, but simple decoding (taking
46 // the max at each timestep) leads to:
47 // Null@0.99 X@0.55 X@0.98 Null@0.57 Null@0.98 Y@0.97 Y@0.59 X@0.95 Null@0.98,
48 // which folds to the correct XYX. The conversion to Tesseract rating and
49 // certainty uses the sum of the log probs (log of the product of probabilities)
50 // for the Rating and the minimum log prob for the certainty, but that yields a
51 // minimum certainty of log(0.55), which is poor for such an obvious case.
52 // CTC says that the probability of the result is the SUM of the products of the
53 // probabilities over ALL PATHS that decode to the same result, which includes:
54 // NXXNNYYXN, NNXNNYYXN, NXXXNYYXN, NNXXNYXXN, and others including XXXXXYYXX.
55 // That is intractable, so some compromise between simple and ideal is needed.
56 // Observing that evenly split timesteps rarely happen next to each other, we
57 // allow scores at a transition between classes to be added for decoding thus:
58 // N@0.99 (N+X)@0.99 X@0.98 (N+X)@0.99 N@0.98 Y@0.97 (X+Y+N)@1.00 X@0.95 N@0.98.
59 // This works because NNX and NXX both decode to X, so in the middle we can use
60 // N+X. Note that the classes either side of a sum must stand alone, i.e. use a
61 // single score, to force all paths to pass through them and decode to the same
62 // result. Also in the special case of a transition from X to Y, with only one
63 // timestep between, it is possible to add X+Y+N, since XXY, XYY, and XNY all
64 // decode to XY.
65 // An important condition is that we cannot combine X and Null between two
66 // stand-alone Xs, since that can decode as XNX->XX or XXX->X, so the scores for
67 // X and Null have to go in separate paths. Combining scores in this way
68 // provides a much better minimum certainty of log(0.95).
69 // In the implementation of the beam search, we have to place the possibilities
70 // X, X+N and X+Y+N in the beam under appropriate conditions of the previous
71 // node, and constrain what can follow, to enforce the rules explained above.
72 // We therefore have 3 different types of node determined by what can follow:
74  NC_ANYTHING, // This node used just its own score, so anything can follow.
75  NC_ONLY_DUP, // The current node combined another score with the score for
76  // itself, without a stand-alone duplicate before, so must be
77  // followed by a stand-alone duplicate.
78  NC_NO_DUP, // The current node combined another score with the score for
79  // itself, after a stand-alone, so can only be followed by
80  // something other than a duplicate of the current node.
82 };
83 
84 // Enum describing the top-n status of a code.
85 enum TopNState {
86  TN_TOP2, // Winner or 2nd.
87  TN_TOPN, // Runner up in top-n, but not 1st or 2nd.
88  TN_ALSO_RAN, // Not in the top-n.
90 };
91 
92 // Lattice element for Re-encode beam search.
93 struct RecodeNode {
95  : code(-1),
96  unichar_id(INVALID_UNICHAR_ID),
98  start_of_dawg(false),
99  start_of_word(false),
100  end_of_word(false),
101  duplicate(false),
102  certainty(0.0f),
103  score(0.0f),
104  prev(nullptr),
105  dawgs(nullptr),
106  code_hash(0) {}
107  RecodeNode(int c, int uni_id, PermuterType perm, bool dawg_start,
108  bool word_start, bool end, bool dup, float cert, float s,
109  const RecodeNode* p, DawgPositionVector* d, uint64_t hash)
110  : code(c),
111  unichar_id(uni_id),
112  permuter(perm),
113  start_of_dawg(dawg_start),
114  start_of_word(word_start),
115  end_of_word(end),
116  duplicate(dup),
117  certainty(cert),
118  score(s),
119  prev(p),
120  dawgs(d),
121  code_hash(hash) {}
122  // NOTE: If we could use C++11, then this would be a move constructor.
123  // Instead we have copy constructor that does a move!! This is because we
124  // don't want to copy the whole DawgPositionVector each time, and true
125  // copying isn't necessary for this struct. It does get moved around a lot
126  // though inside the heap and during heap push, hence the move semantics.
127  RecodeNode(RecodeNode& src) : dawgs(nullptr) {
128  *this = src;
129  ASSERT_HOST(src.dawgs == nullptr);
130  }
132  delete dawgs;
133  memcpy(this, &src, sizeof(src));
134  src.dawgs = nullptr;
135  return *this;
136  }
137  ~RecodeNode() { delete dawgs; }
138  // Prints details of the node.
139  void Print(int null_char, const UNICHARSET& unicharset, int depth) const;
140 
141  // The re-encoded code here = index to network output.
142  int code;
143  // The decoded unichar_id is only valid for the final code of a sequence.
145  // The type of permuter active at this point. Intervals between start_of_word
146  // and end_of_word make valid words of type given by permuter where
147  // end_of_word is true. These aren't necessarily delimited by spaces.
149  // True if this is the initial dawg state. May be attached to a space or,
150  // in a non-space-delimited lang, the end of the previous word.
152  // True if this is the first node in a dictionary word.
154  // True if this represents a valid candidate end of word position. Does not
155  // necessarily mark the end of a word, since a word can be extended beyond a
156  // candidate end by a continuation, eg 'the' continues to 'these'.
158  // True if this->code is a duplicate of prev->code. Some training modes
159  // allow the network to output duplicate characters and crush them with CTC,
160  // but that would mess up the dictionary search, so we just smash them
161  // together on the fly using the duplicate flag.
162  bool duplicate;
163  // Certainty (log prob) of (just) this position.
164  float certainty;
165  // Total certainty of the path to this position.
166  float score;
167  // The previous node in this chain. Borrowed pointer.
168  const RecodeNode* prev;
169  // The currently active dawgs at this position. Owned pointer.
171  // A hash of all codes in the prefix and this->code as well. Used for
172  // duplicate path removal.
173  uint64_t code_hash;
174 };
175 
178 
179 // Class that holds the entire beam search for recognition of a text line.
181  public:
182  // Borrows the pointer, which is expected to survive until *this is deleted.
183  RecodeBeamSearch(const UnicharCompress& recoder, int null_char,
184  bool simple_text, Dict* dict);
185 
186  // Decodes the set of network outputs, storing the lattice internally.
187  // If charset is not null, it enables detailed debugging of the beam search.
188  void Decode(const NetworkIO& output, double dict_ratio, double cert_offset,
189  double worst_dict_cert, const UNICHARSET* charset,
190  int lstm_choice_mode = 0);
191  void Decode(const GENERIC_2D_ARRAY<float>& output, double dict_ratio,
192  double cert_offset, double worst_dict_cert,
193  const UNICHARSET* charset);
194 
195  void DecodeSecondaryBeams(const NetworkIO& output, double dict_ratio,
196  double cert_offset, double worst_dict_cert,
197  const UNICHARSET* charset,
198  int lstm_choice_mode = 0);
199 
200  // Returns the best path as labels/scores/xcoords similar to simple CTC.
202  GenericVector<int>* xcoords) const;
203  // Returns the best path as unichar-ids/certs/ratings/xcoords skipping
204  // duplicates, nulls and intermediate parts.
205  void ExtractBestPathAsUnicharIds(bool debug, const UNICHARSET* unicharset,
206  GenericVector<int>* unichar_ids,
207  GenericVector<float>* certs,
208  GenericVector<float>* ratings,
209  GenericVector<int>* xcoords) const;
210 
211  // Returns the best path as a set of WERD_RES.
212  void ExtractBestPathAsWords(const TBOX& line_box, float scale_factor,
213  bool debug, const UNICHARSET* unicharset,
215  int lstm_choice_mode = 0);
216 
217  // Generates debug output of the content of the beams after a Decode.
218  void DebugBeams(const UNICHARSET& unicharset) const;
219 
220  // Extracts the best characters from the current decode iteration and blocks
221  // those symbols for the next iteration. In contrast to Tesseract's standard
222  // method of choosing the best overall node chain, this method looks at a
223  // short node chain segmented by the character boundaries and chooses the
224  // best option independent of the remaining node chain.
225  void extractSymbolChoices(const UNICHARSET* unicharset);
226 
227  // Generates debug output of the content of the beams after a Decode.
228  void PrintBeam2(bool uids, int num_outputs, const UNICHARSET* charset,
229  bool secondary) const;
230  // Segments the timestep bundle by the character_boundaries.
232  std::vector<std::vector<std::pair<const char*, float>>>
233  // Unions the segmented timestep character bundles to one big bundle.
235  std::vector<std::vector<std::vector<std::pair<const char*, float>>>>*
237  // Stores the alternative characters of every timestep together with their
238  // probability.
239  std::vector< std::vector<std::pair<const char*, float>>> timesteps;
240  std::vector<std::vector<std::vector<std::pair<const char*, float>>>>
242  // Stores the character choices found in the ctc algorithm
243  std::vector<std::vector<std::pair<const char*, float>>> ctc_choices;
244  // Stores all unicharids which are excluded for future iterations
245  std::vector<std::unordered_set<int>> excludedUnichars;
246  // Stores the character boundaries regarding timesteps.
247  std::vector<int> character_boundaries_;
248  // Clipping value for certainty inside Tesseract. Reflects the minimum value
249  // of certainty that will be returned by ExtractBestPathAsUnicharIds.
250  // Supposedly on a uniform scale that can be compared across languages and
251  // engines.
252  static constexpr float kMinCertainty = -20.0f;
253  // Number of different code lengths for which we have a separate beam.
254  static const int kNumLengths = RecodedCharID::kMaxCodeLen + 1;
255  // Total number of beams: dawg/nodawg * number of NodeContinuation * number
256  // of different lengths.
257  static const int kNumBeams = 2 * NC_COUNT * kNumLengths;
258  // Returns the relevant factor in the beams_ index.
259  static int LengthFromBeamsIndex(int index) { return index % kNumLengths; }
261  return static_cast<NodeContinuation>((index / kNumLengths) % NC_COUNT);
262  }
263  static bool IsDawgFromBeamsIndex(int index) {
264  return index / (kNumLengths * NC_COUNT) > 0;
265  }
266  // Computes a beams_ index from the given factors.
267  static int BeamIndex(bool is_dawg, NodeContinuation cont, int length) {
268  return (is_dawg * NC_COUNT + cont) * kNumLengths + length;
269  }
270 
271  private:
272  // Struct for the Re-encode beam search. This struct holds the data for
273  // a single time-step position of the output. Use a PointerVector<RecodeBeam>
274  // to hold all the timesteps and prevent reallocation of the individual heaps.
275  struct RecodeBeam {
276  // Resets to the initial state without deleting all the memory.
277  void Clear() {
278  for (auto & beam : beams_) {
279  beam.clear();
280  }
281  RecodeNode empty;
282  for (auto & best_initial_dawg : best_initial_dawgs_) {
283  best_initial_dawg = empty;
284  }
285  }
286 
287  // A separate beam for each combination of code length,
288  // NodeContinuation, and dictionary flag. Separating out all these types
289  // allows the beam to be quite narrow, and yet still have a low chance of
290  // losing the best path.
291  // We have to keep all these beams separate, since the highest scoring paths
292  // come from the paths that are most likely to dead-end at any time, like
293  // dawg paths, NC_ONLY_DUP etc.
294  // Each heap is stored with the WORST result at the top, so we can quickly
295  // get the top-n values.
296  RecodeHeap beams_[kNumBeams];
297  // While the language model is only a single word dictionary, we can use
298  // word starts as a choke point in the beam, and keep only a single dict
299  // start node at each step (for each NodeContinuation type), so we find the
300  // best one here and push it on the heap, if it qualifies, after processing
301  // all of the step.
302  RecodeNode best_initial_dawgs_[NC_COUNT];
303  };
304  using TopPair = KDPairInc<float, int>;
305 
306  // Generates debug output of the content of a single beam position.
307  void DebugBeamPos(const UNICHARSET& unicharset, const RecodeHeap& heap) const;
308 
309  // Returns the given best_nodes as unichar-ids/certs/ratings/xcoords skipping
310  // duplicates, nulls and intermediate parts.
311  static void ExtractPathAsUnicharIds(
312  const GenericVector<const RecodeNode*>& best_nodes,
313  GenericVector<int>* unichar_ids, GenericVector<float>* certs,
314  GenericVector<float>* ratings, GenericVector<int>* xcoords,
315  std::vector<int>* character_boundaries = nullptr);
316 
317  // Sets up a word with the ratings matrix and fake blobs with boxes in the
318  // right places.
319  WERD_RES* InitializeWord(bool leading_space, const TBOX& line_box,
320  int word_start, int word_end, float space_certainty,
321  const UNICHARSET* unicharset,
322  const GenericVector<int>& xcoords,
323  float scale_factor);
324 
325  // Fills top_n_flags_ with bools that are true iff the corresponding output
326  // is one of the top_n.
327  void ComputeTopN(const float* outputs, int num_outputs, int top_n);
328 
329  void ComputeSecTopN(std::unordered_set<int>* exList,
330  const float* outputs, int num_outputs, int top_n);
331 
332  // Adds the computation for the current time-step to the beam. Call at each
333  // time-step in sequence from left to right. outputs is the activation vector
334  // for the current timestep.
335  void DecodeStep(const float* outputs, int t, double dict_ratio,
336  double cert_offset, double worst_dict_cert,
337  const UNICHARSET* charset, bool debug = false);
338 
339  void DecodeSecondaryStep(const float* outputs, int t, double dict_ratio,
340  double cert_offset, double worst_dict_cert,
341  const UNICHARSET* charset, bool debug = false);
342 
343  // Saves the most certain choices for the current time-step.
344  void SaveMostCertainChoices(const float* outputs, int num_outputs,
345  const UNICHARSET* charset, int xCoord);
346 
347  // Calculates more accurate character boundaries which can be used to
348  // provide more accurate alternative symbol choices.
349  static void calculateCharBoundaries(std::vector<int>* starts,
350  std::vector<int>* ends,
351  std::vector<int>* character_boundaries_,
352  int maxWidth);
353 
354  // Adds to the appropriate beams the legal (according to recoder)
355  // continuations of context prev, which is from the given index to beams_,
356  // using the given network outputs to provide scores to the choices. Uses only
357  // those choices for which top_n_flags[code] == top_n_flag.
358  void ContinueContext(const RecodeNode* prev, int index, const float* outputs,
359  TopNState top_n_flag, const UNICHARSET* unicharset,
360  double dict_ratio, double cert_offset,
361  double worst_dict_cert, RecodeBeam* step);
362  // Continues for a new unichar, using dawg or non-dawg as per flag.
363  void ContinueUnichar(int code, int unichar_id, float cert,
364  float worst_dict_cert, float dict_ratio, bool use_dawgs,
365  NodeContinuation cont, const RecodeNode* prev,
366  RecodeBeam* step);
367  // Adds a RecodeNode composed of the args to the correct heap in step if
368  // unichar_id is a valid dictionary continuation of whatever is in prev.
369  void ContinueDawg(int code, int unichar_id, float cert, NodeContinuation cont,
370  const RecodeNode* prev, RecodeBeam* step);
371  // Sets the correct best_initial_dawgs_ with a RecodeNode composed of the args
372  // if better than what is already there.
373  void PushInitialDawgIfBetter(int code, int unichar_id, PermuterType permuter,
374  bool start, bool end, float cert,
375  NodeContinuation cont, const RecodeNode* prev,
376  RecodeBeam* step);
377  // Adds a RecodeNode composed of the args to the correct heap in step for
378  // partial unichar or duplicate if there is room or if better than the
379  // current worst element if already full.
380  void PushDupOrNoDawgIfBetter(int length, bool dup, int code, int unichar_id,
381  float cert, float worst_dict_cert,
382  float dict_ratio, bool use_dawgs,
383  NodeContinuation cont, const RecodeNode* prev,
384  RecodeBeam* step);
385  // Adds a RecodeNode composed of the args to the correct heap in step if there
386  // is room or if better than the current worst element if already full.
387  void PushHeapIfBetter(int max_size, int code, int unichar_id,
388  PermuterType permuter, bool dawg_start, bool word_start,
389  bool end, bool dup, float cert, const RecodeNode* prev,
390  DawgPositionVector* d, RecodeHeap* heap);
391  // Adds a RecodeNode to heap if there is room
392  // or if better than the current worst element if already full.
393  void PushHeapIfBetter(int max_size, RecodeNode* node, RecodeHeap* heap);
394  // Searches the heap for an entry matching new_node, and updates the entry
395  // with reshuffle if needed. Returns true if there was a match.
396  bool UpdateHeapIfMatched(RecodeNode* new_node, RecodeHeap* heap);
397  // Computes and returns the code-hash for the given code and prev.
398  uint64_t ComputeCodeHash(int code, bool dup, const RecodeNode* prev) const;
399  // Backtracks to extract the best path through the lattice that was built
400  // during Decode. On return the best_nodes vector essentially contains the set
401  // of code, score pairs that make the optimal path with the constraint that
402  // the recoder can decode the code sequence back to a sequence of unichar-ids.
403  void ExtractBestPaths(GenericVector<const RecodeNode*>* best_nodes,
404  GenericVector<const RecodeNode*>* second_nodes) const;
405  // Helper backtracks through the lattice from the given node, storing the
406  // path and reversing it.
407  void ExtractPath(const RecodeNode* node,
409  void ExtractPath(const RecodeNode* node,
411  int limiter) const;
412  // Helper prints debug information on the given lattice path.
413  void DebugPath(const UNICHARSET* unicharset,
414  const GenericVector<const RecodeNode*>& path) const;
415  // Helper prints debug information on the given unichar path.
416  void DebugUnicharPath(const UNICHARSET* unicharset,
418  const GenericVector<int>& unichar_ids,
419  const GenericVector<float>& certs,
420  const GenericVector<float>& ratings,
421  const GenericVector<int>& xcoords) const;
422 
423  static const int kBeamWidths[RecodedCharID::kMaxCodeLen + 1];
424 
425  // The encoder/decoder that we will be using.
426  const UnicharCompress& recoder_;
427  // The beam for each timestep in the output.
428  PointerVector<RecodeBeam> beam_;
429  // Secondary Beam for Results with less Probability
430  PointerVector<RecodeBeam> secondary_beam_;
431  // The number of timesteps valid in beam_.
432  int beam_size_;
433  // A flag to indicate which outputs are the top-n choices. Current timestep
434  // only.
435  GenericVector<TopNState> top_n_flags_;
436  // A record of the highest and second scoring codes.
437  int top_code_;
438  int second_code_;
439  // Heap used to compute the top_n_flags_.
440  GenericHeap<TopPair> top_heap_;
441  // Borrowed pointer to the dictionary to use in the search.
442  Dict* dict_;
443  // True if the language is space-delimited, which is true for most languages
444  // except chi*, jpn, tha.
445  bool space_delimited_;
446  // True if the input is simple text, ie adjacent equal chars are not to be
447  // eliminated.
448  bool is_simple_text_;
449  // The encoded (class label) of the null/reject character.
450  int null_char_;
451 };
452 
453 } // namespace tesseract.
454 
455 #endif // THIRD_PARTY_TESSERACT_LSTM_RECODEBEAM_H_
tesseract::GenericHeap< RecodePair >
tesseract::NC_NO_DUP
Definition: recodebeam.h:78
tesseract::RecodeBeamSearch::kMinCertainty
static constexpr float kMinCertainty
Definition: recodebeam.h:252
tesseract::RecodeBeamSearch::excludedUnichars
std::vector< std::unordered_set< int > > excludedUnichars
Definition: recodebeam.h:245
tesseract::TN_TOP2
Definition: recodebeam.h:86
dict.h
tesseract::RecodeBeamSearch::ctc_choices
std::vector< std::vector< std::pair< const char *, float > > > ctc_choices
Definition: recodebeam.h:243
tesseract::RecodeNode::unichar_id
int unichar_id
Definition: recodebeam.h:144
networkio.h
tesseract::RecodeBeamSearch
Definition: recodebeam.h:180
tesseract::TopNState
TopNState
Definition: recodebeam.h:85
ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:87
tesseract::RecodeNode::operator=
RecodeNode & operator=(RecodeNode &src)
Definition: recodebeam.h:131
PermuterType
PermuterType
Definition: ratngs.h:230
tesseract::NodeContinuation
NodeContinuation
Definition: recodebeam.h:73
tesseract::TN_ALSO_RAN
Definition: recodebeam.h:88
tesseract::RecodeNode::RecodeNode
RecodeNode(RecodeNode &src)
Definition: recodebeam.h:127
tesseract::PointerVector< WERD_RES >
tesseract::RecodeBeamSearch::LengthFromBeamsIndex
static int LengthFromBeamsIndex(int index)
Definition: recodebeam.h:259
WERD_RES
Definition: pageres.h:160
tesseract::RecodeBeamSearch::timesteps
std::vector< std::vector< std::pair< const char *, float > > > timesteps
Definition: recodebeam.h:239
tesseract::RecodeNode
Definition: recodebeam.h:93
tesseract::RecodeBeamSearch::Decode
void Decode(const NetworkIO &output, double dict_ratio, double cert_offset, double worst_dict_cert, const UNICHARSET *charset, int lstm_choice_mode=0)
Definition: recodebeam.cpp:78
tesseract::RecodeNode::RecodeNode
RecodeNode()
Definition: recodebeam.h:94
tesseract::RecodeNode::start_of_dawg
bool start_of_dawg
Definition: recodebeam.h:151
GENERIC_2D_ARRAY< float >
tesseract::RecodeNode::prev
const RecodeNode * prev
Definition: recodebeam.h:168
tesseract::RecodeBeamSearch::PrintBeam2
void PrintBeam2(bool uids, int num_outputs, const UNICHARSET *charset, bool secondary) const
Definition: recodebeam.cpp:315
ratngs.h
tesseract::RecodeNode::score
float score
Definition: recodebeam.h:166
tesseract::TN_TOPN
Definition: recodebeam.h:87
tesseract::RecodeNode::duplicate
bool duplicate
Definition: recodebeam.h:162
tesseract::RecodeBeamSearch::extractSymbolChoices
void extractSymbolChoices(const UNICHARSET *unicharset)
Definition: recodebeam.cpp:395
tesseract::RecodeNode::code_hash
uint64_t code_hash
Definition: recodebeam.h:173
dawg.h
tesseract::RecodeNode::dawgs
DawgPositionVector * dawgs
Definition: recodebeam.h:170
tesseract::TN_COUNT
Definition: recodebeam.h:89
tesseract::NC_ONLY_DUP
Definition: recodebeam.h:75
UNICHARSET
Definition: unicharset.h:145
tesseract::NetworkIO
Definition: networkio.h:39
tesseract::RecodeBeamSearch::IsDawgFromBeamsIndex
static bool IsDawgFromBeamsIndex(int index)
Definition: recodebeam.h:263
tesseract::RecodeBeamSearch::kNumLengths
static const int kNumLengths
Definition: recodebeam.h:254
tesseract::NC_COUNT
Definition: recodebeam.h:81
kdpair.h
tesseract
Definition: baseapi.h:65
tesseract::RecodeBeamSearch::DecodeSecondaryBeams
void DecodeSecondaryBeams(const NetworkIO &output, double dict_ratio, double cert_offset, double worst_dict_cert, const UNICHARSET *charset, int lstm_choice_mode=0)
Definition: recodebeam.cpp:105
TOP_CHOICE_PERM
Definition: ratngs.h:233
tesseract::RecodeBeamSearch::kNumBeams
static const int kNumBeams
Definition: recodebeam.h:257
tesseract::RecodeBeamSearch::segmentTimestepsByCharacters
void segmentTimestepsByCharacters()
Definition: recodebeam.cpp:156
tesseract::RecodeBeamSearch::BeamIndex
static int BeamIndex(bool is_dawg, NodeContinuation cont, int length)
Definition: recodebeam.h:267
tesseract::DawgPositionVector
Definition: dawg.h:373
GenericVector< int >
tesseract::RecodeBeamSearch::ContinuationFromBeamsIndex
static NodeContinuation ContinuationFromBeamsIndex(int index)
Definition: recodebeam.h:260
tesseract::KDPairInc
Definition: kdpair.h:51
tesseract::Dict
Definition: dict.h:91
tesseract::RecodeNode::start_of_word
bool start_of_word
Definition: recodebeam.h:153
tesseract::RecodeNode::~RecodeNode
~RecodeNode()
Definition: recodebeam.h:137
tesseract::RecodedCharID::kMaxCodeLen
static const int kMaxCodeLen
Definition: unicharcompress.h:37
tesseract::RecodeNode::certainty
float certainty
Definition: recodebeam.h:164
tesseract::RecodeNode::end_of_word
bool end_of_word
Definition: recodebeam.h:157
tesseract::RecodeNode::RecodeNode
RecodeNode(int c, int uni_id, PermuterType perm, bool dawg_start, bool word_start, bool end, bool dup, float cert, float s, const RecodeNode *p, DawgPositionVector *d, uint64_t hash)
Definition: recodebeam.h:107
tesseract::RecodeBeamSearch::combineSegmentedTimesteps
std::vector< std::vector< std::pair< const char *, float > > > combineSegmentedTimesteps(std::vector< std::vector< std::vector< std::pair< const char *, float >>>> *segmentedTimesteps)
Definition: recodebeam.cpp:166
unicharcompress.h
tesseract::RecodeNode::permuter
PermuterType permuter
Definition: recodebeam.h:148
tesseract::RecodeBeamSearch::ExtractBestPathAsWords
void ExtractBestPathAsWords(const TBOX &line_box, float scale_factor, bool debug, const UNICHARSET *unicharset, PointerVector< WERD_RES > *words, int lstm_choice_mode=0)
Definition: recodebeam.cpp:230
tesseract::RecodeBeamSearch::DebugBeams
void DebugBeams(const UNICHARSET &unicharset) const
Definition: recodebeam.cpp:495
tesseract::RecodeBeamSearch::character_boundaries_
std::vector< int > character_boundaries_
Definition: recodebeam.h:247
tesseract::RecodeHeap
GenericHeap< RecodePair > RecodeHeap
Definition: recodebeam.h:177
tesseract::RecodeBeamSearch::ExtractBestPathAsUnicharIds
void ExtractBestPathAsUnicharIds(bool debug, const UNICHARSET *unicharset, GenericVector< int > *unichar_ids, GenericVector< float > *certs, GenericVector< float > *ratings, GenericVector< int > *xcoords) const
Definition: recodebeam.cpp:215
tesseract::RecodeBeamSearch::RecodeBeamSearch
RecodeBeamSearch(const UnicharCompress &recoder, int null_char, bool simple_text, Dict *dict)
Definition: recodebeam.cpp:63
tesseract::RecodeNode::Print
void Print(int null_char, const UNICHARSET &unicharset, int depth) const
Definition: recodebeam.cpp:43
tesseract::UnicharCompress
Definition: unicharcompress.h:128
genericheap.h
tesseract::NC_ANYTHING
Definition: recodebeam.h:74
tesseract::RecodeBeamSearch::ExtractBestPathAsLabels
void ExtractBestPathAsLabels(GenericVector< int > *labels, GenericVector< int > *xcoords) const
Definition: recodebeam.cpp:192
tesseract::RecodeBeamSearch::segmentedTimesteps
std::vector< std::vector< std::vector< std::pair< const char *, float > > > > segmentedTimesteps
Definition: recodebeam.h:241
tesseract::RecodeNode::code
int code
Definition: recodebeam.h:142
TBOX
Definition: rect.h:33