| 
    tesseract
    4.0.0-1-g2a2b
    
   | 
 
#include <trie.h>
  
 Public Types | |
| enum | RTLReversePolicy { RRP_DO_NO_REVERSE, RRP_REVERSE_IF_HAS_RTL, RRP_FORCE_REVERSE } | 
Public Member Functions | |
| Trie (DawgType type, const STRING &lang, PermuterType perm, int unicharset_size, int debug_level) | |
| virtual | ~Trie () | 
| void | clear () | 
| EDGE_REF | edge_char_of (NODE_REF node_ref, UNICHAR_ID unichar_id, bool word_end) const | 
| void | unichar_ids_of (NODE_REF node, NodeChildVector *vec, bool word_end) const | 
| NODE_REF | next_node (EDGE_REF edge_ref) const | 
| bool | end_of_word (EDGE_REF edge_ref) const | 
| UNICHAR_ID | edge_letter (EDGE_REF edge_ref) const | 
| void | KillEdge (EDGE_RECORD *edge_rec) const | 
| bool | DeadEdge (const EDGE_RECORD &edge_rec) const | 
| void | print_node (NODE_REF node, int max_num_edges) const | 
| SquishedDawg * | trie_to_dawg () | 
| bool | read_and_add_word_list (const char *filename, const UNICHARSET &unicharset, Trie::RTLReversePolicy reverse) | 
| bool | read_word_list (const char *filename, GenericVector< STRING > *words) | 
| bool | add_word_list (const GenericVector< STRING > &words, const UNICHARSET &unicharset, Trie::RTLReversePolicy reverse_policy) | 
| bool | read_pattern_list (const char *filename, const UNICHARSET &unicharset) | 
| void | initialize_patterns (UNICHARSET *unicharset) | 
| void | unichar_id_to_patterns (UNICHAR_ID unichar_id, const UNICHARSET &unicharset, GenericVector< UNICHAR_ID > *vec) const | 
| virtual EDGE_REF | pattern_loop_edge (EDGE_REF edge_ref, UNICHAR_ID unichar_id, bool word_end) const | 
| bool | add_word_to_dawg (const WERD_CHOICE &word, const GenericVector< bool > *repetitions) | 
| bool | add_word_to_dawg (const WERD_CHOICE &word) | 
  Public Member Functions inherited from tesseract::Dawg | |
| DawgType | type () const | 
| const STRING & | lang () const | 
| PermuterType | permuter () const | 
| virtual | ~Dawg () | 
| bool | word_in_dawg (const WERD_CHOICE &word) const | 
| Returns true if the given word is in the Dawg.  More... | |
| bool | prefix_in_dawg (const WERD_CHOICE &prefix, bool requires_complete) const | 
| int | check_for_words (const char *filename, const UNICHARSET &unicharset, bool enable_wildcard) const | 
| void | iterate_words (const UNICHARSET &unicharset, TessCallback1< const WERD_CHOICE *> *cb) const | 
| void | iterate_words (const UNICHARSET &unicharset, TessCallback1< const char *> *cb) const | 
Static Public Member Functions | |
| static const char * | get_reverse_policy_name (RTLReversePolicy reverse_policy) | 
Static Public Attributes | |
| static const int | kSaneNumConcreteChars = 0 | 
| static const char | kAlphaPatternUnicode [] = "\u2000" | 
| static const char | kDigitPatternUnicode [] = "\u2001" | 
| static const char | kAlphanumPatternUnicode [] = "\u2002" | 
| static const char | kPuncPatternUnicode [] = "\u2003" | 
| static const char | kLowerPatternUnicode [] = "\u2004" | 
| static const char | kUpperPatternUnicode [] = "\u2005" | 
  Static Public Attributes inherited from tesseract::Dawg | |
| static const int16_t | kDawgMagicNumber = 42 | 
| Magic number to determine endianness when reading the Dawg from file.  More... | |
| static const UNICHAR_ID | kPatternUnicharID = 0 | 
Protected Member Functions | |
| EDGE_RECORD * | deref_edge_ref (EDGE_REF edge_ref) const | 
| EDGE_REF | make_edge_ref (NODE_REF node_index, EDGE_INDEX edge_index) const | 
| void | link_edge (EDGE_RECORD *edge, NODE_REF nxt, bool repeats, int direction, bool word_end, UNICHAR_ID unichar_id) | 
| void | print_edge_rec (const EDGE_RECORD &edge_rec) const | 
| bool | can_be_eliminated (const EDGE_RECORD &edge_rec) | 
| void | print_all (const char *msg, int max_num_edges) | 
| bool | edge_char_of (NODE_REF node_ref, NODE_REF next_node, int direction, bool word_end, UNICHAR_ID unichar_id, EDGE_RECORD **edge_ptr, EDGE_INDEX *edge_index) const | 
| bool | add_edge_linkage (NODE_REF node1, NODE_REF node2, bool repeats, int direction, bool word_end, UNICHAR_ID unichar_id) | 
| bool | add_new_edge (NODE_REF node1, NODE_REF node2, bool repeats, bool word_end, UNICHAR_ID unichar_id) | 
| void | add_word_ending (EDGE_RECORD *edge, NODE_REF the_next_node, bool repeats, UNICHAR_ID unichar_id) | 
| NODE_REF | new_dawg_node () | 
| void | remove_edge_linkage (NODE_REF node1, NODE_REF node2, int direction, bool word_end, UNICHAR_ID unichar_id) | 
| void | remove_edge (NODE_REF node1, NODE_REF node2, bool word_end, UNICHAR_ID unichar_id) | 
| bool | eliminate_redundant_edges (NODE_REF node, const EDGE_RECORD &edge1, const EDGE_RECORD &edge2) | 
| bool | reduce_lettered_edges (EDGE_INDEX edge_index, UNICHAR_ID unichar_id, NODE_REF node, EDGE_VECTOR *backward_edges, NODE_MARKER reduced_nodes) | 
| void | sort_edges (EDGE_VECTOR *edges) | 
| void | reduce_node_input (NODE_REF node, NODE_MARKER reduced_nodes) | 
| UNICHAR_ID | character_class_to_pattern (char ch) | 
  Protected Member Functions inherited from tesseract::Dawg | |
| Dawg (DawgType type, const STRING &lang, PermuterType perm, int debug_level) | |
| NODE_REF | next_node_from_edge_rec (const EDGE_RECORD &edge_rec) const | 
| Returns the next node visited by following this edge.  More... | |
| bool | marker_flag_from_edge_rec (const EDGE_RECORD &edge_rec) const | 
| Returns the marker flag of this edge.  More... | |
| int | direction_from_edge_rec (const EDGE_RECORD &edge_rec) const | 
| Returns the direction flag of this edge.  More... | |
| bool | end_of_word_from_edge_rec (const EDGE_RECORD &edge_rec) const | 
| Returns true if this edge marks the end of a word.  More... | |
| UNICHAR_ID | unichar_id_from_edge_rec (const EDGE_RECORD &edge_rec) const | 
| Returns UNICHAR_ID recorded in this edge.  More... | |
| void | set_next_node_in_edge_rec (EDGE_RECORD *edge_rec, EDGE_REF value) | 
| Sets the next node link for this edge in the Dawg.  More... | |
| void | set_marker_flag_in_edge_rec (EDGE_RECORD *edge_rec) | 
| Sets this edge record to be the last one in a sequence of edges.  More... | |
| int | given_greater_than_edge_rec (NODE_REF next_node, bool word_end, UNICHAR_ID unichar_id, const EDGE_RECORD &edge_rec) const | 
| bool | edge_rec_match (NODE_REF next_node, bool word_end, UNICHAR_ID unichar_id, NODE_REF other_next_node, bool other_word_end, UNICHAR_ID other_unichar_id) const | 
| void | init (int unicharset_size) | 
| bool | match_words (WERD_CHOICE *word, int32_t index, NODE_REF node, UNICHAR_ID wildcard) const | 
| void | iterate_words_rec (const WERD_CHOICE &word_so_far, NODE_REF to_explore, TessCallback1< const WERD_CHOICE *> *cb) const | 
Protected Attributes | |
| TRIE_NODES | nodes_ | 
| uint64_t | num_edges_ | 
| uint64_t | deref_direction_mask_ | 
| uint64_t | deref_node_index_mask_ | 
| GenericVector< EDGE_INDEX > | root_back_freelist_ | 
| bool | initialized_patterns_ | 
| UNICHAR_ID | alpha_pattern_ | 
| UNICHAR_ID | digit_pattern_ | 
| UNICHAR_ID | alphanum_pattern_ | 
| UNICHAR_ID | punc_pattern_ | 
| UNICHAR_ID | lower_pattern_ | 
| UNICHAR_ID | upper_pattern_ | 
  Protected Attributes inherited from tesseract::Dawg | |
| DawgType | type_ | 
| STRING | lang_ | 
| PermuterType | perm_ | 
| Permuter code that should be used if the word is found in this Dawg.  More... | |
| int | unicharset_size_ | 
| int | flag_start_bit_ | 
| int | next_node_start_bit_ | 
| uint64_t | next_node_mask_ | 
| uint64_t | flags_mask_ | 
| uint64_t | letter_mask_ | 
| int | debug_level_ | 
Concrete class for the Trie data structure (extends the Dawg base class) that stores a list of words and allows new words to be added dynamically. This class stores a vector of pointers to TRIE_NODE_RECORDs, each of which has a vector of forward and backward edges.
      
  | 
  inline | 
Definition at line 88 of file trie.h.
      
  | 
  protected | 
Definition at line 121 of file trie.cpp.
      
  | 
  inline protected | 
Definition at line 355 of file trie.h.
      
  | 
  protected | 
Definition at line 157 of file trie.cpp.
| bool tesseract::Trie::add_word_list | ( | const GenericVector< STRING > & | words, | 
| const UNICHARSET & | unicharset, | ||
| Trie::RTLReversePolicy | reverse_policy | ||
| ) | 
Definition at line 318 of file trie.cpp.
| bool tesseract::Trie::add_word_to_dawg | ( | const WERD_CHOICE & | word, | 
| const GenericVector< bool > * | repetitions | ||
| ) | 
Definition at line 174 of file trie.cpp.
      
  | 
  inline | 
Definition at line 268 of file trie.h.
      
  | 
  inline protected | 
Definition at line 325 of file trie.h.
      
  | 
  protected | 
| void tesseract::Trie::clear | ( | ) | 
Definition at line 62 of file trie.cpp.
      
  | 
  inline | 
Definition at line 157 of file trie.h.
      
  | 
  inline protected | 
      
  | 
  inline virtual | 
Returns the edge that corresponds to the letter out of this node.
Implements tesseract::Dawg.
Definition at line 103 of file trie.h.
      
  | 
  protected | 
Definition at line 70 of file trie.cpp.
      
  | 
  inline virtual | 
Returns UNICHAR_ID stored in the edge indicated by the given EDGE_REF.
Implements tesseract::Dawg.
Definition at line 147 of file trie.h.
      
  | 
  protected | 
Definition at line 563 of file trie.cpp.
      
  | 
  inline virtual | 
Returns true if the edge indicated by the given EDGE_REF marks the end of a word.
Implements tesseract::Dawg.
Definition at line 141 of file trie.h.
      
  | 
  static | 
| void tesseract::Trie::initialize_patterns | ( | UNICHARSET * | unicharset | ) | 
Definition at line 342 of file trie.cpp.
      
  | 
  inline | 
      
  | 
  inline protected | 
      
  | 
  inline protected | 
      
  | 
  protected | 
Returns the next node visited by following the edge indicated by the given EDGE_REF.
Implements tesseract::Dawg.
Definition at line 132 of file trie.h.
      
  | 
  inline virtual | 
Returns the given EDGE_REF if the EDGE_RECORD that it points to has a self loop and the given unichar_id matches the unichar_id stored in the EDGE_RECORD; returns NO_EDGE otherwise.
Reimplemented from tesseract::Dawg.
Definition at line 246 of file trie.h.
      
  | 
  inline protected | 
Definition at line 333 of file trie.h.
      
  | 
  inline protected | 
Prints the given EDGE_RECORD.
Definition at line 316 of file trie.h.
      
  | 
  virtual | 
Prints the contents of the node indicated by the given NODE_REF. At most max_num_edges will be printed.
Implements tesseract::Dawg.
Definition at line 702 of file trie.cpp.
| bool tesseract::Trie::read_and_add_word_list | ( | const char * | filename, | 
| const UNICHARSET & | unicharset, | ||
| Trie::RTLReversePolicy | reverse | ||
| ) | 
Definition at line 286 of file trie.cpp.
| bool tesseract::Trie::read_pattern_list | ( | const char * | filename, | 
| const UNICHARSET & | unicharset | ||
| ) | 
Definition at line 399 of file trie.cpp.
| bool tesseract::Trie::read_word_list | ( | const char * | filename, | 
| GenericVector< STRING > * | words | ||
| ) | 
Definition at line 295 of file trie.cpp.
      
  | 
  protected | 
Definition at line 610 of file trie.cpp.
      
  | 
  protected | 
Eliminates any redundant edges from this node in the Trie.
Definition at line 665 of file trie.cpp.
      
  | 
  inline protected | 
Definition at line 380 of file trie.h.
      
  | 
  protected | 
Definition at line 481 of file trie.cpp.
      
  | 
  protected | 
Orders num_edges consecutive EDGE_RECORDS in the given EDGE_VECTOR in increasing order of unichar ids. This function is normally called for all edges in a single node, and since the number of edges in each node is usually quite small, selection sort is used.
Definition at line 651 of file trie.cpp.
| SquishedDawg * tesseract::Trie::trie_to_dawg | ( | ) | 
Definition at line 516 of file trie.cpp.
      
  | 
  virtual | 
Fills vec with unichar ids that represent the character classes of the given unichar_id.
Reimplemented from tesseract::Dawg.
Definition at line 359 of file trie.cpp.
      
  | 
  inline virtual | 
Fills the given NodeChildVector with all the unichar ids (and the corresponding EDGE_REFs) for which there is an edge out of this node.
Implements tesseract::Dawg.
Definition at line 116 of file trie.h.
      
  | 
  protected | 
      
  | 
  protected | 
      
  | 
  protected | 
      
  | 
  static | 
      
  | 
  static | 
      
  | 
  static | 
      
  | 
  static | 
      
  | 
  static | 
      
  | 
  static | 
      
  | 
  protected | 
      
  | 
  protected | 
      
  | 
  protected | 
      
  | 
  protected | 
      
  | 
  protected |