tesseract  5.0.0-alpha-619-ge9db
tesseract::Dawg Class Reference (abstract)

#include <dawg.h>

Inheritance diagram for tesseract::Dawg:
tesseract::SquishedDawg tesseract::Trie

Public Member Functions

DawgType type () const
 
const STRING & lang () const
 
PermuterType permuter () const
 
virtual ~Dawg ()
 
bool word_in_dawg (const WERD_CHOICE &word) const
 Returns true if the given word is in the Dawg. More...
 
bool prefix_in_dawg (const WERD_CHOICE &prefix, bool requires_complete) const
 
int check_for_words (const char *filename, const UNICHARSET &unicharset, bool enable_wildcard) const
 
void iterate_words (const UNICHARSET &unicharset, std::function< void(const WERD_CHOICE *)> cb) const
 
void iterate_words (const UNICHARSET &unicharset, std::function< void(const char *)> cb) const
 
virtual EDGE_REF edge_char_of (NODE_REF node, UNICHAR_ID unichar_id, bool word_end) const =0
 Returns the edge that corresponds to the letter out of this node. More...
 
virtual void unichar_ids_of (NODE_REF node, NodeChildVector *vec, bool word_end) const =0
 
virtual NODE_REF next_node (EDGE_REF edge_ref) const =0
 
virtual bool end_of_word (EDGE_REF edge_ref) const =0
 
virtual UNICHAR_ID edge_letter (EDGE_REF edge_ref) const =0
 Returns UNICHAR_ID stored in the edge indicated by the given EDGE_REF. More...
 
virtual void print_node (NODE_REF node, int max_num_edges) const =0
 
virtual void unichar_id_to_patterns (UNICHAR_ID unichar_id, const UNICHARSET &unicharset, GenericVector< UNICHAR_ID > *vec) const
 
virtual EDGE_REF pattern_loop_edge (EDGE_REF edge_ref, UNICHAR_ID unichar_id, bool word_end) const
 

Static Public Attributes

static const int16_t kDawgMagicNumber = 42
 Magic number to determine endianness when reading the Dawg from file. More...
 
static const UNICHAR_ID kPatternUnicharID = 0
 

Protected Member Functions

 Dawg (DawgType type, const STRING &lang, PermuterType perm, int debug_level)
 
NODE_REF next_node_from_edge_rec (const EDGE_RECORD &edge_rec) const
 Returns the next node visited by following this edge. More...
 
bool marker_flag_from_edge_rec (const EDGE_RECORD &edge_rec) const
 Returns the marker flag of this edge. More...
 
int direction_from_edge_rec (const EDGE_RECORD &edge_rec) const
 Returns the direction flag of this edge. More...
 
bool end_of_word_from_edge_rec (const EDGE_RECORD &edge_rec) const
 Returns true if this edge marks the end of a word. More...
 
UNICHAR_ID unichar_id_from_edge_rec (const EDGE_RECORD &edge_rec) const
 Returns UNICHAR_ID recorded in this edge. More...
 
void set_next_node_in_edge_rec (EDGE_RECORD *edge_rec, EDGE_REF value)
 Sets the next node link for this edge in the Dawg. More...
 
void set_marker_flag_in_edge_rec (EDGE_RECORD *edge_rec)
 Sets this edge record to be the last one in a sequence of edges. More...
 
int given_greater_than_edge_rec (NODE_REF next_node, bool word_end, UNICHAR_ID unichar_id, const EDGE_RECORD &edge_rec) const
 
bool edge_rec_match (NODE_REF next_node, bool word_end, UNICHAR_ID unichar_id, NODE_REF other_next_node, bool other_word_end, UNICHAR_ID other_unichar_id) const
 
void init (int unicharset_size)
 
bool match_words (WERD_CHOICE *word, int32_t index, NODE_REF node, UNICHAR_ID wildcard) const
 
void iterate_words_rec (const WERD_CHOICE &word_so_far, NODE_REF to_explore, std::function< void(const WERD_CHOICE *)> cb) const
 

Protected Attributes

STRING lang_
 
DawgType type_
 
PermuterType perm_
 Permuter code that should be used if the word is found in this Dawg. More...
 
uint64_t next_node_mask_ = 0
 
uint64_t flags_mask_ = 0
 
uint64_t letter_mask_ = 0
 
int unicharset_size_
 
int flag_start_bit_ = 0
 
int next_node_start_bit_ = 0
 
int debug_level_
 

Detailed Description

Abstract class (an interface) that declares methods needed by the various tesseract classes to operate on SquishedDawg and Trie objects.

This class initializes all the edge masks (since their usage by SquishedDawg and Trie is identical) and implements simple accessors for each of the fields encoded in an EDGE_RECORD. This class also implements word_in_dawg() and check_for_words() (since they use only the public methods of SquishedDawg and Trie classes that are inherited from the Dawg base class).

Definition at line 113 of file dawg.h.

Constructor & Destructor Documentation

◆ ~Dawg()

tesseract::Dawg::~Dawg ( )
virtual, default

◆ Dawg()

tesseract::Dawg::Dawg ( DawgType  type,
const STRING & lang,
PermuterType  perm,
int  debug_level 
)
inline, protected

Definition at line 199 of file dawg.h.

199  :
200  Dawg(DawgType type, const STRING &lang, PermuterType perm, int debug_level)
201  : lang_(lang),
202  type_(type),
203  perm_(perm),
204  unicharset_size_(0),

Member Function Documentation

◆ check_for_words()

int tesseract::Dawg::check_for_words ( const char *  filename,
const UNICHARSET & unicharset,
bool  enable_wildcard 
) const

Checks the Dawg for the words that are listed in the requested file. Returns the number of words in the given file missing from the Dawg.

Definition at line 82 of file dawg.cpp.

83  {
84  chomp_string(string); // remove newline
85  WERD_CHOICE word(string, unicharset);
86  if (word.length() > 0 &&
87  !word.contains_unichar_id(INVALID_UNICHAR_ID)) {
88  if (!match_words(&word, 0, 0,
89  enable_wildcard ? wildcard : INVALID_UNICHAR_ID)) {
90  tprintf("Missing word: %s\n", string);
91  ++misses;
92  }
93  } else {
94  tprintf("Failed to create a valid word from %s\n", string);
95  }
96  }
97  fclose (word_file);
98  // Make sure the user sees this with fprintf instead of tprintf.
99  if (debug_level_) tprintf("Number of lost words=%d\n", misses);
100  return misses;
101 }
102 
103 void Dawg::iterate_words(const UNICHARSET& unicharset,
104  std::function<void(const WERD_CHOICE*)> cb) const {
105  WERD_CHOICE word(&unicharset);
106  iterate_words_rec(word, 0, cb);
107 }
108 
109 static void CallWithUTF8(std::function<void(const char*)> cb,
110  const WERD_CHOICE* wc) {
111  STRING s;
112  wc->string_and_lengths(&s, nullptr);
113  cb(s.c_str());
114 }
115 
116 void Dawg::iterate_words(const UNICHARSET& unicharset,

◆ direction_from_edge_rec()

int tesseract::Dawg::direction_from_edge_rec ( const EDGE_RECORD & edge_rec) const
inline, protected

Returns the direction flag of this edge.

Definition at line 215 of file dawg.h.

216  {
217  return ((edge_rec & (DIRECTION_FLAG << flag_start_bit_))) ?

◆ edge_char_of()

virtual EDGE_REF tesseract::Dawg::edge_char_of ( NODE_REF  node,
UNICHAR_ID  unichar_id,
bool  word_end 
) const
pure virtual

Returns the edge that corresponds to the letter out of this node.

Implemented in tesseract::Trie, and tesseract::SquishedDawg.

◆ edge_letter()

virtual UNICHAR_ID tesseract::Dawg::edge_letter ( EDGE_REF  edge_ref) const
pure virtual

Returns UNICHAR_ID stored in the edge indicated by the given EDGE_REF.

Implemented in tesseract::SquishedDawg, and tesseract::Trie.

◆ edge_rec_match()

bool tesseract::Dawg::edge_rec_match ( NODE_REF  next_node,
bool  word_end,
UNICHAR_ID  unichar_id,
NODE_REF  other_next_node,
bool  other_word_end,
UNICHAR_ID  other_unichar_id 
) const
inline, protected

Returns true if all the values are equal (any value matches next_node if next_node == NO_EDGE, any value matches word_end if word_end is false).

Definition at line 266 of file dawg.h.

272  {
273  return ((unichar_id == other_unichar_id) &&
274  (next_node == NO_EDGE || next_node == other_next_node) &&
275  (!word_end || (word_end == other_word_end)));

◆ end_of_word()

virtual bool tesseract::Dawg::end_of_word ( EDGE_REF  edge_ref) const
pure virtual

Returns true if the edge indicated by the given EDGE_REF marks the end of a word.

Implemented in tesseract::SquishedDawg, and tesseract::Trie.

◆ end_of_word_from_edge_rec()

bool tesseract::Dawg::end_of_word_from_edge_rec ( const EDGE_RECORD & edge_rec) const
inline, protected

Returns true if this edge marks the end of a word.

Definition at line 220 of file dawg.h.

221  {
222  return (edge_rec & (WERD_END_FLAG << flag_start_bit_)) != 0;

◆ given_greater_than_edge_rec()

int tesseract::Dawg::given_greater_than_edge_rec ( NODE_REF  next_node,
bool  word_end,
UNICHAR_ID  unichar_id,
const EDGE_RECORD & edge_rec
) const
inline, protected

Sequentially compares the given values of unichar ID, next node and word end marker with the values in the given EDGE_RECORD. Returns: 1 if at any step the given input value exceeds that of edge_rec (and all the values already checked are the same); 0 if edge_rec_match() returns true; -1 otherwise.

Definition at line 245 of file dawg.h.

249  {
250  UNICHAR_ID curr_unichar_id = unichar_id_from_edge_rec(edge_rec);
251  NODE_REF curr_next_node = next_node_from_edge_rec(edge_rec);
252  bool curr_word_end = end_of_word_from_edge_rec(edge_rec);
253  if (edge_rec_match(next_node, word_end, unichar_id, curr_next_node,
254  curr_word_end, curr_unichar_id)) return 0;
255  if (unichar_id > curr_unichar_id) return 1;
256  if (unichar_id == curr_unichar_id) {
257  if (next_node > curr_next_node) return 1;
258  if (next_node == curr_next_node) {
259  if (word_end > curr_word_end) return 1;
260  }
261  }
262  return -1;

◆ init()

void tesseract::Dawg::init ( int  unicharset_size)
protected

Sets unicharset_size_. Initializes the values of various masks from unicharset_size_.

Definition at line 190 of file dawg.cpp.

192  { delete[] edges_; }
193 
195  UNICHAR_ID unichar_id,
196  bool word_end) const {
197  EDGE_REF edge = node;
198  if (node == 0) { // binary search
199  EDGE_REF start = 0;
200  EDGE_REF end = num_forward_edges_in_node0 - 1;

◆ iterate_words() [1/2]

void tesseract::Dawg::iterate_words ( const UNICHARSET & unicharset,
std::function< void(const char *)>  cb 
) const

Definition at line 131 of file dawg.cpp.

133  {
134  cb(&next_word);
135  }
136  NODE_REF next = next_node(children[i].edge_ref);
137  if (next != 0) {
138  iterate_words_rec(next_word, next, cb);

◆ iterate_words() [2/2]

void tesseract::Dawg::iterate_words ( const UNICHARSET & unicharset,
std::function< void(const WERD_CHOICE *)>  cb 
) const

Definition at line 118 of file dawg.cpp.

◆ iterate_words_rec()

void tesseract::Dawg::iterate_words_rec ( const WERD_CHOICE & word_so_far,
NODE_REF  to_explore,
std::function< void(const WERD_CHOICE *)>  cb 
) const
protected

Definition at line 140 of file dawg.cpp.

144  {
145  EDGE_REF edge;
146  int32_t word_end;
147 
148  if (wildcard != INVALID_UNICHAR_ID && word->unichar_id(index) == wildcard) {
149  bool any_matched = false;
150  NodeChildVector vec;
151  this->unichar_ids_of(node, &vec, false);
152  for (int i = 0; i < vec.size(); ++i) {
153  word->set_unichar_id(vec[i].unichar_id, index);
154  if (match_words(word, index, node, wildcard))
155  any_matched = true;
156  }

◆ lang()

const STRING& tesseract::Dawg::lang ( ) const
inline

Definition at line 123 of file dawg.h.

123 { return type_; }

◆ marker_flag_from_edge_rec()

bool tesseract::Dawg::marker_flag_from_edge_rec ( const EDGE_RECORD & edge_rec) const
inline, protected

Returns the marker flag of this edge.

Definition at line 211 of file dawg.h.

212  {
213  return (edge_rec & (MARKER_FLAG << flag_start_bit_)) != 0;

◆ match_words()

bool tesseract::Dawg::match_words ( WERD_CHOICE * word,
int32_t  index,
NODE_REF  node,
UNICHAR_ID  wildcard 
) const
protected

Matches all of the words that are represented by this string. If wildcard is set to something other than INVALID_UNICHAR_ID, the *'s in this string are interpreted as wildcards. WERD_CHOICE param is not passed by const so that wildcard searches can modify it and work without having to copy WERD_CHOICEs.

Definition at line 158 of file dawg.cpp.

159  {
160  word_end = index == word->length() - 1;
161  edge = edge_char_of(node, word->unichar_id(index), word_end);
162  if (edge != NO_EDGE) { // normal edge in DAWG
163  node = next_node(edge);
164  if (word_end) {
165  if (debug_level_ > 1) word->print("match_words() found: ");
166  return true;
167  } else if (node != 0) {
168  return match_words(word, index+1, node, wildcard);
169  }
170  }
171  }
172  return false;
173 }
174 
175 void Dawg::init(int unicharset_size) {
176  ASSERT_HOST(unicharset_size > 0);
177  unicharset_size_ = unicharset_size;
178  // Set bit masks. We will use the value unicharset_size_ as a null char, so
179  // the actual number of unichars is unicharset_size_ + 1.
180  flag_start_bit_ = ceil(log(unicharset_size_ + 1.0) / log(2.0));
182  letter_mask_ = ~(~0ull << flag_start_bit_);
185 }
186 
187 
188 /*----------------------------------------------------------------------

◆ next_node()

virtual NODE_REF tesseract::Dawg::next_node ( EDGE_REF  edge_ref) const
pure virtual

Returns the next node visited by following the edge indicated by the given EDGE_REF.

Implemented in tesseract::Trie, and tesseract::SquishedDawg.

◆ next_node_from_edge_rec()

NODE_REF tesseract::Dawg::next_node_from_edge_rec ( const EDGE_RECORD & edge_rec) const
inline, protected

Returns the next node visited by following this edge.

Definition at line 207 of file dawg.h.

208  {
209  return ((edge_rec & next_node_mask_) >> next_node_start_bit_);

◆ pattern_loop_edge()

virtual EDGE_REF tesseract::Dawg::pattern_loop_edge ( EDGE_REF  edge_ref,
UNICHAR_ID  unichar_id,
bool  word_end 
) const
inline, virtual

Returns the given EDGE_REF if the EDGE_RECORD that it points to has a self loop and the given unichar_id matches the unichar_id stored in the EDGE_RECORD, returns NO_EDGE otherwise.

Reimplemented in tesseract::Trie.

Definition at line 190 of file dawg.h.

192  {
193  (void)edge_ref;
194  (void)unichar_id;
195  (void)word_end;
196  return false;

◆ permuter()

PermuterType tesseract::Dawg::permuter ( ) const
inline

Definition at line 124 of file dawg.h.

124 { return lang_; }

◆ prefix_in_dawg()

bool tesseract::Dawg::prefix_in_dawg ( const WERD_CHOICE & prefix,
bool  requires_complete 
) const

Definition at line 57 of file dawg.cpp.

63  {
64  return prefix_in_dawg(word, true);
65 }
66 
67 int Dawg::check_for_words(const char *filename,
68  const UNICHARSET &unicharset,
69  bool enable_wildcard) const {
70  if (filename == nullptr) return 0;
71 
72  FILE *word_file;
73  char string [CHARS_PER_LINE];
74  int misses = 0;
75  UNICHAR_ID wildcard = unicharset.unichar_to_id(kWildcard);
76 

◆ print_node()

virtual void tesseract::Dawg::print_node ( NODE_REF  node,
int  max_num_edges 
) const
pure virtual

Prints the contents of the node indicated by the given NODE_REF. At most max_num_edges will be printed.

Implemented in tesseract::SquishedDawg, and tesseract::Trie.

◆ set_marker_flag_in_edge_rec()

void tesseract::Dawg::set_marker_flag_in_edge_rec ( EDGE_RECORD * edge_rec)
inline, protected

Sets this edge record to be the last one in a sequence of edges.

Definition at line 235 of file dawg.h.

236  {
237  *edge_rec |= (MARKER_FLAG << flag_start_bit_);

◆ set_next_node_in_edge_rec()

void tesseract::Dawg::set_next_node_in_edge_rec ( EDGE_RECORD * edge_rec,
EDGE_REF  value 
)
inline, protected

Sets the next node link for this edge in the Dawg.

Definition at line 229 of file dawg.h.

231  {
232  *edge_rec &= (~next_node_mask_);
233  *edge_rec |= ((value << next_node_start_bit_) & next_node_mask_);

◆ type()

DawgType tesseract::Dawg::type ( ) const
inline

Definition at line 122 of file dawg.h.

123 { return type_; }

◆ unichar_id_from_edge_rec()

UNICHAR_ID tesseract::Dawg::unichar_id_from_edge_rec ( const EDGE_RECORD & edge_rec) const
inline, protected

Returns UNICHAR_ID recorded in this edge.

Definition at line 224 of file dawg.h.

226  {
227  return ((edge_rec & letter_mask_) >> LETTER_START_BIT);

◆ unichar_id_to_patterns()

virtual void tesseract::Dawg::unichar_id_to_patterns ( UNICHAR_ID  unichar_id,
const UNICHARSET & unicharset,
GenericVector< UNICHAR_ID > *  vec 
) const
inline, virtual

Fills vec with unichar ids that represent the character classes of the given unichar_id.

Reimplemented in tesseract::Trie.

Definition at line 179 of file dawg.h.

182  {
183  (void)unichar_id;
184  (void)unicharset;
185  (void)vec;

◆ unichar_ids_of()

virtual void tesseract::Dawg::unichar_ids_of ( NODE_REF  node,
NodeChildVector * vec,
bool  word_end 
) const
pure virtual

Fills the given NodeChildVector with all the unichar ids (and the corresponding EDGE_REFs) for which there is an edge out of this node.

Implemented in tesseract::SquishedDawg, and tesseract::Trie.

◆ word_in_dawg()

bool tesseract::Dawg::word_in_dawg ( const WERD_CHOICE & word) const

Returns true if the given word is in the Dawg.

Definition at line 78 of file dawg.cpp.

78  {
79  tprintf("Error: Could not open file %s\n", filename);
80  ASSERT_HOST(word_file);

Member Data Documentation

◆ debug_level_

int tesseract::Dawg::debug_level_
protected

Definition at line 310 of file dawg.h.

◆ flag_start_bit_

int tesseract::Dawg::flag_start_bit_ = 0
protected

Definition at line 307 of file dawg.h.

◆ flags_mask_

uint64_t tesseract::Dawg::flags_mask_ = 0
protected

Definition at line 304 of file dawg.h.

◆ kDawgMagicNumber

const int16_t tesseract::Dawg::kDawgMagicNumber = 42
static

Magic number to determine endianness when reading the Dawg from file.

Definition at line 116 of file dawg.h.

◆ kPatternUnicharID

const UNICHAR_ID tesseract::Dawg::kPatternUnicharID = 0
static

A special unichar id that indicates that any appropriate pattern (e.g. dictionary word, 0-9 digit, etc.) can be inserted instead. Used for expressing patterns in punctuation and number Dawgs.

Definition at line 120 of file dawg.h.

◆ lang_

STRING tesseract::Dawg::lang_
protected

Definition at line 295 of file dawg.h.

◆ letter_mask_

uint64_t tesseract::Dawg::letter_mask_ = 0
protected

Definition at line 305 of file dawg.h.

◆ next_node_mask_

uint64_t tesseract::Dawg::next_node_mask_ = 0
protected

Definition at line 303 of file dawg.h.

◆ next_node_start_bit_

int tesseract::Dawg::next_node_start_bit_ = 0
protected

Definition at line 308 of file dawg.h.

◆ perm_

PermuterType tesseract::Dawg::perm_
protected

Permuter code that should be used if the word is found in this Dawg.

Definition at line 298 of file dawg.h.

◆ type_

DawgType tesseract::Dawg::type_
protected

Definition at line 296 of file dawg.h.

◆ unicharset_size_

int tesseract::Dawg::unicharset_size_
protected

Definition at line 306 of file dawg.h.


The documentation for this class was generated from the following files:
tesseract::Dawg::prefix_in_dawg
bool prefix_in_dawg(const WERD_CHOICE &prefix, bool requires_complete) const
Definition: dawg.cpp:57
tesseract::Dawg::match_words
bool match_words(WERD_CHOICE *word, int32_t index, NODE_REF node, UNICHAR_ID wildcard) const
Definition: dawg.cpp:158
tesseract::SquishedDawg::edge_char_of
EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id, bool word_end) const override
Returns the edge that corresponds to the letter out of this node.
Definition: dawg.cpp:209
tesseract::Dawg::type
DawgType type() const
Definition: dawg.h:122
WERD_CHOICE::unichar_id
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:303
tesseract::Dawg::flag_start_bit_
int flag_start_bit_
Definition: dawg.h:307
WERD_CHOICE
Definition: ratngs.h:261
tesseract::Dawg::flags_mask_
uint64_t flags_mask_
Definition: dawg.h:304
ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:87
tesseract::Dawg::iterate_words_rec
void iterate_words_rec(const WERD_CHOICE &word_so_far, NODE_REF to_explore, std::function< void(const WERD_CHOICE *)> cb) const
Definition: dawg.cpp:140
language_specific.log
log
Definition: language_specific.py:25
tesseract::Dawg::next_node_from_edge_rec
NODE_REF next_node_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns the next node visited by following this edge.
Definition: dawg.h:207
tesseract::Dawg::lang
const STRING & lang() const
Definition: dawg.h:123
PermuterType
PermuterType
Definition: ratngs.h:230
tesseract::Dawg::end_of_word_from_edge_rec
bool end_of_word_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns true if this edge marks the end of a word.
Definition: dawg.h:220
tesseract::NodeChildVector
GenericVector< NodeChild > NodeChildVector
Definition: dawg.h:62
STRING
Definition: strngs.h:45
tesseract::Dawg::unichar_ids_of
virtual void unichar_ids_of(NODE_REF node, NodeChildVector *vec, bool word_end) const =0
FORWARD_EDGE
#define FORWARD_EDGE
Definition: dawg.h:79
tesseract::Dawg::unichar_id_from_edge_rec
UNICHAR_ID unichar_id_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns UNICHAR_ID recorded in this edge.
Definition: dawg.h:224
tesseract::Dawg::perm_
PermuterType perm_
Permuter code that should be used if the word is found in this Dawg.
Definition: dawg.h:298
WERD_CHOICE::string_and_lengths
void string_and_lengths(STRING *word_str, STRING *word_lengths_str) const
Definition: ratngs.cpp:451
tesseract::Dawg::edge_rec_match
bool edge_rec_match(NODE_REF next_node, bool word_end, UNICHAR_ID unichar_id, NODE_REF other_next_node, bool other_word_end, UNICHAR_ID other_unichar_id) const
Definition: dawg.h:266
STRING::c_str
const char * c_str() const
Definition: strngs.cpp:192
chomp_string
void chomp_string(char *str)
Definition: helpers.h:75
tesseract::Dawg::init
void init(int unicharset_size)
Definition: dawg.cpp:190
tesseract::Dawg::next_node_start_bit_
int next_node_start_bit_
Definition: dawg.h:308
UNICHARSET::unichar_to_id
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:209
tesseract::Dawg::debug_level_
int debug_level_
Definition: dawg.h:310
DIRECTION_FLAG
#define DIRECTION_FLAG
Definition: dawg.h:83
tesseract::Dawg::Dawg
Dawg(DawgType type, const STRING &lang, PermuterType perm, int debug_level)
Definition: dawg.h:199
UNICHARSET
Definition: unicharset.h:145
NUM_FLAG_BITS
#define NUM_FLAG_BITS
Definition: dawg.h:86
tesseract::Dawg::edge_char_of
virtual EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id, bool word_end) const =0
Returns the edge that corresponds to the letter out of this node.
tesseract::Dawg::iterate_words
void iterate_words(const UNICHARSET &unicharset, std::function< void(const WERD_CHOICE *)> cb) const
Definition: dawg.cpp:118
tesseract::Dawg::next_node
virtual NODE_REF next_node(EDGE_REF edge_ref) const =0
tesseract::DawgType
DawgType
Definition: dawg.h:66
LETTER_START_BIT
#define LETTER_START_BIT
Definition: dawg.h:85
UNICHAR_ID
int UNICHAR_ID
Definition: unichar.h:36
tesseract::Dawg::lang_
STRING lang_
Definition: dawg.h:295
WERD_CHOICE::print
void print() const
Definition: ratngs.h:568
WERD_CHOICE::length
int length() const
Definition: ratngs.h:291
MARKER_FLAG
#define MARKER_FLAG
Definition: dawg.h:82
tesseract::Dawg::check_for_words
int check_for_words(const char *filename, const UNICHARSET &unicharset, bool enable_wildcard) const
Definition: dawg.cpp:82
EDGE_REF
int64_t EDGE_REF
Definition: dawg.h:49
tesseract::Dawg::letter_mask_
uint64_t letter_mask_
Definition: dawg.h:305
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
WERD_END_FLAG
#define WERD_END_FLAG
Definition: dawg.h:84
tesseract::Dawg::unicharset_size_
int unicharset_size_
Definition: dawg.h:306
tesseract::Dawg::next_node_mask_
uint64_t next_node_mask_
Definition: dawg.h:303
CHARS_PER_LINE
#define CHARS_PER_LINE
Definition: dict.h:38
BACKWARD_EDGE
#define BACKWARD_EDGE
Definition: dawg.h:80
tesseract::Dawg::type_
DawgType type_
Definition: dawg.h:296
NODE_REF
int64_t NODE_REF
Definition: dawg.h:50