tesseract  4.0.0-1-g2a2b
dawg.h
Go to the documentation of this file.
1 /* -*-C-*-
2  ********************************************************************************
3  *
4  * File: dawg.h
5  * Description: Definition of a class that represents Directed Acyclic Word
6  * Graph (DAWG), functions to build and manipulate the DAWG.
7  * Author: Mark Seaman, SW Productivity
8  * Created: Fri Oct 16 14:37:00 1987
9  * Modified: Wed Jun 19 16:50:24 1991 (Mark Seaman) marks@hpgrlt
10  * Language: C
11  * Package: N/A
12  * Status: Reusable Software Component
13  *
14  * (c) Copyright 1987, Hewlett-Packard Company.
15  ** Licensed under the Apache License, Version 2.0 (the "License");
16  ** you may not use this file except in compliance with the License.
17  ** You may obtain a copy of the License at
18  ** http://www.apache.org/licenses/LICENSE-2.0
19  ** Unless required by applicable law or agreed to in writing, software
20  ** distributed under the License is distributed on an "AS IS" BASIS,
21  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22  ** See the License for the specific language governing permissions and
23  ** limitations under the License.
24  *
25  *********************************************************************************/
26 
27 #ifndef DICT_DAWG_H_
28 #define DICT_DAWG_H_
29 
30 /*----------------------------------------------------------------------
31  I n c l u d e s
32 ----------------------------------------------------------------------*/
33 
34 #include <memory>
35 #include "elst.h"
36 #include "params.h"
37 #include "ratngs.h"
38 #include "tesscallback.h"
39 
// Sentinel used for EDGE_REF / NODE_REF values that refer to no edge: all 64
// bits set, i.e. -1 as int64_t (the same value the former
// 0xffffffffffffffff literals produced after the cast).
// Written without compiler-specific literal suffixes so that it is defined on
// every toolchain, and fully parenthesized so that it expands safely inside
// any surrounding expression.
#define NO_EDGE ((int64_t)-1)
47 
48 /*----------------------------------------------------------------------
49  T y p e s
50 ----------------------------------------------------------------------*/
51 class UNICHARSET;
52 
53 using EDGE_RECORD = uint64_t;
55 using EDGE_REF = int64_t;
56 using NODE_REF = int64_t;
57 using NODE_MAP = EDGE_REF *;
58 
59 namespace tesseract {
60 
61 struct NodeChild {
65  NodeChild(): unichar_id(INVALID_UNICHAR_ID), edge_ref(NO_EDGE) {}
66 };
67 
71 
72 enum DawgType {
77 
78  DAWG_TYPE_COUNT // number of enum entries
79 };
80 
81 /*----------------------------------------------------------------------
82  C o n s t a n t s
83 ----------------------------------------------------------------------*/
84 
// Every replacement list is fully parenthesized so that the casts cannot
// combine unexpectedly with operators at the point of use
// (e.g. `sizeof FORWARD_EDGE`).

// Edge direction values compared against direction_from_edge_rec().
#define FORWARD_EDGE ((int32_t)0)
#define BACKWARD_EDGE ((int32_t)1)
#define MAX_NODE_EDGES_DISPLAY ((int64_t)100)
// Flag bits of an edge record; they are shifted left by flag_start_bit_
// before being applied to a record.
#define MARKER_FLAG ((int64_t)1)
#define DIRECTION_FLAG ((int64_t)2)
#define WERD_END_FLAG ((int64_t)4)
#define LETTER_START_BIT 0
#define NUM_FLAG_BITS 3
// printf-style conversion specification for 64-bit EDGE_REF / NODE_REF values.
#define REFFORMAT "%" PRId64
94 
95 static const bool kDawgSuccessors[DAWG_TYPE_COUNT][DAWG_TYPE_COUNT] = {
96  { 0, 1, 1, 0 }, // for DAWG_TYPE_PUNCTUATION
97  { 1, 0, 0, 0 }, // for DAWG_TYPE_WORD
98  { 1, 0, 0, 0 }, // for DAWG_TYPE_NUMBER
99  { 0, 0, 0, 0 }, // for DAWG_TYPE_PATTERN
100 };
101 
102 static const char kWildcard[] = "*";
103 
104 
105 /*----------------------------------------------------------------------
106  C l a s s e s a n d S t r u c t s
107 ----------------------------------------------------------------------*/
108 //
118 //
119 class Dawg {
120  public:
122  static const int16_t kDawgMagicNumber = 42;
126  static const UNICHAR_ID kPatternUnicharID = 0;
127 
128  inline DawgType type() const { return type_; }
129  inline const STRING &lang() const { return lang_; }
130  inline PermuterType permuter() const { return perm_; }
131 
132  virtual ~Dawg();
133 
135  bool word_in_dawg(const WERD_CHOICE &word) const;
136 
137  // Returns true if the given word prefix is not contraindicated by the dawg.
138  // If requires_complete is true, then the exact complete word must be present.
139  bool prefix_in_dawg(const WERD_CHOICE &prefix, bool requires_complete) const;
140 
143  int check_for_words(const char *filename,
144  const UNICHARSET &unicharset,
145  bool enable_wildcard) const;
146 
147  // For each word in the Dawg, call the given (permanent) callback with the
148  // WERD_CHOICE version of the word.
149  void iterate_words(const UNICHARSET &unicharset,
151 
152  // For each word in the Dawg, call the given (permanent) callback with the
153  // text (UTF-8) version of the word.
154  void iterate_words(const UNICHARSET &unicharset,
155  TessCallback1<const char *> *cb) const;
156 
157  // Pure virtual functions that must be implemented by the derived classes.
158 
160  virtual EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id,
161  bool word_end) const = 0;
162 
165  virtual void unichar_ids_of(NODE_REF node, NodeChildVector *vec,
166  bool word_end) const = 0;
167 
170  virtual NODE_REF next_node(EDGE_REF edge_ref) const = 0;
171 
174  virtual bool end_of_word(EDGE_REF edge_ref) const = 0;
175 
177  virtual UNICHAR_ID edge_letter(EDGE_REF edge_ref) const = 0;
178 
181  virtual void print_node(NODE_REF node, int max_num_edges) const = 0;
182 
185  virtual void unichar_id_to_patterns(UNICHAR_ID unichar_id,
186  const UNICHARSET &unicharset,
187  GenericVector<UNICHAR_ID> *vec) const {
188  (void)unichar_id;
189  (void)unicharset;
190  (void)vec;
191  }
192 
197  EDGE_REF edge_ref, UNICHAR_ID unichar_id, bool word_end) const {
198  (void)edge_ref;
199  (void)unichar_id;
200  (void)word_end;
201  return false;
202  }
203 
204  protected:
205  Dawg(DawgType type, const STRING &lang, PermuterType perm, int debug_level)
206  : type_(type),
207  lang_(lang),
208  perm_(perm),
209  unicharset_size_(0),
210  debug_level_(debug_level) {}
211 
213  inline NODE_REF next_node_from_edge_rec(const EDGE_RECORD &edge_rec) const {
214  return ((edge_rec & next_node_mask_) >> next_node_start_bit_);
215  }
217  inline bool marker_flag_from_edge_rec(const EDGE_RECORD &edge_rec) const {
218  return (edge_rec & (MARKER_FLAG << flag_start_bit_)) != 0;
219  }
221  inline int direction_from_edge_rec(const EDGE_RECORD &edge_rec) const {
222  return ((edge_rec & (DIRECTION_FLAG << flag_start_bit_))) ?
224  }
226  inline bool end_of_word_from_edge_rec(const EDGE_RECORD &edge_rec) const {
227  return (edge_rec & (WERD_END_FLAG << flag_start_bit_)) != 0;
228  }
231  const EDGE_RECORD &edge_rec) const {
232  return ((edge_rec & letter_mask_) >> LETTER_START_BIT);
233  }
236  EDGE_RECORD *edge_rec, EDGE_REF value) {
237  *edge_rec &= (~next_node_mask_);
238  *edge_rec |= ((value << next_node_start_bit_) & next_node_mask_);
239  }
241  inline void set_marker_flag_in_edge_rec(EDGE_RECORD *edge_rec) {
242  *edge_rec |= (MARKER_FLAG << flag_start_bit_);
243  }
252  bool word_end,
253  UNICHAR_ID unichar_id,
254  const EDGE_RECORD &edge_rec) const {
255  UNICHAR_ID curr_unichar_id = unichar_id_from_edge_rec(edge_rec);
256  NODE_REF curr_next_node = next_node_from_edge_rec(edge_rec);
257  bool curr_word_end = end_of_word_from_edge_rec(edge_rec);
258  if (edge_rec_match(next_node, word_end, unichar_id, curr_next_node,
259  curr_word_end, curr_unichar_id)) return 0;
260  if (unichar_id > curr_unichar_id) return 1;
261  if (unichar_id == curr_unichar_id) {
262  if (next_node > curr_next_node) return 1;
263  if (next_node == curr_next_node) {
264  if (word_end > curr_word_end) return 1;
265  }
266  }
267  return -1;
268  }
273  bool word_end,
274  UNICHAR_ID unichar_id,
275  NODE_REF other_next_node,
276  bool other_word_end,
277  UNICHAR_ID other_unichar_id) const {
278  return ((unichar_id == other_unichar_id) &&
279  (next_node == NO_EDGE || next_node == other_next_node) &&
280  (!word_end || (word_end == other_word_end)));
281  }
282 
285  void init(int unicharset_size);
286 
292  bool match_words(WERD_CHOICE *word, int32_t index,
293  NODE_REF node, UNICHAR_ID wildcard) const;
294 
295  // Recursively iterate over all words in a dawg (see public iterate_words).
296  void iterate_words_rec(const WERD_CHOICE &word_so_far,
297  NODE_REF to_explore,
299 
300  // Member Variables.
305  // Variables to construct various edge masks. Formerly:
306  // #define NEXT_EDGE_MASK (int64_t) 0xfffffff800000000i64
307  // #define FLAGS_MASK (int64_t) 0x0000000700000000i64
308  // #define LETTER_MASK (int64_t) 0x00000000ffffffffi64
312  uint64_t next_node_mask_;
313  uint64_t flags_mask_;
314  uint64_t letter_mask_;
315  // Level of debug statements to print to stdout.
317 };
318 
319 //
320 // DawgPosition keeps track of where we are in the primary dawg we're searching
321 // as well as where we may be in the "punctuation dawg" which may provide
322 // surrounding context.
323 //
324 // Example:
325 // punctuation dawg -- space is the "pattern character"
326 // " " // no punctuation
327 // "' '" // leading and trailing apostrophes
328 // " '" // trailing apostrophe
329 // word dawg:
330 // "cat"
331 // "cab"
332 // "cat's"
333 //
334 // DawgPosition(dawg_index, dawg_ref, punc_index, punc_ref, rtp)
335 //
336 // DawgPosition(-1, NO_EDGE, p, pe, false)
337 // We're in the punctuation dawg, no other dawg has been started.
338 // (1) If there's a pattern edge as a punc dawg child of us,
339 // for each punc-following dawg starting with ch, produce:
340 // Result: DawgPosition(k, w, p', false)
341 // (2) If there's a valid continuation in the punc dawg, produce:
342 // Result: DawgPosition(-k, NO_EDGE, p', false)
343 //
344 // DawgPosition(k, w, -1, NO_EDGE, false)
345 // We're in dawg k. Going back to punctuation dawg is not an option.
346 // Follow ch in dawg k.
347 //
348 // DawgPosition(k, w, p, pe, false)
349 // We're in dawg k. Continue in dawg k and/or go back to the punc dawg.
350 // If ending, check that the punctuation dawg is also ok to end here.
351 //
352 // DawgPosition(k, w, p, pe, true)
353 // We're back in the punctuation dawg. Continuing there is the only option.
354 struct DawgPosition {
356  : dawg_index(-1), dawg_ref(NO_EDGE), punc_ref(NO_EDGE),
357  back_to_punc(false) {}
358  DawgPosition(int dawg_idx, EDGE_REF dawgref,
359  int punc_idx, EDGE_REF puncref,
360  bool backtopunc)
361  : dawg_index(dawg_idx), dawg_ref(dawgref),
362  punc_index(punc_idx), punc_ref(puncref),
363  back_to_punc(backtopunc) {
364  }
365  bool operator==(const DawgPosition &other) {
366  return dawg_index == other.dawg_index &&
367  dawg_ref == other.dawg_ref &&
368  punc_index == other.punc_index &&
369  punc_ref == other.punc_ref &&
370  back_to_punc == other.back_to_punc;
371  }
372 
373  int8_t dawg_index;
375  int8_t punc_index;
377  // Have we returned to the punc dawg at the end of the word?
379 };
380 
381 class DawgPositionVector : public GenericVector<DawgPosition> {
382  public:
385  void clear() { size_used_ = 0; }
389  inline bool add_unique(const DawgPosition &new_pos,
390  bool debug,
391  const char *debug_msg) {
392  for (int i = 0; i < size_used_; ++i) {
393  if (data_[i] == new_pos) return false;
394  }
395  push_back(new_pos);
396  if (debug) {
397  tprintf("%s[%d, " REFFORMAT "] [punc: " REFFORMAT "%s]\n",
398  debug_msg, new_pos.dawg_index, new_pos.dawg_ref,
399  new_pos.punc_ref, new_pos.back_to_punc ? " returned" : "");
400  }
401  return true;
402  }
403 };
404 
405 //
412 //
413 class SquishedDawg : public Dawg {
414  public:
416  int debug_level)
417  : Dawg(type, lang, perm, debug_level) {}
418  SquishedDawg(const char *filename, DawgType type, const STRING &lang,
419  PermuterType perm, int debug_level)
420  : Dawg(type, lang, perm, debug_level) {
421  TFile file;
422  ASSERT_HOST(file.Open(filename, nullptr));
423  ASSERT_HOST(read_squished_dawg(&file));
424  num_forward_edges_in_node0 = num_forward_edges(0);
425  }
426  SquishedDawg(EDGE_ARRAY edges, int num_edges, DawgType type,
427  const STRING &lang, PermuterType perm, int unicharset_size,
428  int debug_level)
429  : Dawg(type, lang, perm, debug_level),
430  edges_(edges),
431  num_edges_(num_edges) {
432  init(unicharset_size);
433  num_forward_edges_in_node0 = num_forward_edges(0);
434  if (debug_level > 3) print_all("SquishedDawg:");
435  }
436  virtual ~SquishedDawg();
437 
438  // Loads using the given TFile. Returns false on failure.
439  bool Load(TFile *fp) {
440  if (!read_squished_dawg(fp)) return false;
441  num_forward_edges_in_node0 = num_forward_edges(0);
442  return true;
443  }
444 
445  int NumEdges() { return num_edges_; }
446 
448  EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id,
449  bool word_end) const;
450 
454  bool word_end) const {
455  EDGE_REF edge = node;
456  if (!edge_occupied(edge) || edge == NO_EDGE) return;
457  assert(forward_edge(edge)); // we don't expect any backward edges to
458  do { // be present when this function is called
459  if (!word_end || end_of_word_from_edge_rec(edges_[edge])) {
460  vec->push_back(NodeChild(unichar_id_from_edge_rec(edges_[edge]), edge));
461  }
462  } while (!last_edge(edge++));
463  }
464 
468  return next_node_from_edge_rec((edges_[edge]));
469  }
470 
473  bool end_of_word(EDGE_REF edge_ref) const {
474  return end_of_word_from_edge_rec((edges_[edge_ref]));
475  }
476 
478  UNICHAR_ID edge_letter(EDGE_REF edge_ref) const {
479  return unichar_id_from_edge_rec((edges_[edge_ref]));
480  }
481 
484  void print_node(NODE_REF node, int max_num_edges) const;
485 
487  bool write_squished_dawg(TFile *file);
488 
491  bool write_squished_dawg(const char *filename) {
492  TFile file;
493  file.OpenWrite(nullptr);
494  if (!this->write_squished_dawg(&file)) {
495  tprintf("Error serializing %s\n", filename);
496  return false;
497  }
498  if (!file.CloseWrite(filename, nullptr)) {
499  tprintf("Error writing file %s\n", filename);
500  return false;
501  }
502  return true;
503  }
504 
505  private:
507  inline void set_next_node(EDGE_REF edge_ref, EDGE_REF value) {
508  set_next_node_in_edge_rec(&(edges_[edge_ref]), value);
509  }
511  inline void set_empty_edge(EDGE_REF edge_ref) {
512  (edges_[edge_ref] = next_node_mask_);
513  }
515  inline void clear_all_edges() {
516  for (int edge = 0; edge < num_edges_; edge++) set_empty_edge(edge);
517  }
519  inline void clear_marker_flag(EDGE_REF edge_ref) {
520  (edges_[edge_ref] &= ~(MARKER_FLAG << flag_start_bit_));
521  }
523  inline bool forward_edge(EDGE_REF edge_ref) const {
524  return (edge_occupied(edge_ref) &&
525  (FORWARD_EDGE == direction_from_edge_rec(edges_[edge_ref])));
526  }
528  inline bool backward_edge(EDGE_REF edge_ref) const {
529  return (edge_occupied(edge_ref) &&
530  (BACKWARD_EDGE == direction_from_edge_rec(edges_[edge_ref])));
531  }
533  inline bool edge_occupied(EDGE_REF edge_ref) const {
534  return (edges_[edge_ref] != next_node_mask_);
535  }
537  inline bool last_edge(EDGE_REF edge_ref) const {
538  return (edges_[edge_ref] & (MARKER_FLAG << flag_start_bit_)) != 0;
539  }
540 
542  int32_t num_forward_edges(NODE_REF node) const;
543 
545  bool read_squished_dawg(TFile *file);
546 
548  void print_edge(EDGE_REF edge) const;
549 
551  void print_all(const char* msg) {
552  tprintf("\n__________________________\n%s\n", msg);
553  for (int i = 0; i < num_edges_; ++i) print_edge(i);
554  tprintf("__________________________\n");
555  }
557  std::unique_ptr<EDGE_REF[]> build_node_map(int32_t *num_nodes) const;
558 
559  // Member variables.
560  EDGE_ARRAY edges_;
561  int32_t num_edges_;
562  int num_forward_edges_in_node0;
563 };
564 
565 } // namespace tesseract
566 
567 #endif // DICT_DAWG_H_
bool Load(TFile *fp)
Definition: dawg.h:439
int next_node_start_bit_
Definition: dawg.h:311
int UNICHAR_ID
Definition: unichar.h:35
#define FORWARD_EDGE
Definition: dawg.h:85
NodeChild(UNICHAR_ID id, EDGE_REF ref)
Definition: dawg.h:64
void OpenWrite(GenericVector< char > *data)
Definition: serialis.cpp:295
bool write_squished_dawg(const char *filename)
Definition: dawg.h:491
void print_node(NODE_REF node, int max_num_edges) const
Definition: dawg.cpp:246
virtual void unichar_id_to_patterns(UNICHAR_ID unichar_id, const UNICHARSET &unicharset, GenericVector< UNICHAR_ID > *vec) const
Definition: dawg.h:185
uint64_t letter_mask_
Definition: dawg.h:314
virtual UNICHAR_ID edge_letter(EDGE_REF edge_ref) const =0
Returns UNICHAR_ID stored in the edge indicated by the given EDGE_REF.
int direction_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns the direction flag of this edge.
Definition: dawg.h:221
STRING lang_
Definition: dawg.h:302
int unicharset_size_
Definition: dawg.h:309
#define BACKWARD_EDGE
Definition: dawg.h:86
UNICHAR_ID unichar_id_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns UNICHAR_ID recorded in this edge.
Definition: dawg.h:230
bool prefix_in_dawg(const WERD_CHOICE &prefix, bool requires_complete) const
Definition: dawg.cpp:50
void iterate_words_rec(const WERD_CHOICE &word_so_far, NODE_REF to_explore, TessCallback1< const WERD_CHOICE *> *cb) const
Definition: dawg.cpp:132
virtual ~Dawg()
bool word_in_dawg(const WERD_CHOICE &word) const
Returns true if the given word is in the Dawg.
Definition: dawg.cpp:71
UNICHAR_ID edge_letter(EDGE_REF edge_ref) const
Returns UNICHAR_ID stored in the edge indicated by the given EDGE_REF.
Definition: dawg.h:478
void set_marker_flag_in_edge_rec(EDGE_RECORD *edge_rec)
Sets this edge record to be the last one in a sequence of edges.
Definition: dawg.h:241
bool add_unique(const DawgPosition &new_pos, bool debug, const char *debug_msg)
Definition: dawg.h:389
void set_next_node_in_edge_rec(EDGE_RECORD *edge_rec, EDGE_REF value)
Sets the next node link for this edge in the Dawg.
Definition: dawg.h:235
#define REFFORMAT
Definition: dawg.h:93
EDGE_REF dawg_ref
Definition: dawg.h:374
#define LETTER_START_BIT
Definition: dawg.h:91
#define WERD_END_FLAG
Definition: dawg.h:90
PermuterType perm_
Permuter code that should be used if the word is found in this Dawg.
Definition: dawg.h:304
int given_greater_than_edge_rec(NODE_REF next_node, bool word_end, UNICHAR_ID unichar_id, const EDGE_RECORD &edge_rec) const
Definition: dawg.h:251
static const int16_t kDawgMagicNumber
Magic number to determine endianness when reading the Dawg from file.
Definition: dawg.h:122
int64_t EDGE_REF
Definition: dawg.h:55
SquishedDawg(DawgType type, const STRING &lang, PermuterType perm, int debug_level)
Definition: dawg.h:415
int64_t NODE_REF
Definition: dawg.h:56
bool operator==(const DawgPosition &other)
Definition: dawg.h:365
#define DIRECTION_FLAG
Definition: dawg.h:89
bool write_squished_dawg(TFile *file)
Writes the squished/reduced Dawg to a file.
Definition: dawg.cpp:374
uint64_t next_node_mask_
Definition: dawg.h:312
SquishedDawg(const char *filename, DawgType type, const STRING &lang, PermuterType perm, int debug_level)
Definition: dawg.h:418
EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id, bool word_end) const
Returns the edge that corresponds to the letter out of this node.
Definition: dawg.cpp:201
UNICHAR_ID unichar_id
Definition: dawg.h:62
virtual EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id, bool word_end) const =0
Returns the edge that corresponds to the letter out of this node.
uint64_t flags_mask_
Definition: dawg.h:313
virtual void print_node(NODE_REF node, int max_num_edges) const =0
bool marker_flag_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns the marker flag of this edge.
Definition: dawg.h:217
virtual EDGE_REF pattern_loop_edge(EDGE_REF edge_ref, UNICHAR_ID unichar_id, bool word_end) const
Definition: dawg.h:196
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
bool match_words(WERD_CHOICE *word, int32_t index, NODE_REF node, UNICHAR_ID wildcard) const
Definition: dawg.cpp:150
virtual void unichar_ids_of(NODE_REF node, NodeChildVector *vec, bool word_end) const =0
bool Open(const STRING &filename, FileReader reader)
Definition: serialis.cpp:196
int push_back(DawgPosition object)
void iterate_words(const UNICHARSET &unicharset, TessCallback1< const WERD_CHOICE *> *cb) const
Definition: dawg.cpp:111
int flag_start_bit_
Definition: dawg.h:310
Definition: strngs.h:45
uint64_t EDGE_RECORD
Definition: dawg.h:53
DawgType type_
Definition: dawg.h:301
void init(int unicharset_size)
Definition: dawg.cpp:182
bool CloseWrite(const STRING &filename, FileWriter writer)
Definition: serialis.cpp:310
virtual NODE_REF next_node(EDGE_REF edge_ref) const =0
bool end_of_word_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns true if this edge marks the end of a word.
Definition: dawg.h:226
DawgPosition(int dawg_idx, EDGE_REF dawgref, int punc_idx, EDGE_REF puncref, bool backtopunc)
Definition: dawg.h:358
static const UNICHAR_ID kPatternUnicharID
Definition: dawg.h:126
const STRING & lang() const
Definition: dawg.h:129
DawgType
Definition: dawg.h:72
SquishedDawg(EDGE_ARRAY edges, int num_edges, DawgType type, const STRING &lang, PermuterType perm, int unicharset_size, int debug_level)
Definition: dawg.h:426
NODE_REF next_node_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns the next node visited by following this edge.
Definition: dawg.h:213
DawgType type() const
Definition: dawg.h:128
virtual ~SquishedDawg()
Definition: dawg.cpp:199
int debug_level_
Definition: dawg.h:316
PermuterType permuter() const
Definition: dawg.h:130
int check_for_words(const char *filename, const UNICHARSET &unicharset, bool enable_wildcard) const
Definition: dawg.cpp:75
PermuterType
Definition: ratngs.h:242
NODE_REF next_node(EDGE_REF edge) const
Definition: dawg.h:467
EDGE_REF punc_ref
Definition: dawg.h:376
bool edge_rec_match(NODE_REF next_node, bool word_end, UNICHAR_ID unichar_id, NODE_REF other_next_node, bool other_word_end, UNICHAR_ID other_unichar_id) const
Definition: dawg.h:272
virtual bool end_of_word(EDGE_REF edge_ref) const =0
EDGE_REF edge_ref
Definition: dawg.h:63
EDGE_RECORD * EDGE_ARRAY
Definition: dawg.h:54
#define MARKER_FLAG
Definition: dawg.h:88
EDGE_REF * NODE_MAP
Definition: dawg.h:57
Dawg(DawgType type, const STRING &lang, PermuterType perm, int debug_level)
Definition: dawg.h:205
bool end_of_word(EDGE_REF edge_ref) const
Definition: dawg.h:473
#define ASSERT_HOST(x)
Definition: errcode.h:84
void unichar_ids_of(NODE_REF node, NodeChildVector *vec, bool word_end) const
Definition: dawg.h:453