All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
dawg.h
Go to the documentation of this file.
1 /* -*-C-*-
2  ********************************************************************************
3  *
4  * File: dawg.h (Formerly dawg.h)
5  * Description: Definition of a class that represents Directed Acyclic Word
6  * Graph (DAWG), functions to build and manipulate the DAWG.
7  * Author: Mark Seaman, SW Productivity
8  * Created: Fri Oct 16 14:37:00 1987
9  * Modified: Wed Jun 19 16:50:24 1991 (Mark Seaman) marks@hpgrlt
10  * Language: C
11  * Package: N/A
12  * Status: Reusable Software Component
13  *
14  * (c) Copyright 1987, Hewlett-Packard Company.
15  ** Licensed under the Apache License, Version 2.0 (the "License");
16  ** you may not use this file except in compliance with the License.
17  ** You may obtain a copy of the License at
18  ** http://www.apache.org/licenses/LICENSE-2.0
19  ** Unless required by applicable law or agreed to in writing, software
20  ** distributed under the License is distributed on an "AS IS" BASIS,
21  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22  ** See the License for the specific language governing permissions and
23  ** limitations under the License.
24  *
25  *********************************************************************************/
26 
27 #ifndef DICT_DAWG_H_
28 #define DICT_DAWG_H_
29 
30 /*----------------------------------------------------------------------
31  I n c l u d e s
32 ----------------------------------------------------------------------*/
33 
34 #include "elst.h"
35 #include "ratngs.h"
36 #include "params.h"
37 #include "tesscallback.h"
38 
// Sentinel edge/node reference meaning "no edge". It is -1 as an inT64, i.e.
// the same all-bits-set value the former compiler-specific literals
// (0xffffffffffffffffi64 / 0xffffffffffffffffll) produced after the cast.
// A single definition is used for every compiler: the previous
// __GNUC__/_WIN32 branches left NO_EDGE undefined on any other toolchain.
// The expansion is fully parenthesized so it is safe in any expression.
#define NO_EDGE ((inT64) -1)
46 
47 /*----------------------------------------------------------------------
48  T y p e s
49 ----------------------------------------------------------------------*/
50 class UNICHARSET;
51 
52 typedef uinT64 EDGE_RECORD;
54 typedef inT64 EDGE_REF;
55 typedef inT64 NODE_REF;
56 typedef EDGE_REF *NODE_MAP;
57 
58 namespace tesseract {
59 
60 struct NodeChild {
63  NodeChild(UNICHAR_ID id, EDGE_REF ref): unichar_id(id), edge_ref(ref) {}
64  NodeChild(): unichar_id(INVALID_UNICHAR_ID), edge_ref(NO_EDGE) {}
65 };
66 
70 
71 enum DawgType {
76 
77  DAWG_TYPE_COUNT // number of enum entries
78 };
79 
80 /*----------------------------------------------------------------------
81  C o n s t a n t s
82 ----------------------------------------------------------------------*/
83 
// Every expansion below is fully parenthesized so that the macros behave as
// single operands in any expression (e.g. after sizeof or next to a
// higher-precedence operator).

// Edge directions, compared against the result of direction_from_edge_rec().
#define FORWARD_EDGE ((inT32) 0)
#define BACKWARD_EDGE ((inT32) 1)
#define MAX_NODE_EDGES_DISPLAY ((inT64) 100)
// Flag bits of an edge record. They are shifted left by flag_start_bit_
// before being tested against or stored into an EDGE_RECORD.
#define MARKER_FLAG ((inT64) 1)
#define DIRECTION_FLAG ((inT64) 2)
#define WERD_END_FLAG ((inT64) 4)
// Bit position at which the letter (unichar id) field of an edge record starts.
#define LETTER_START_BIT 0
// Number of flag bits in an edge record (the three flags above).
#define NUM_FLAG_BITS 3
// printf-style conversion used to print EDGE_REF / NODE_REF (inT64) values.
#define REFFORMAT "%lld"
93 
94 static const bool kDawgSuccessors[DAWG_TYPE_COUNT][DAWG_TYPE_COUNT] = {
95  { 0, 1, 1, 0 }, // for DAWG_TYPE_PUNCTUATION
96  { 1, 0, 0, 0 }, // for DAWG_TYPE_WORD
97  { 1, 0, 0, 0 }, // for DAWG_TYPE_NUMBER
98  { 0, 0, 0, 0 }, // for DAWG_TYPE_PATTERN
99 };
100 
101 static const char kWildcard[] = "*";
102 
103 
104 /*----------------------------------------------------------------------
105  C l a s s e s a n d S t r u c t s
106 ----------------------------------------------------------------------*/
107 //
117 //
118 class Dawg {
119  public:
121  static const inT16 kDawgMagicNumber = 42;
125  static const UNICHAR_ID kPatternUnicharID = 0;
126 
127  inline DawgType type() const { return type_; }
128  inline const STRING &lang() const { return lang_; }
129  inline PermuterType permuter() const { return perm_; }
130 
131  virtual ~Dawg() {};
132 
134  bool word_in_dawg(const WERD_CHOICE &word) const;
135 
136  // Returns true if the given word prefix is not contraindicated by the dawg.
137  // If requires_complete is true, then the exact complete word must be present.
138  bool prefix_in_dawg(const WERD_CHOICE &prefix, bool requires_complete) const;
139 
142  int check_for_words(const char *filename,
143  const UNICHARSET &unicharset,
144  bool enable_wildcard) const;
145 
146  // For each word in the Dawg, call the given (permanent) callback with the
147  // WERD_CHOICE version of the word.
148  void iterate_words(const UNICHARSET &unicharset,
150 
151  // For each word in the Dawg, call the given (permanent) callback with the
152  // text (UTF-8) version of the word.
153  void iterate_words(const UNICHARSET &unicharset,
154  TessCallback1<const char *> *cb) const;
155 
156  // Pure virtual functions that must be implemented by the derived classes.
157 
159  virtual EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id,
160  bool word_end) const = 0;
161 
164  virtual void unichar_ids_of(NODE_REF node, NodeChildVector *vec,
165  bool word_end) const = 0;
166 
169  virtual NODE_REF next_node(EDGE_REF edge_ref) const = 0;
170 
173  virtual bool end_of_word(EDGE_REF edge_ref) const = 0;
174 
176  virtual UNICHAR_ID edge_letter(EDGE_REF edge_ref) const = 0;
177 
180  virtual void print_node(NODE_REF node, int max_num_edges) const = 0;
181 
184  virtual void unichar_id_to_patterns(UNICHAR_ID unichar_id,
185  const UNICHARSET &unicharset,
186  GenericVector<UNICHAR_ID> *vec) const {};
187 
192  EDGE_REF edge_ref, UNICHAR_ID unichar_id, bool word_end) const {
193  return false;
194  }
195 
196  protected:
197  Dawg() {}
198 
200  inline NODE_REF next_node_from_edge_rec(const EDGE_RECORD &edge_rec) const {
201  return ((edge_rec & next_node_mask_) >> next_node_start_bit_);
202  }
204  inline bool marker_flag_from_edge_rec(const EDGE_RECORD &edge_rec) const {
205  return (edge_rec & (MARKER_FLAG << flag_start_bit_)) != 0;
206  }
208  inline int direction_from_edge_rec(const EDGE_RECORD &edge_rec) const {
209  return ((edge_rec & (DIRECTION_FLAG << flag_start_bit_))) ?
211  }
213  inline bool end_of_word_from_edge_rec(const EDGE_RECORD &edge_rec) const {
214  return (edge_rec & (WERD_END_FLAG << flag_start_bit_)) != 0;
215  }
218  const EDGE_RECORD &edge_rec) const {
219  return ((edge_rec & letter_mask_) >> LETTER_START_BIT);
220  }
223  EDGE_RECORD *edge_rec, EDGE_REF value) {
224  *edge_rec &= (~next_node_mask_);
225  *edge_rec |= ((value << next_node_start_bit_) & next_node_mask_);
226  }
228  inline void set_marker_flag_in_edge_rec(EDGE_RECORD *edge_rec) {
229  *edge_rec |= (MARKER_FLAG << flag_start_bit_);
230  }
239  bool word_end,
240  UNICHAR_ID unichar_id,
241  const EDGE_RECORD &edge_rec) const {
242  UNICHAR_ID curr_unichar_id = unichar_id_from_edge_rec(edge_rec);
243  NODE_REF curr_next_node = next_node_from_edge_rec(edge_rec);
244  bool curr_word_end = end_of_word_from_edge_rec(edge_rec);
245  if (edge_rec_match(next_node, word_end, unichar_id, curr_next_node,
246  curr_word_end, curr_unichar_id)) return 0;
247  if (unichar_id > curr_unichar_id) return 1;
248  if (unichar_id == curr_unichar_id) {
249  if (next_node > curr_next_node) return 1;
250  if (next_node == curr_next_node) {
251  if (word_end > curr_word_end) return 1;
252  }
253  }
254  return -1;
255  }
260  bool word_end,
261  UNICHAR_ID unichar_id,
262  NODE_REF other_next_node,
263  bool other_word_end,
264  UNICHAR_ID other_unichar_id) const {
265  return ((unichar_id == other_unichar_id) &&
266  (next_node == NO_EDGE || next_node == other_next_node) &&
267  (!word_end || (word_end == other_word_end)));
268  }
269 
272  void init(DawgType type, const STRING &lang,
273  PermuterType perm, int unicharset_size, int debug_level);
274 
280  bool match_words(WERD_CHOICE *word, inT32 index,
281  NODE_REF node, UNICHAR_ID wildcard) const;
282 
283  // Recursively iterate over all words in a dawg (see public iterate_words).
284  void iterate_words_rec(const WERD_CHOICE &word_so_far,
285  NODE_REF to_explore,
287 
288  // Member Variables.
293  // Variables to construct various edge masks. Formerly:
294  // #define NEXT_EDGE_MASK (inT64) 0xfffffff800000000i64
295  // #define FLAGS_MASK (inT64) 0x0000000700000000i64
296  // #define LETTER_MASK (inT64) 0x00000000ffffffffi64
303  // Level of debug statements to print to stdout.
305 };
306 
307 //
308 // DawgPosition keeps track of where we are in the primary dawg we're searching
309 // as well as where we may be in the "punctuation dawg" which may provide
310 // surrounding context.
311 //
312 // Example:
313 // punctuation dawg -- space is the "pattern character"
314 // " " // no punctuation
315 // "' '" // leading and trailing apostrophes
316 // " '" // trailing apostrophe
317 // word dawg:
318 // "cat"
319 // "cab"
320 // "cat's"
321 //
322 // DawgPosition(dawg_index, dawg_ref, punc_index, punc_ref, rtp)
323 //
324 // DawgPosition(-1, NO_EDGE, p, pe, false)
325 // We're in the punctuation dawg, no other dawg has been started.
326 // (1) If there's a pattern edge as a punc dawg child of us,
327 // for each punc-following dawg starting with ch, produce:
328 // Result: DawgPosition(k, w, p, pe', false)
329 // (2) If there's a valid continuation in the punc dawg, produce:
330 // Result: DawgPosition(-1, NO_EDGE, p, pe', false)
331 //
332 // DawgPosition(k, w, -1, NO_EDGE, false)
333 // We're in dawg k. Going back to punctuation dawg is not an option.
334 // Follow ch in dawg k.
335 //
336 // DawgPosition(k, w, p, pe, false)
337 // We're in dawg k. Continue in dawg k and/or go back to the punc dawg.
338 // If ending, check that the punctuation dawg is also ok to end here.
339 //
340 // DawgPosition(k, w, p, pe, true)
341 // We're back in the punctuation dawg. Continuing there is the only option.
342 struct DawgPosition {
344  : dawg_index(-1), dawg_ref(NO_EDGE), punc_ref(NO_EDGE),
345  back_to_punc(false) {}
346  DawgPosition(int dawg_idx, EDGE_REF dawgref,
347  int punc_idx, EDGE_REF puncref,
348  bool backtopunc)
349  : dawg_index(dawg_idx), dawg_ref(dawgref),
350  punc_index(punc_idx), punc_ref(puncref),
351  back_to_punc(backtopunc) {
352  }
353  bool operator==(const DawgPosition &other) {
354  return dawg_index == other.dawg_index &&
355  dawg_ref == other.dawg_ref &&
356  punc_index == other.punc_index &&
357  punc_ref == other.punc_ref &&
358  back_to_punc == other.back_to_punc;
359  }
360 
365  // Have we returned to the punc dawg at the end of the word?
367 };
368 
369 class DawgPositionVector : public GenericVector<DawgPosition> {
370  public:
373  if (size_reserved_ > 0) {
374  delete[] data_;
375  size_used_ = 0;
376  size_reserved_ = 0;
377  }
378  }
381  void clear() { size_used_ = 0; }
385  inline bool add_unique(const DawgPosition &new_pos,
386  bool debug,
387  const char *debug_msg) {
388  for (int i = 0; i < size_used_; ++i) {
389  if (data_[i] == new_pos) return false;
390  }
391  push_back(new_pos);
392  if (debug) {
393  tprintf("%s[%d, " REFFORMAT "] [punc: " REFFORMAT "%s]\n",
394  debug_msg, new_pos.dawg_index, new_pos.dawg_ref,
395  new_pos.punc_ref, new_pos.back_to_punc ? " returned" : "");
396  }
397  return true;
398  }
399 };
400 
401 //
408 //
409 class SquishedDawg : public Dawg {
410  public:
411  SquishedDawg(FILE *file, DawgType type, const STRING &lang,
412  PermuterType perm, int debug_level) {
413  read_squished_dawg(file, type, lang, perm, debug_level);
414  num_forward_edges_in_node0 = num_forward_edges(0);
415  }
417  const STRING &lang, PermuterType perm, int debug_level) {
418  FILE *file = fopen(filename, "rb");
419  if (file == NULL) {
420  tprintf("Failed to open dawg file %s\n", filename);
421  exit(1);
422  }
423  read_squished_dawg(file, type, lang, perm, debug_level);
424  num_forward_edges_in_node0 = num_forward_edges(0);
425  fclose(file);
426  }
427  SquishedDawg(EDGE_ARRAY edges, int num_edges, DawgType type,
428  const STRING &lang, PermuterType perm,
429  int unicharset_size, int debug_level) :
430  edges_(edges), num_edges_(num_edges) {
431  init(type, lang, perm, unicharset_size, debug_level);
432  num_forward_edges_in_node0 = num_forward_edges(0);
433  if (debug_level > 3) print_all("SquishedDawg:");
434  }
435  ~SquishedDawg();
436 
437  int NumEdges() { return num_edges_; }
438 
440  EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id,
441  bool word_end) const;
442 
445  void unichar_ids_of(NODE_REF node, NodeChildVector *vec,
446  bool word_end) const {
447  EDGE_REF edge = node;
448  if (!edge_occupied(edge) || edge == NO_EDGE) return;
449  assert(forward_edge(edge)); // we don't expect any backward edges to
450  do { // be present when this funciton is called
451  if (!word_end || end_of_word_from_edge_rec(edges_[edge])) {
452  vec->push_back(NodeChild(unichar_id_from_edge_rec(edges_[edge]), edge));
453  }
454  } while (!last_edge(edge++));
455  }
456 
460  return next_node_from_edge_rec((edges_[edge]));
461  }
462 
465  bool end_of_word(EDGE_REF edge_ref) const {
466  return end_of_word_from_edge_rec((edges_[edge_ref]));
467  }
468 
470  UNICHAR_ID edge_letter(EDGE_REF edge_ref) const {
471  return unichar_id_from_edge_rec((edges_[edge_ref]));
472  }
473 
476  void print_node(NODE_REF node, int max_num_edges) const;
477 
479  void write_squished_dawg(FILE *file);
480 
483  void write_squished_dawg(const char *filename) {
484  FILE *file = fopen(filename, "wb");
485  if (file == NULL) {
486  tprintf("Error opening %s\n", filename);
487  exit(1);
488  }
489  this->write_squished_dawg(file);
490  fclose(file);
491  }
492 
493  private:
495  inline void set_next_node(EDGE_REF edge_ref, EDGE_REF value) {
496  set_next_node_in_edge_rec(&(edges_[edge_ref]), value);
497  }
499  inline void set_empty_edge(EDGE_REF edge_ref) {
500  (edges_[edge_ref] = next_node_mask_);
501  }
503  inline void clear_all_edges() {
504  for (int edge = 0; edge < num_edges_; edge++) set_empty_edge(edge);
505  }
507  inline void clear_marker_flag(EDGE_REF edge_ref) {
508  (edges_[edge_ref] &= ~(MARKER_FLAG << flag_start_bit_));
509  }
511  inline bool forward_edge(EDGE_REF edge_ref) const {
512  return (edge_occupied(edge_ref) &&
513  (FORWARD_EDGE == direction_from_edge_rec(edges_[edge_ref])));
514  }
516  inline bool backward_edge(EDGE_REF edge_ref) const {
517  return (edge_occupied(edge_ref) &&
518  (BACKWARD_EDGE == direction_from_edge_rec(edges_[edge_ref])));
519  }
521  inline bool edge_occupied(EDGE_REF edge_ref) const {
522  return (edges_[edge_ref] != next_node_mask_);
523  }
525  inline bool last_edge(EDGE_REF edge_ref) const {
526  return (edges_[edge_ref] & (MARKER_FLAG << flag_start_bit_)) != 0;
527  }
528 
530  inT32 num_forward_edges(NODE_REF node) const;
531 
533  void read_squished_dawg(FILE *file, DawgType type, const STRING &lang,
534  PermuterType perm, int debug_level);
535 
537  void print_edge(EDGE_REF edge) const;
538 
540  void print_all(const char* msg) {
541  tprintf("\n__________________________\n%s\n", msg);
542  for (int i = 0; i < num_edges_; ++i) print_edge(i);
543  tprintf("__________________________\n");
544  }
546  NODE_MAP build_node_map(inT32 *num_nodes) const;
547 
548 
549  // Member variables.
550  EDGE_ARRAY edges_;
551  int num_edges_;
552  int num_forward_edges_in_node0;
553 };
554 
555 } // namespace tesseract
556 
557 #endif // DICT_DAWG_H_
int next_node_start_bit_
Definition: dawg.h:299
const STRING & lang() const
Definition: dawg.h:128
bool end_of_word_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns true if this edge marks the end of a word.
Definition: dawg.h:213
~DawgPositionVector()
Overload destructor, since clear() does not delete data_[] any more.
Definition: dawg.h:372
void set_marker_flag_in_edge_rec(EDGE_RECORD *edge_rec)
Sets this edge record to be the last one in a sequence of edges.
Definition: dawg.h:228
EDGE_RECORD * EDGE_ARRAY
Definition: dawg.h:53
UNICHAR_ID unichar_id
Definition: dawg.h:61
virtual bool end_of_word(EDGE_REF edge_ref) const =0
DawgPosition(int dawg_idx, EDGE_REF dawgref, int punc_idx, EDGE_REF puncref, bool backtopunc)
Definition: dawg.h:346
EDGE_REF dawg_ref
Definition: dawg.h:362
#define LETTER_START_BIT
Definition: dawg.h:90
int push_back(DawgPosition object)
void print_node(NODE_REF node, int max_num_edges) const
Definition: dawg.cpp:247
EDGE_REF * NODE_MAP
Definition: dawg.h:56
NODE_REF next_node(EDGE_REF edge) const
Definition: dawg.h:459
void iterate_words_rec(const WERD_CHOICE &word_so_far, NODE_REF to_explore, TessCallback1< const WERD_CHOICE * > *cb) const
Definition: dawg.cpp:127
#define tprintf(...)
Definition: tprintf.h:31
#define DIRECTION_FLAG
Definition: dawg.h:88
PermuterType
Definition: ratngs.h:240
SquishedDawg(FILE *file, DawgType type, const STRING &lang, PermuterType perm, int debug_level)
Definition: dawg.h:411
UNICHAR_ID edge_letter(EDGE_REF edge_ref) const
Returns UNICHAR_ID stored in the edge indicated by the given EDGE_REF.
Definition: dawg.h:470
GenericVector< NodeChild > NodeChildVector
Definition: dawg.h:67
NODE_REF next_node_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns the next node visited by following this edge.
Definition: dawg.h:200
virtual void print_node(NODE_REF node, int max_num_edges) const =0
void write_squished_dawg(const char *filename)
Definition: dawg.h:483
bool edge_rec_match(NODE_REF next_node, bool word_end, UNICHAR_ID unichar_id, NODE_REF other_next_node, bool other_word_end, UNICHAR_ID other_unichar_id) const
Definition: dawg.h:259
EDGE_REF punc_ref
Definition: dawg.h:364
bool end_of_word(EDGE_REF edge_ref) const
Definition: dawg.h:465
EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id, bool word_end) const
Returns the edge that corresponds to the letter out of this node.
Definition: dawg.cpp:202
static const UNICHAR_ID kPatternUnicharID
Definition: dawg.h:125
int check_for_words(const char *filename, const UNICHARSET &unicharset, bool enable_wildcard) const
Definition: dawg.cpp:74
bool word_in_dawg(const WERD_CHOICE &word) const
Returns true if the given word is in the Dawg.
Definition: dawg.cpp:70
void write_squished_dawg(FILE *file)
Writes the squished/reduced Dawg to a file.
Definition: dawg.cpp:388
virtual EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id, bool word_end) const =0
Returns the edge that corresponds to the letter out of this node.
#define WERD_END_FLAG
Definition: dawg.h:89
void iterate_words(const UNICHARSET &unicharset, TessCallback1< const WERD_CHOICE * > *cb) const
Definition: dawg.cpp:106
#define FORWARD_EDGE
Definition: dawg.h:84
uinT64 next_node_mask_
Definition: dawg.h:300
uinT64 letter_mask_
Definition: dawg.h:302
int unicharset_size_
Definition: dawg.h:297
static const inT16 kDawgMagicNumber
Magic number to determine endianness when reading the Dawg from file.
Definition: dawg.h:121
uinT64 EDGE_RECORD
Definition: dawg.h:50
bool prefix_in_dawg(const WERD_CHOICE &prefix, bool requires_complete) const
Definition: dawg.cpp:49
NodeChild(UNICHAR_ID id, EDGE_REF ref)
Definition: dawg.h:63
virtual void unichar_id_to_patterns(UNICHAR_ID unichar_id, const UNICHARSET &unicharset, GenericVector< UNICHAR_ID > *vec) const
Definition: dawg.h:184
#define BACKWARD_EDGE
Definition: dawg.h:85
bool match_words(WERD_CHOICE *word, inT32 index, NODE_REF node, UNICHAR_ID wildcard) const
Definition: dawg.cpp:145
int debug_level_
Definition: dawg.h:304
UNICHAR_ID unichar_id_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns UNICHAR_ID recorded in this edge.
Definition: dawg.h:217
void set_next_node_in_edge_rec(EDGE_RECORD *edge_rec, EDGE_REF value)
Sets the next node link for this edge in the Dawg.
Definition: dawg.h:222
GenericVector< SuccessorList * > SuccessorListsVector
Definition: dawg.h:69
int direction_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns the direction flag of this edge.
Definition: dawg.h:208
SquishedDawg(EDGE_ARRAY edges, int num_edges, DawgType type, const STRING &lang, PermuterType perm, int unicharset_size, int debug_level)
Definition: dawg.h:427
int UNICHAR_ID
Definition: unichar.h:33
int given_greater_than_edge_rec(NODE_REF next_node, bool word_end, UNICHAR_ID unichar_id, const EDGE_RECORD &edge_rec) const
Definition: dawg.h:238
STRING lang_
Definition: dawg.h:290
virtual EDGE_REF pattern_loop_edge(EDGE_REF edge_ref, UNICHAR_ID unichar_id, bool word_end) const
Definition: dawg.h:191
unsigned long long int uinT64
Definition: host.h:109
virtual void unichar_ids_of(NODE_REF node, NodeChildVector *vec, bool word_end) const =0
int flag_start_bit_
Definition: dawg.h:298
bool operator==(const DawgPosition &other)
Definition: dawg.h:353
virtual ~Dawg()
Definition: dawg.h:131
#define REFFORMAT
Definition: dawg.h:92
GenericVector< int > SuccessorList
Definition: dawg.h:68
DawgType type() const
Definition: dawg.h:127
#define MARKER_FLAG
Definition: dawg.h:87
DawgType
Definition: dawg.h:71
bool add_unique(const DawgPosition &new_pos, bool debug, const char *debug_msg)
Definition: dawg.h:385
bool marker_flag_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns the marker flag of this edge.
Definition: dawg.h:204
inT64 EDGE_REF
Definition: dawg.h:54
Definition: strngs.h:44
#define NULL
Definition: host.h:144
virtual UNICHAR_ID edge_letter(EDGE_REF edge_ref) const =0
Returns UNICHAR_ID stored in the edge indicated by the given EDGE_REF.
DawgType type_
Definition: dawg.h:289
SIGNED char inT8
Definition: host.h:98
inT64 NODE_REF
Definition: dawg.h:55
void init(DawgType type, const STRING &lang, PermuterType perm, int unicharset_size, int debug_level)
Definition: dawg.cpp:177
PermuterType perm_
Permuter code that should be used if the word is found in this Dawg.
Definition: dawg.h:292
PermuterType permuter() const
Definition: dawg.h:129
EDGE_REF edge_ref
Definition: dawg.h:62
void unichar_ids_of(NODE_REF node, NodeChildVector *vec, bool word_end) const
Definition: dawg.h:445
virtual NODE_REF next_node(EDGE_REF edge_ref) const =0
uinT64 flags_mask_
Definition: dawg.h:301
short inT16
Definition: host.h:100
SquishedDawg(const char *filename, DawgType type, const STRING &lang, PermuterType perm, int debug_level)
Definition: dawg.h:416
int inT32
Definition: host.h:102
long long int inT64
Definition: host.h:108