tesseract
5.0.0-alpha-619-ge9db
|
Go to the documentation of this file.
55 class Trie :
public Dawg {
83 int unicharset_size,
int debug_level)
85 init(unicharset_size);
98 bool word_end)
const override {
102 &edge_ptr, &edge_index))
return NO_EDGE;
111 bool word_end)
const override {
113 nodes_[static_cast<int>(node)]->forward_edges;
114 for (
int i = 0; i < forward_edges.
size(); ++i) {
127 if (edge_ref == NO_EDGE ||
num_edges_ == 0)
return NO_EDGE;
136 if (edge_ref == NO_EDGE ||
num_edges_ == 0)
return false;
142 if (edge_ref == NO_EDGE ||
num_edges_ == 0)
return INVALID_UNICHAR_ID;
242 bool word_end)
const override {
243 if (edge_ref == NO_EDGE)
return NO_EDGE;
285 int edge_index = static_cast<int>(
287 int node_index = static_cast<int>(
300 int direction,
bool word_end,
UNICHAR_ID unichar_id) {
321 return (node_ref != NO_EDGE &&
322 nodes_[static_cast<int>(node_ref)]->forward_edges.
size() == 1);
327 void print_all(
const char* msg,
int max_num_edges) {
328 tprintf(
"\n__________________________\n%s\n", msg);
330 tprintf(
"__________________________\n");
338 int direction,
bool word_end,
UNICHAR_ID unichar_id,
344 int direction,
bool word_end,
350 bool repeats,
bool word_end,
UNICHAR_ID unichar_id) {
352 word_end, unichar_id) &&
354 word_end, unichar_id));
void delete_data_pointers()
EDGE_VECTOR forward_edges
EDGE_REF edge_char_of(NODE_REF node_ref, UNICHAR_ID unichar_id, bool word_end) const override
void remove_edge(NODE_REF node1, NODE_REF node2, bool word_end, UNICHAR_ID unichar_id)
static const char kLowerPatternUnicode[]
UNICHAR_ID upper_pattern_
uint64_t deref_node_index_mask_
static const char kDigitPatternUnicode[]
UNICHAR_ID character_class_to_pattern(char ch)
static const char kAlphanumPatternUnicode[]
void initialize_patterns(UNICHARSET *unicharset)
NODE_REF next_node_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns the next node visited by following this edge.
const STRING & lang() const
bool read_pattern_list(const char *filename, const UNICHARSET &unicharset)
bool end_of_word(EDGE_REF edge_ref) const override
bool end_of_word_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns true if this edge marks the end of a word.
GenericVector< NodeChild > NodeChildVector
static const char kPuncPatternUnicode[]
static const char kAlphaPatternUnicode[]
void print_node(NODE_REF node, int max_num_edges) const override
UNICHAR_ID alphanum_pattern_
NODE_REF next_node(EDGE_REF edge_ref) const override
void add_word_ending(EDGE_RECORD *edge, NODE_REF the_next_node, bool repeats, UNICHAR_ID unichar_id)
bool add_new_edge(NODE_REF node1, NODE_REF node2, bool repeats, bool word_end, UNICHAR_ID unichar_id)
void reduce_node_input(NODE_REF node, NODE_MARKER reduced_nodes)
UNICHAR_ID edge_letter(EDGE_REF edge_ref) const override
UNICHAR_ID unichar_id_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns UNICHAR_ID recorded in this edge.
static const int kSaneNumConcreteChars
bool read_word_list(const char *filename, GenericVector< STRING > *words)
EDGE_REF make_edge_ref(NODE_REF node_index, EDGE_INDEX edge_index) const
void print_all(const char *msg, int max_num_edges)
UNICHAR_ID alpha_pattern_
bool initialized_patterns_
UNICHAR_ID digit_pattern_
void sort_edges(EDGE_VECTOR *edges)
void init(int unicharset_size)
void link_edge(EDGE_RECORD *edge, NODE_REF nxt, bool repeats, int direction, bool word_end, UNICHAR_ID unichar_id)
bool marker_flag_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns the marker flag of this edge.
SquishedDawg * trie_to_dawg()
bool eliminate_redundant_edges(NODE_REF node, const EDGE_RECORD &edge1, const EDGE_RECORD &edge2)
Trie(DawgType type, const STRING &lang, PermuterType perm, int unicharset_size, int debug_level)
bool add_word_to_dawg(const WERD_CHOICE &word, const GenericVector< bool > *repetitions)
EDGE_REF pattern_loop_edge(EDGE_REF edge_ref, UNICHAR_ID unichar_id, bool word_end) const override
void KillEdge(EDGE_RECORD *edge_rec) const
static const char kUpperPatternUnicode[]
void remove_edge_linkage(NODE_REF node1, NODE_REF node2, int direction, bool word_end, UNICHAR_ID unichar_id)
uint64_t deref_direction_mask_
static const char * get_reverse_policy_name(RTLReversePolicy reverse_policy)
bool add_edge_linkage(NODE_REF node1, NODE_REF node2, bool repeats, int direction, bool word_end, UNICHAR_ID unichar_id)
GenericVector< EDGE_INDEX > root_back_freelist_
bool add_word_list(const GenericVector< STRING > &words, const UNICHARSET &unicharset, Trie::RTLReversePolicy reverse_policy)
void unichar_id_to_patterns(UNICHAR_ID unichar_id, const UNICHARSET &unicharset, GenericVector< UNICHAR_ID > *vec) const override
DLLSYM void tprintf(const char *format,...)
void print_edge_rec(const EDGE_RECORD &edge_rec) const
int direction_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns the direction flag of this edge.
bool can_be_eliminated(const EDGE_RECORD &edge_rec)
bool reduce_lettered_edges(EDGE_INDEX edge_index, UNICHAR_ID unichar_id, NODE_REF node, EDGE_VECTOR *backward_edges, NODE_MARKER reduced_nodes)
EDGE_VECTOR backward_edges
EDGE_RECORD * deref_edge_ref(EDGE_REF edge_ref) const
bool DeadEdge(const EDGE_RECORD &edge_rec) const
UNICHAR_ID lower_pattern_
bool read_and_add_word_list(const char *filename, const UNICHARSET &unicharset, Trie::RTLReversePolicy reverse)
void unichar_ids_of(NODE_REF node, NodeChildVector *vec, bool word_end) const override