43 bool requires_complete)
const {
44 if (word.
length() == 0)
return !requires_complete;
46 int end_index = word.
length() - 1;
47 for (
int i = 0; i < end_index; i++) {
49 if (edge == NO_EDGE) {
69 bool enable_wildcard)
const {
70 if (filename ==
nullptr)
return 0;
77 word_file = fopen(filename,
"r");
78 if (word_file ==
nullptr) {
79 tprintf(
"Error: Could not open file %s\n", filename);
89 enable_wildcard ? wildcard : INVALID_UNICHAR_ID)) {
90 tprintf(
"Missing word: %s\n",
string);
94 tprintf(
"Failed to create a valid word from %s\n",
string);
104 std::function<
void(
const WERD_CHOICE*)> cb)
const {
109 static void CallWithUTF8(std::function<
void(
const char*)> cb,
117 std::function<
void(
const char*)> cb)
const {
118 using namespace std::placeholders;
120 std::bind(CallWithUTF8, cb, _1));
127 std::function<
void(
const WERD_CHOICE*)> cb)
const {
130 for (
int i = 0; i < children.
size(); i++) {
132 next_word.append_unichar_id(children[i].unichar_id, 1, 0.0, 0.0);
148 if (wildcard != INVALID_UNICHAR_ID && word->
unichar_id(index) == wildcard) {
149 bool any_matched =
false;
152 for (
int i = 0; i < vec.size(); ++i) {
160 word_end = index == word->
length() - 1;
162 if (edge != NO_EDGE) {
167 }
else if (node != 0) {
196 bool word_end)
const {
200 EDGE_REF end = num_forward_edges_in_node0 - 1;
202 while (start <= end) {
203 edge = (start + end) >> 1;
205 unichar_id, edges_[edge]);
208 }
else if (compare == 1) {
215 if (edge != NO_EDGE && edge_occupied(edge)) {
220 }
while (!last_edge(edge++));
226 int32_t SquishedDawg::num_forward_edges(
NODE_REF node)
const {
230 if (forward_edge (edge)) {
233 }
while (!last_edge(edge++));
240 if (node == NO_EDGE)
return;
243 const char *forward_string =
"FORWARD";
244 const char *backward_string =
" ";
246 const char *last_string =
"LAST";
247 const char *not_last_string =
" ";
249 const char *eow_string =
"EOW";
250 const char *not_eow_string =
" ";
252 const char *direction;
258 if (edge_occupied(edge)) {
261 forward_edge(edge) ? forward_string : backward_string;
262 is_last = last_edge(edge) ? last_string : not_last_string;
263 eow =
end_of_word(edge) ? eow_string : not_eow_string;
268 direction, is_last, eow);
270 if (edge - node > max_num_edges)
return;
271 }
while (!last_edge(edge++));
273 if (edge < num_edges_ &&
274 edge_occupied(edge) && backward_edge(edge)) {
277 forward_edge(edge) ? forward_string : backward_string;
278 is_last = last_edge(edge) ? last_string : not_last_string;
279 eow =
end_of_word(edge) ? eow_string : not_eow_string;
283 ", unichar_id = %d, %s %s %s\n",
285 direction, is_last, eow);
288 }
while (!last_edge(edge++));
297 void SquishedDawg::print_edge(
EDGE_REF edge)
const {
298 if (edge == NO_EDGE) {
302 ", unichar_id = '%d', %s %s %s\n", edge,
304 (forward_edge(edge) ?
"FORWARD" :
" "),
305 (last_edge(edge) ?
"LAST" :
" "),
310 bool SquishedDawg::read_squished_dawg(TFile *
file) {
316 if (!
file->DeSerialize(&magic))
return false;
322 int32_t unicharset_size;
323 if (!
file->DeSerialize(&unicharset_size))
return false;
324 if (!
file->DeSerialize(&num_edges_))
return false;
329 if (!
file->DeSerialize(&edges_[0], num_edges_))
return false;
331 tprintf(
"type: %d lang: %s perm: %d unicharset_size: %d num_edges: %d\n",
333 for (
EDGE_REF edge = 0; edge < num_edges_; ++edge) print_edge(edge);
338 std::unique_ptr<EDGE_REF[]> SquishedDawg::build_node_map(
339 int32_t *num_nodes)
const {
341 std::unique_ptr<EDGE_REF[]> node_map(
new EDGE_REF[num_edges_]);
342 int32_t node_counter;
345 for (edge = 0; edge < num_edges_; edge++)
348 node_counter = num_forward_edges(0);
351 for (edge = 0; edge < num_edges_; edge++) {
353 if (forward_edge(edge)) {
355 node_map[edge] = (edge ? node_counter : 0);
356 num_edges = num_forward_edges(edge);
357 if (edge != 0) node_counter += num_edges;
359 if (edge >= num_edges_)
break;
360 if (backward_edge(edge))
while (!last_edge(edge++));
370 int32_t node_count = 0;
376 std::unique_ptr<EDGE_REF[]> node_map(build_node_map(&node_count));
380 if (!
file->Serialize(&magic))
return false;
385 for (edge=0; edge < num_edges_; edge++)
386 if (forward_edge(edge))
390 if (!
file->Serialize(&num_edges))
return false;
393 tprintf(
"%d nodes in DAWG\n", node_count);
394 tprintf(
"%d edges in DAWG\n", num_edges);
397 for (edge = 0; edge < num_edges_; edge++) {
398 if (forward_edge(edge)) {
401 set_next_node(edge, node_map[old_index]);
402 temp_record = edges_[edge];
403 if (!
file->Serialize(&temp_record))
return false;
404 set_next_node(edge, old_index);
405 }
while (!last_edge(edge++));
407 if (edge >= num_edges_)
break;
408 if (backward_edge(edge))
409 while (!last_edge(edge++));