#include <stdio.h>
#include "classify.h"
#include "dawg.h"
#include "dict.h"
#include "emalloc.h"
#include "freelist.h"
#include "helpers.h"
#include "serialis.h"
#include "trie.h"
#include "unicharset.h"
Go to the source code of this file.
int main (int argc, char **argv)

int main(int argc, char **argv)
This program reads in a text file consisting of feature samples from a training page in the following format:
FontName UTF8-char-str xmin ymin xmax ymax page-number
NumberOfFeatureTypes(N)
FeatureTypeName1 NumberOfFeatures(M)
Feature1
...
FeatureM
FeatureTypeName2 NumberOfFeatures(M)
Feature1
...
FeatureM
...
FeatureTypeNameN NumberOfFeatures(M)
Feature1
...
FeatureM
FontName CharName ...
The result of this program is a binary inttemp file used by the OCR engine.
- Parameters
- argc: number of command line arguments
- argv: array of command line arguments
- Returns
- none
- Note
- Exceptions: none
- History: Fri Aug 18 08:56:17 1989, DSJ, Created.
- History: Mon May 18 1998, Christy Russson, Revision started.
Definition at line 35 of file wordlist2dawg.cpp.
36 if (!(argc == 4 || (argc == 5 && strcmp(argv[1],
"-t") == 0) ||
37 (argc == 6 && strcmp(argv[1],
"-r") == 0))) {
38 printf(
"Usage: %s [-t | -r [reverse policy] ] word_list_file"
39 " dawg_file unicharset_file\n", argv[0]);
44 if (argc == 5) ++argv_index;
50 sscanf(argv[++argv_index],
"%d", &tmp_int);
52 tprintf(
"Set reverse_policy to %s\n",
55 if (argc == 7) argv_index += 3;
56 const char* wordlist_filename = argv[++argv_index];
57 const char* dawg_filename = argv[++argv_index];
58 const char* unicharset_file = argv[++argv_index];
59 tprintf(
"Loading unicharset from '%s'\n", unicharset_file);
61 tprintf(
"Failed to load unicharset from '%s'\n", unicharset_file);
66 if (argc == 4 || argc == 6) {
71 tprintf(
"Reading word list from '%s'\n", wordlist_filename);
72 if (!trie.read_and_add_word_list(wordlist_filename, unicharset,
74 tprintf(
"Failed to add word list from '%s'\n", wordlist_filename);
77 tprintf(
"Reducing Trie to SquishedDawg\n");
80 tprintf(
"Writing squished DAWG to '%s'\n", dawg_filename);
83 tprintf(
"Dawg is empty, skip producing the output file\n");
86 }
else if (argc == 5) {
87 tprintf(
"Loading dawg DAWG from '%s'\n", dawg_filename);
93 tprintf(
"Checking word list from '%s'\n", wordlist_filename);
94 words.check_for_words(wordlist_filename, unicharset,
true);
96 tprintf(
"Invalid command-line options\n");
bool load_from_file(const char *const filename, bool skip_fragments)
void write_squished_dawg(FILE *file)
Writes the squished/reduced Dawg to a file.
static const char * get_reverse_policy_name(RTLReversePolicy reverse_policy)
const UNICHARSET & getUnicharset() const