44 STRING output_fname = fname;
45 const char *lastdot = strrchr(output_fname.
string(),
'.');
46 if (lastdot !=
NULL) output_fname[lastdot - output_fname.
string()] =
'\0';
47 output_fname +=
".txt";
64 if (tbox->
left() < 0) {
84 const char *lastdot = strrchr(box_fname.
string(),
'.');
85 if (lastdot !=
NULL) box_fname[lastdot - box_fname.
string()] =
'\0';
92 page_res_it.restart_page();
100 int examined_words = 0;
102 keep_going =
read_t(&page_res_it, &tbox);
109 page_res_it.forward();
110 keep_going =
read_t(&page_res_it, &tbox);
119 page_res_it.forward();
120 keep_going =
read_t(&page_res_it, &tbox);
133 page_res_it.forward();
134 }
while (keep_going);
142 for (page_res_it.restart_page(); page_res_it.block() !=
NULL;
143 page_res_it.forward()) {
144 if (page_res_it.word()) {
145 if (page_res_it.word()->uch_set ==
NULL)
150 if (examined_words < 0.85 * total_words) {
151 tprintf(
"TODO(antonova): clean up recog_training_segmented; "
152 " It examined only a small fraction of the ambigs image.\n");
154 tprintf(
"recog_training_segmented: examined %d / %d words.\n",
155 examined_words, total_words);
159 static void PrintPath(
int length,
const BLOB_CHOICE** blob_choices,
161 const char *label, FILE *output_file) {
163 float certainty = 0.0f;
164 for (
int i = 0; i < length; ++i) {
166 fprintf(output_file,
"%s",
168 rating += blob_choice->
rating();
169 if (certainty > blob_choice->
certainty())
172 fprintf(output_file,
"\t%s\t%.4f\t%.4f\n",
173 label, rating, certainty);
178 static void PrintMatrixPaths(
int col,
int dim,
182 const char *label, FILE *output_file) {
183 for (
int row = col; row < dim && row - col < ratings.
bandwidth(); ++row) {
185 BLOB_CHOICE_IT bc_it(ratings.
get(col, row));
186 for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) {
187 blob_choices[length] = bc_it.data();
189 PrintMatrixPaths(row + 1, dim, ratings, length + 1, blob_choices,
190 unicharset, label, output_file);
192 PrintPath(length + 1, blob_choices, unicharset, label, output_file);
218 tprintf(
"Not outputting illegal unichar %s\n", label);
225 PrintMatrixPaths(0, dim, *werd_res->
ratings, 0, blob_choices,
226 unicharset, label, output_file);
227 delete [] blob_choices;
void recog_training_segmented(const STRING &fname, PAGE_RES *page_res, volatile ETEXT_DESC *monitor, FILE *output_file)
WERD_CHOICE * best_choice
T get(int column, int row) const
bool ReadNextBox(int *line_number, FILE *box_file, STRING *utf8_str, TBOX *bounding_box)
TBOX bounding_box() const
FILE * init_recog_training(const STRING &fname)
BLOCK_RES * block() const
const char *const id_to_unichar(UNICHAR_ID id) const
int tessedit_tess_adaption_mode
bool tessedit_enable_doc_dict
bool tessedit_ambigs_training
bool stopper_no_acceptable_choices
void SetupWordPassN(int pass_n, WordData *word)
bool encode_string(const char *str, bool give_up_on_failure, GenericVector< UNICHAR_ID > *encoding, GenericVector< char > *lengths, int *encoded_length) const
void classify_word_and_language(int pass_n, PAGE_RES_IT *pr_it, WordData *word_data)
bool read_t(PAGE_RES_IT *page_res_it, TBOX *tbox)
FILE * open_file(const char *filename, const char *mode)
const inT16 kMaxBoxEdgeDiff
const char * string() const
UNICHAR_ID unichar_id() const
void ambigs_classify_and_output(const char *label, PAGE_RES_IT *pr_it, FILE *output_file)
void rotate(const FCOORD &vec)