43 STRING output_fname = fname;
44 const char *lastdot = strrchr(output_fname.
string(),
'.');
45 if (lastdot !=
nullptr) output_fname[lastdot - output_fname.
string()] =
'\0';
46 output_fname +=
".txt";
47 FILE *output_file = fopen(output_fname.
string(),
"a+");
48 if (output_file ==
nullptr) {
49 tprintf(
"Error: Could not open file %s\n", output_fname.
string());
57 while (page_res_it->
block() !=
nullptr && page_res_it->
word() ==
nullptr)
60 if (page_res_it->
word() !=
nullptr) {
67 if (tbox->
left() < 0) {
87 const char *lastdot = strrchr(box_fname.
string(),
'.');
88 if (lastdot !=
nullptr) box_fname[lastdot - box_fname.
string()] =
'\0';
91 FILE *box_file = fopen(box_fname.
string(),
"r");
92 if (box_file ==
nullptr) {
93 tprintf(
"Error: Could not open file %s\n", box_fname.
string());
107 int examined_words = 0;
109 keep_going = read_t(&page_res_it, &tbox);
117 keep_going = read_t(&page_res_it, &tbox);
127 keep_going = read_t(&page_res_it, &tbox);
141 }
while (keep_going);
150 if (page_res_it.
word()) {
156 if (examined_words < 0.85 * total_words) {
157 tprintf(
"TODO(antonova): clean up recog_training_segmented; " 158 " It examined only a small fraction of the ambigs image.\n");
160 tprintf(
"recog_training_segmented: examined %d / %d words.\n",
161 examined_words, total_words);
165 static void PrintPath(
int length,
const BLOB_CHOICE** blob_choices,
167 const char *label, FILE *output_file) {
169 float certainty = 0.0f;
170 for (
int i = 0; i < length; ++i) {
172 fprintf(output_file,
"%s",
174 rating += blob_choice->
rating();
175 if (certainty > blob_choice->
certainty())
178 fprintf(output_file,
"\t%s\t%.4f\t%.4f\n",
179 label, rating, certainty);
184 static void PrintMatrixPaths(
int col,
int dim,
188 const char *label, FILE *output_file) {
189 for (
int row = col; row < dim && row - col < ratings.
bandwidth(); ++row) {
191 BLOB_CHOICE_IT bc_it(ratings.
get(col, row));
192 for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) {
193 blob_choices[length] = bc_it.data();
195 PrintMatrixPaths(row + 1, dim, ratings, length + 1, blob_choices,
196 unicharset, label, output_file);
198 PrintPath(length + 1, blob_choices, unicharset, label, output_file);
224 tprintf(
"Not outputting illegal unichar %s\n", label);
231 PrintMatrixPaths(0, dim, *werd_res->
ratings, 0, blob_choices,
233 delete [] blob_choices;
BLOCK_RES * block() const
bool encode_string(const char *str, bool give_up_on_failure, GenericVector< UNICHAR_ID > *encoding, GenericVector< char > *lengths, int *encoded_length) const
void rotate(const FCOORD &vec)
Dict & getDict() override
const char * string() const
TBOX bounding_box() const
bool stopper_no_acceptable_choices
WERD_RES * restart_page()
bool tessedit_enable_doc_dict
bool tessedit_ambigs_training
void ambigs_classify_and_output(const char *label, PAGE_RES_IT *pr_it, FILE *output_file)
const int16_t kMaxBoxEdgeDiff
DLLSYM void tprintf(const char *format,...)
FILE * init_recog_training(const STRING &fname)
void SetupFake(const UNICHARSET &uch)
void SetupWordPassN(int pass_n, WordData *word)
bool ReadNextBox(int *line_number, FILE *box_file, STRING *utf8_str, TBOX *bounding_box)
const UNICHARSET * uch_set
const char * id_to_unichar(UNICHAR_ID id) const
int tessedit_tess_adaption_mode
void recog_training_segmented(const STRING &fname, PAGE_RES *page_res, volatile ETEXT_DESC *monitor, FILE *output_file)
UNICHAR_ID unichar_id() const
WERD_CHOICE * best_choice
void classify_word_and_language(int pass_n, PAGE_RES_IT *pr_it, WordData *word_data)