19 #ifndef TESSERACT_WORDREC_WORDREC_H_
20 #define TESSERACT_WORDREC_WORDREC_H_
22 #ifdef DISABLED_LEGACY_ENGINE
// Forward declaration of tesseract::TessdataManager (no definition here).
// In this branch it is only used as a pointer parameter type by
// program_editup below, so an incomplete type is sufficient.
29 namespace tesseract {
class TessdataManager; }
35 class Wordrec :
public Classify {
// Declaration only (the definition is not in this header chunk).
//   textbase        - read-only C string.
//   init_classifier - pointer to a TessdataManager.
//   init_dict       - pointer to a second TessdataManager.
// NOTE(review): judging by the parameter names, the two managers presumably
// supply the data used to set up the classifier and the dictionary
// respectively - confirm against the definition. Null-handling and ownership
// of the pointers are not visible from here.
48 void program_editup(
const char *textbase, TessdataManager *init_classifier,
49 TessdataManager *init_dict);
60 #else // DISABLED_LEGACY_ENGINE not defined
63 #include "config_auto.h"
// Forward declarations of three types in namespace tesseract:
// class LMPainPoints, class TessdataManager and struct BestChoiceBundle.
// Only the names are introduced here; the full definitions live elsewhere.
// Note the class/struct keywords differ per type and should stay in sync
// with the real definitions to avoid mismatched-tag warnings.
92 namespace tesseract {
class LMPainPoints; }
93 namespace tesseract {
class TessdataManager; }
94 namespace tesseract {
struct BestChoiceBundle; }
115 : classified_row_(-1),
116 revisit_whole_column_(false),
117 column_classified_(false) {}
122 column_classified_ =
true;
128 classified_row_ = row;
134 revisit_whole_column_ =
true;
139 classified_row_ = -1;
140 revisit_whole_column_ =
false;
141 column_classified_ =
false;
147 return revisit_whole_column_ || column_classified_ || classified_row_ >= 0;
151 return row == classified_row_ || column_classified_;
155 return revisit_whole_column_ || column_classified_ ? -1 : classified_row_;
167 bool revisit_whole_column_;
171 bool column_classified_;
196 "Merge the fragments in the ratings matrix and delete them "
200 "force associator to run regardless of what enable_assoc is."
201 "This is used for CJK where component grouping is necessary.");
// Parameter declarations made through the INT_VAR_H / BOOL_VAR_H macros.
// Each invocation passes (name, literal value, help string).
// NOTE(review): the literal is presumably the parameter's default value -
// confirm against the macro definition, which is not visible in this chunk.
// The help strings are runtime text and must not be edited casually.

// repair_unchopped_blobs: integer parameter, literal 1.
202 INT_VAR_H(repair_unchopped_blobs, 1,
"Fix blobs that aren't chopped");
// chop_min_outline_points: integer parameter, literal 6.
209 INT_VAR_H(chop_min_outline_points, 6,
"Min Number of Points on Outline");
// chop_seam_pile_size: integer parameter, literal 150.
210 INT_VAR_H(chop_seam_pile_size, 150,
"Max number of seams in seam_pile");
// chop_inside_angle: integer parameter, literal -50 (units not stated here).
212 INT_VAR_H(chop_inside_angle, -50,
"Min Inside Angle Bend");
// chop_min_outline_area: integer parameter, literal 2000 (units not stated).
213 INT_VAR_H(chop_min_outline_area, 2000,
"Min Outline Area");
// chop_centered_maxwidth: integer parameter, literal 90; the help text is
// built from two adjacent string literals that the compiler concatenates.
217 INT_VAR_H(chop_centered_maxwidth, 90,
"Width of (smaller) chopped blobs "
218 "above which we don't care that a chop is not near the center.");
// assume_fixed_pitch_char_segment: boolean parameter, literal false.
224 BOOL_VAR_H(assume_fixed_pitch_char_segment,
false,
225 "include fixed-pitch heuristics in char segmentation");
// wordrec_debug_level: integer parameter, literal 0.
226 INT_VAR_H(wordrec_debug_level, 0,
"Debug level for wordrec");
228 "Max number of broken pieces to associate");
// Blamer- and segmentation-search-related parameter declarations.
// Each invocation passes (name, literal value, help string).
// NOTE(review): the literal is presumably the default value - confirm
// against the BOOL_VAR_H / INT_VAR_H macro definitions (not visible here).

// wordrec_skip_no_truth_words: boolean parameter, literal false.
229 BOOL_VAR_H(wordrec_skip_no_truth_words,
false,
230 "Only run OCR for words that had truth recorded in BlamerBundle");
// wordrec_debug_blamer: boolean parameter, literal false.
231 BOOL_VAR_H(wordrec_debug_blamer,
false,
"Print blamer debug messages");
// wordrec_run_blamer: boolean parameter, literal false.
232 BOOL_VAR_H(wordrec_run_blamer,
false,
"Try to set the blame for errors");
// segsearch_debug_level: integer parameter, literal 0.
233 INT_VAR_H(segsearch_debug_level, 0,
"SegSearch debug level");
// segsearch_max_pain_points: integer parameter, literal 2000.
234 INT_VAR_H(segsearch_max_pain_points, 2000,
235 "Maximum number of pain points stored in the queue");
// segsearch_max_futile_classifications: integer parameter, literal 10.
// It is compared with `num_futile_classifications >= ...` further down in
// this header, so it acts as an upper bound in that check.
236 INT_VAR_H(segsearch_max_futile_classifications, 10,
237 "Maximum number of pain point classifications per word.");
239 "Maximum character width-to-height ratio");
241 "Save alternative paths found during chopping "
242 "and segmentation search");
// Declaration only.
//   best_choices - list passed by const reference (not modified via this
//                  parameter).
//   word         - pointer to a WERD_RES, passed non-const.
// NOTE(review): presumably stores alternative choices from best_choices on
// *word - confirm against the definition, which is not in this chunk.
250 void SaveAltChoices(
const LIST &best_choices,
WERD_RES *word);
254 void FillLattice(
const MATRIX &ratings,
const WERD_CHOICE_LIST &best_choices,
260 const WERD_CHOICE_LIST &best_choices,
263 (this->*fill_lattice_)(ratings, best_choices, unicharset, blamer_bundle);
267 void program_editup(
const char *textbase,
TessdataManager *init_classifier,
// Declaration only; takes a single 32-bit signed integer and returns nothing.
// NOTE(review): the parameter name `elasped_time` looks like a typo for
// `elapsed_time`. Renaming it in a declaration does not affect callers, but
// keep it in sync with the definition. Units of the value are not stated here.
270 void program_editdown(int32_t elasped_time);
// Declaration only.
//   blob - pointer to the TBLOB to be processed.
// Returns a pointer to a BLOB_CHOICE_LIST.
// NOTE(review): ownership of the returned list (who deletes it) is not
// visible from this declaration - confirm against the definition.
274 BLOB_CHOICE_LIST *call_matcher(
TBLOB* blob);
277 BLOB_CHOICE_LIST *classify_blob(
TBLOB *blob,
// Declaration only.
//   word_res - pointer to the WERD_RES to operate on (non-const).
// NOTE(review): presumably runs the segmentation search for one word and
// stores its results in *word_res - confirm against the definition.
348 void DoSegSearch(
WERD_RES* word_res);
// Declaration only.
//   pt - pointer to an EDGEPT (outline edge point).
// Returns a bool.
// NOTE(review): presumably tests the angle at pt against the
// chop_inside_angle parameter declared earlier in this header - confirm
// against the definition.
355 bool is_inside_angle(
EDGEPT *pt);
363 void vertical_projection_point(
EDGEPT *split_point,
EDGEPT *target_point,
365 EDGEPT_CLIST *new_points);
368 SEAM *attempt_blob_chop(
TWERD *word,
TBLOB *blob, int32_t blob_number,
370 SEAM *chop_numbered_blob(
TWERD *word, int32_t blob_number,
374 WERD_RES *word_res,
int *blob_number);
377 bool split_next_to_fragment,
// Declaration only.
//   word - pointer to the WERD_RES to operate on (non-const).
// NOTE(review): presumably the entry point for chopping a word's blobs -
// confirm against the definition, which is not in this chunk.
385 void chop_word_main(
WERD_RES *word);
386 void improve_by_chopping(
float rating_cert_scale,
393 float rating_ceiling,
394 bool split_next_to_fragment);
// Declaration only.
//   fixpt - pointer to a DANGERR.
// Returns an int.
// NOTE(review): presumably the index of the blob chosen for splitting, with
// some sentinel when none qualifies - confirm against the definition.
395 int select_blob_to_split_from_fixpt(
DANGERR *fixpt);
// Declaration only.
//   new_priority - float priority value associated with the seam.
//   new_seam     - pointer to the SEAM to add.
//   seams        - pointer to the SeamQueue that receives it.
// NOTE(review): whether the queue takes ownership of new_seam, and what
// happens when new_seam is null, is not visible here - confirm against the
// definition.
398 void add_seam_to_queue(
float new_priority,
SEAM *new_seam,
SeamQueue* seams);
402 void combine_seam(
const SeamPile& seam_pile,
412 EDGEPT_CLIST *new_points,
429 const char* description,
434 void merge_fragments(
MATRIX *ratings,
444 void get_fragment_lists(int16_t current_frag,
447 int16_t num_frag_parts,
450 BLOB_CHOICE_LIST *choice_lists);
453 void merge_and_put_fragment_lists(int16_t row,
455 int16_t num_frag_parts,
456 BLOB_CHOICE_LIST *choice_lists,
464 void fill_filtered_fragment_list(BLOB_CHOICE_LIST *choices,
467 BLOB_CHOICE_LIST *filtered_choices);
481 const WERD_CHOICE_LIST &best_choices,
488 num_futile_classifications >=
489 segsearch_max_futile_classifications);
517 void UpdateSegSearchNodes(
518 float rating_cert_scale,
528 void ProcessSegSearchPainPoint(
float pain_point_priority,
530 const char* pain_point_type,
538 void ResetNGramSearch(
WERD_RES* word_res,
545 void InitBlamerForSegSearch(
WERD_RES *word_res,
553 #endif // DISABLED_LEGACY_ENGINE
555 #endif // TESSERACT_WORDREC_WORDREC_H_