19 #ifndef TESSERACT_WORDREC_WORDREC_H_ 20 #define TESSERACT_WORDREC_WORDREC_H_ 22 #ifdef DISABLED_LEGACY_ENGINE 24 #include "config_auto.h" 47 namespace tesseract {
class LMPainPoints; }
// Forward declaration of tesseract::TessdataManager. In the declarations
// visible in this header the type is only used through pointers (see the
// program_editup parameters), so the full definition is not required here.
48 namespace tesseract {
class TessdataManager; }
// Forward declaration of struct tesseract::BestChoiceBundle. Its definition
// lives outside this chunk; only the name is introduced here.
49 namespace tesseract {
struct BestChoiceBundle; }
63 class Wordrec :
public Classify {
// Declaration of Wordrec::program_editup. In this listing it appears before
// the `#else // DISABLED_LEGACY_ENGINE not defined` marker, so it looks like
// the variant used when DISABLED_LEGACY_ENGINE is defined -- confirm against
// the unmangled header.
//   textbase        - C string; its meaning is not visible here (TODO confirm).
//   init_classifier - TessdataManager pointer; presumably the data source for
//                     classifier initialization (verify in the definition).
//   init_dict       - TessdataManager pointer; presumably the data source for
//                     dictionary initialization (verify in the definition).
76 void program_editup(
const char *textbase, TessdataManager *init_classifier,
77 TessdataManager *init_dict);
88 #else // DISABLED_LEGACY_ENGINE not defined 115 : classified_row_(-1),
116 revisit_whole_column_(false),
117 column_classified_(false) {}
122 column_classified_ =
true;
128 classified_row_ = row;
134 revisit_whole_column_ =
true;
139 classified_row_ = -1;
140 revisit_whole_column_ =
false;
141 column_classified_ =
false;
147 return revisit_whole_column_ || column_classified_ || classified_row_ >= 0;
151 return row == classified_row_ || column_classified_;
155 return revisit_whole_column_ || column_classified_ ? -1 : classified_row_;
167 bool revisit_whole_column_;
171 bool column_classified_;
196 "Merge the fragments in the ratings matrix and delete them " 201 "force associator to run regardless of what enable_assoc is." 202 "This is used for CJK where component grouping is necessary.");
205 "Use information from fragments to guide chopping process");
// Integer parameters related to blob chopping, declared through
// INT_VAR_H(name, val, comment). The macro is defined outside this chunk;
// `val` is presumably the default value -- confirm against the macro.
// The string argument is the parameter's human-readable description.

// repair_unchopped_blobs (val 1): see the description string; how the value
// is interpreted is not visible here.
206 INT_VAR_H(repair_unchopped_blobs, 1,
"Fix blobs that aren't chopped");
// chop_min_outline_points (val 6): minimum number of points on an outline.
213 INT_VAR_H(chop_min_outline_points, 6,
"Min Number of Points on Outline");
// chop_seam_pile_size (val 150): upper bound on the number of seams kept in
// seam_pile.
214 INT_VAR_H(chop_seam_pile_size, 150,
"Max number of seams in seam_pile");
// chop_inside_angle (val -50): minimum inside angle bend; units are not shown
// in this chunk (TODO confirm).
216 INT_VAR_H(chop_inside_angle, -50,
"Min Inside Angle Bend");
// chop_min_outline_area (val 2000): minimum outline area; units are not shown
// in this chunk (TODO confirm).
217 INT_VAR_H(chop_min_outline_area, 2000,
"Min Outline Area");
// chop_centered_maxwidth (val 90): width of the smaller chopped blob above
// which an off-center chop is no longer a concern (per the description).
221 INT_VAR_H(chop_centered_maxwidth, 90,
"Width of (smaller) chopped blobs " 222 "above which we don't care that a chop is not near the center.");
// segment_adjust_debug (val 0): debug setting for segmentation adjustment.
228 INT_VAR_H(segment_adjust_debug, 0,
"Segmentation adjustment debug");
230 "include fixed-pitch heuristics in char segmentation");
// wordrec_debug_level (val 0): debug level for wordrec, declared through
// INT_VAR_H(name, val, comment). `val` is presumably the default -- confirm
// against the macro definition, which is outside this chunk.
231 INT_VAR_H(wordrec_debug_level, 0,
"Debug level for wordrec");
233 "Max number of broken pieces to associate");
// Blamer and segmentation-search parameters, declared through
// BOOL_VAR_H / INT_VAR_H (name, val, comment). Both macros are defined
// outside this chunk; `val` is presumably the default value -- confirm.

// wordrec_skip_no_truth_words (val false): per the description, restricts OCR
// to words that had truth recorded in BlamerBundle.
234 BOOL_VAR_H(wordrec_skip_no_truth_words,
false,
235 "Only run OCR for words that had truth recorded in BlamerBundle");
// wordrec_debug_blamer (val false): enables printing of blamer debug messages.
236 BOOL_VAR_H(wordrec_debug_blamer,
false,
"Print blamer debug messages");
// wordrec_run_blamer (val false): enables the attempt to assign blame for
// errors.
237 BOOL_VAR_H(wordrec_run_blamer,
false,
"Try to set the blame for errors");
// segsearch_debug_level (val 0): debug level for SegSearch.
238 INT_VAR_H(segsearch_debug_level, 0,
"SegSearch debug level");
// segsearch_max_pain_points (val 2000): maximum number of pain points stored
// in the queue.
239 INT_VAR_H(segsearch_max_pain_points, 2000,
240 "Maximum number of pain points stored in the queue");
// segsearch_max_futile_classifications (val 10): maximum number of pain point
// classifications per word. Elsewhere in this header it is compared against
// num_futile_classifications with `>=`.
241 INT_VAR_H(segsearch_max_futile_classifications, 10,
242 "Maximum number of pain point classifications per word.");
244 "Maximum character width-to-height ratio");
246 "Save alternative paths found during chopping " 247 "and segmentation search");
// Declaration only; the definition is outside this chunk.
//   best_choices - list taken by const reference (read-only input).
//   word         - non-const WERD_RES pointer; presumably where the
//                  alternative choices are stored (verify in the definition).
255 void SaveAltChoices(
const LIST &best_choices,
WERD_RES *word);
259 void FillLattice(
const MATRIX &ratings,
const WERD_CHOICE_LIST &best_choices,
265 const WERD_CHOICE_LIST &best_choices,
268 (this->*fill_lattice_)(ratings, best_choices, unicharset, blamer_bundle);
272 void program_editup(
const char *textbase,
TessdataManager *init_classifier,
// Declaration only; the definition is outside this chunk.
// NOTE(review): `elasped_time` looks like a misspelling of `elapsed_time`.
// Renaming it would not affect callers, but the definition should be checked
// and changed together with this declaration.
275 void program_editdown(int32_t elasped_time);
// Declaration only. Takes a TBLOB pointer and returns a pointer to a
// BLOB_CHOICE_LIST. Ownership of the returned list is not visible in this
// chunk -- TODO confirm against the definition before freeing or retaining it.
279 BLOB_CHOICE_LIST *call_matcher(
TBLOB* blob);
282 BLOB_CHOICE_LIST *classify_blob(
TBLOB *blob,
// Declaration only. Operates on the given WERD_RES through a non-const
// pointer; presumably runs the segmentation search for that word and updates
// it in place (verify in the definition).
353 void DoSegSearch(
WERD_RES* word_res);
// Declaration only. Boolean predicate on an edge point. Presumably related to
// the chop_inside_angle parameter declared in this header -- confirm in the
// definition.
360 bool is_inside_angle(
EDGEPT *pt);
368 void vertical_projection_point(
EDGEPT *split_point,
EDGEPT *target_point,
370 EDGEPT_CLIST *new_points);
373 SEAM *attempt_blob_chop(
TWERD *word,
TBLOB *blob, int32_t blob_number,
375 SEAM *chop_numbered_blob(
TWERD *word, int32_t blob_number,
379 WERD_RES *word_res,
int *blob_number);
382 bool split_next_to_fragment,
// Declaration only. Takes the word to process as a non-const WERD_RES
// pointer; presumably the entry point for chopping a word (verify in the
// definition).
390 void chop_word_main(
WERD_RES *word);
391 void improve_by_chopping(
float rating_cert_scale,
398 float rating_ceiling,
399 bool split_next_to_fragment);
// Declaration only. Returns an int, presumably the index of the blob to
// split; any "no blob found" sentinel is not visible here (TODO confirm).
//   fixpt - DANGERR pointer supplying the input for the selection.
400 int select_blob_to_split_from_fixpt(
DANGERR *fixpt);
// Declaration only.
//   new_priority - priority value associated with new_seam.
//   new_seam     - seam to add; whether the queue takes ownership is not
//                  visible here (TODO confirm).
//   seams        - destination queue, passed by pointer.
403 void add_seam_to_queue(
float new_priority,
SEAM *new_seam,
SeamQueue* seams);
407 void combine_seam(
const SeamPile& seam_pile,
417 EDGEPT_CLIST *new_points,
434 const char* description,
439 void merge_fragments(
MATRIX *ratings,
449 void get_fragment_lists(int16_t current_frag,
452 int16_t num_frag_parts,
455 BLOB_CHOICE_LIST *choice_lists);
458 void merge_and_put_fragment_lists(int16_t row,
460 int16_t num_frag_parts,
461 BLOB_CHOICE_LIST *choice_lists,
469 void fill_filtered_fragment_list(BLOB_CHOICE_LIST *choices,
472 BLOB_CHOICE_LIST *filtered_choices);
486 const WERD_CHOICE_LIST &best_choices,
493 num_futile_classifications >=
494 segsearch_max_futile_classifications);
522 void UpdateSegSearchNodes(
523 float rating_cert_scale,
533 void ProcessSegSearchPainPoint(
float pain_point_priority,
535 const char* pain_point_type,
543 void ResetNGramSearch(
WERD_RES* word_res,
550 void InitBlamerForSegSearch(
WERD_RES *word_res,
558 #endif // DISABLED_LEGACY_ENGINE 560 #endif // TESSERACT_WORDREC_WORDREC_H_
#define INT_VAR_H(name, val, comment)
void SetColumnClassified()
int dict_word(const WERD_CHOICE &word)
#define ELISTIZEH(CLASSNAME)
WERD_CHOICE * prev_word_best_choice_
#define BOOL_VAR_H(name, val, comment)
#define double_VAR_H(name, val, comment)
void CallFillLattice(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
void RevisitWholeColumn()
std::unique_ptr< LanguageModel > language_model_
bool AcceptableChoiceFound()
void program_editdown(int32_t elasped_time)
void SetBlobClassified(int row)
virtual ~Wordrec()=default
void program_editup(const char *textbase, TessdataManager *init_classifier, TessdataManager *init_dict)
Bundle together all the things pertaining to the best choice/state.
GenericVector< int > blame_reasons_
bool SegSearchDone(int num_futile_classifications)
bool IsRowJustClassified(int row) const
bool wordrec_debug_blamer