19 #ifndef TESSERACT_WORDREC_WORDREC_H__
20 #define TESSERACT_WORDREC_WORDREC_H__
46 : classified_row_(-1),
47 revisit_whole_column_(false),
48 column_classified_(false) {}
53 column_classified_ =
true;
59 classified_row_ = row;
65 revisit_whole_column_ =
true;
71 revisit_whole_column_ =
false;
72 column_classified_ =
false;
78 return revisit_whole_column_ || column_classified_ || classified_row_ >= 0;
82 return row == classified_row_ || column_classified_;
86 return revisit_whole_column_ || column_classified_ ? -1 : classified_row_;
98 bool revisit_whole_column_;
102 bool column_classified_;
127 "Merge the fragments in the ratings matrix and delete them "
132 "force associator to run regardless of what enable_assoc is."
133 "This is used for CJK where component grouping is necessary.");
136 "Use information from fragments to guide chopping process");
137 INT_VAR_H(repair_unchopped_blobs, 1,
"Fix blobs that aren't chopped");
144 INT_VAR_H(chop_min_outline_points, 6,
"Min Number of Points on Outline");
145 INT_VAR_H(chop_seam_pile_size, 150,
"Max number of seams in seam_pile");
147 INT_VAR_H(chop_inside_angle, -50,
"Min Inside Angle Bend");
148 INT_VAR_H(chop_min_outline_area, 2000,
"Min Outline Area");
152 INT_VAR_H(chop_centered_maxwidth, 90,
"Width of (smaller) chopped blobs "
153 "above which we don't care that a chop is not near the center.");
159 INT_VAR_H(segment_adjust_debug, 0,
"Segmentation adjustment debug");
161 "include fixed-pitch heuristics in char segmentation");
162 INT_VAR_H(wordrec_debug_level, 0,
"Debug level for wordrec");
164 "Max number of broken pieces to associate");
165 BOOL_VAR_H(wordrec_skip_no_truth_words,
false,
166 "Only run OCR for words that had truth recorded in BlamerBundle");
167 BOOL_VAR_H(wordrec_debug_blamer,
false,
"Print blamer debug messages");
168 BOOL_VAR_H(wordrec_run_blamer,
false,
"Try to set the blame for errors");
169 INT_VAR_H(segsearch_debug_level, 0,
"SegSearch debug level");
170 INT_VAR_H(segsearch_max_pain_points, 2000,
171 "Maximum number of pain points stored in the queue");
172 INT_VAR_H(segsearch_max_futile_classifications, 10,
173 "Maximum number of pain point classifications per word.");
175 "Maximum character width-to-height ratio");
177 "Save alternative paths found during chopping "
178 "and segmentation search");
186 void SaveAltChoices(
const LIST &best_choices,
WERD_RES *word);
190 void FillLattice(
const MATRIX &ratings,
const WERD_CHOICE_LIST &best_choices,
196 const WERD_CHOICE_LIST &best_choices,
199 (this->*fill_lattice_)(ratings, best_choices, unicharset, blamer_bundle);
203 void program_editup(
const char *textbase,
204 bool init_classifier,
207 void program_editdown(
inT32 elasped_time);
211 BLOB_CHOICE_LIST *call_matcher(
TBLOB* blob);
214 BLOB_CHOICE_LIST *classify_blob(
TBLOB *blob,
288 void DoSegSearch(
WERD_RES* word_res);
295 bool is_inside_angle(
EDGEPT *pt);
303 void vertical_projection_point(
EDGEPT *split_point,
EDGEPT *target_point,
305 EDGEPT_CLIST *new_points);
314 WERD_RES *word_res,
int *blob_number);
317 bool split_next_to_fragment,
325 void chop_word_main(
WERD_RES *word);
326 void improve_by_chopping(
float rating_cert_scale,
333 float rating_ceiling,
334 bool split_next_to_fragment);
335 int select_blob_to_split_from_fixpt(
DANGERR *fixpt);
338 void add_seam_to_queue(
float new_priority,
SEAM *new_seam,
SeamQueue* seams);
342 void combine_seam(
const SeamPile& seam_pile,
352 EDGEPT_CLIST *new_points,
369 const char* description,
374 void merge_fragments(
MATRIX *ratings,
384 void get_fragment_lists(
inT16 current_frag,
387 inT16 num_frag_parts,
390 BLOB_CHOICE_LIST *choice_lists);
393 void merge_and_put_fragment_lists(
inT16 row,
395 inT16 num_frag_parts,
396 BLOB_CHOICE_LIST *choice_lists,
404 void fill_filtered_fragment_list(BLOB_CHOICE_LIST *choices,
407 BLOB_CHOICE_LIST *filtered_choices);
421 const WERD_CHOICE_LIST &best_choices,
428 num_futile_classifications >=
429 segsearch_max_futile_classifications);
457 void UpdateSegSearchNodes(
458 float rating_cert_scale,
468 void ProcessSegSearchPainPoint(
float pain_point_priority,
470 const char* pain_point_type,
478 void ResetNGramSearch(
WERD_RES* word_res,
485 void InitBlamerForSegSearch(
WERD_RES *word_res,
494 #endif // TESSERACT_WORDREC_WORDREC_H__
void RevisitWholeColumn()
#define INT_VAR_H(name, val, comment)
GenericVector< int > blame_reasons_
WERD_CHOICE * prev_word_best_choice_
void SetBlobClassified(int row)
LanguageModel * language_model_
GridSearch< WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT > WordSearch
void SetColumnClassified()
#define double_VAR_H(name, val, comment)
bool IsRowJustClassified(int row) const
void CallFillLattice(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
bool AcceptableChoiceFound()
bool SegSearchDone(int num_futile_classifications)
#define BOOL_VAR_H(name, val, comment)
Bundle together all the things pertaining to the best choice/state.