tesseract
5.0.0-alpha-619-ge9db
|
Go to the documentation of this file.
38 SegSearch(word_res, &best_choice_bundle,
nullptr);
58 blamer_bundle, &pain_points, &pending);
62 if (blamer_bundle !=
nullptr &&
70 float pain_point_priority;
71 int num_futile_classifications = 0;
75 (blamer_bundle !=
nullptr &&
78 bool found_nothing =
true;
80 while ((pp_type = pain_points.
Deque(&pain_point, &pain_point_priority)) !=
84 pain_point.
row - pain_point.
col + 1);
89 found_nothing =
false;
99 &pending, word_res, &pain_points, blamer_bundle);
102 word_res, &pain_points, best_choice_bundle,
104 if (!best_choice_bundle->
updated) ++num_futile_classifications;
107 tprintf(
"num_futile_classifications %d\n", num_futile_classifications);
110 best_choice_bundle->
updated =
false;
115 blamer_bundle !=
nullptr &&
121 if (blamer_bundle !=
nullptr) {
127 tprintf(
"Done with SegSearch (AcceptableChoiceFound: %d)\n",
140 tprintf(
"Starting SegSearch on ratings matrix%s:\n",
159 if (blamer_bundle !=
nullptr) {
174 (*pending)[0].SetColumnClassified();
176 pain_points, best_choice_bundle, blamer_bundle);
180 float rating_cert_scale,
190 for (
int col = starting_col; col < ratings->
dimension(); ++col) {
191 if (!(*pending)[col].WorkToDo())
continue;
193 int last_row = std::min(ratings->
dimension() - 1,
195 if ((*pending)[col].SingleRow() >= 0) {
196 first_row = last_row = (*pending)[col].SingleRow();
199 tprintf(
"\n\nUpdateSegSearchNodes: col=%d, rows=[%d,%d], alljust=%d\n",
200 col, first_row, last_row,
201 (*pending)[col].IsRowJustClassified(INT32_MAX));
204 for (
int row = first_row; row <= last_row; ++row) {
206 BLOB_CHOICE_LIST *current_node = ratings->
get(col, row);
208 col == 0 ? nullptr : best_choice_bundle->
beam[col - 1];
209 if (current_node !=
nullptr &&
211 col, row, current_node, parent_node,
212 pain_points, word_res,
213 best_choice_bundle, blamer_bundle) &&
217 (*pending)[row + 1].RevisitWholeColumn();
219 tprintf(
"Added child col=%d to pending\n", row + 1);
224 if (best_choice_bundle->
best_vse !=
nullptr) {
228 best_choice_bundle->
best_vse, word_res);
231 best_choice_bundle->
best_vse, word_res);
237 for (
int col = 0; col < pending->
size(); ++col) {
238 (*pending)[col].Clear();
240 vse_it(&best_choice_bundle->
beam[col]->viterbi_state_entries);
241 for (vse_it.mark_cycle_pt(); !vse_it.cycled_list(); vse_it.forward()) {
242 vse_it.data()->updated =
false;
248 float pain_point_priority,
249 const MATRIX_COORD &pain_point,
const char* pain_point_type,
253 tprintf(
"Classifying pain point %s priority=%.4f, col=%d, row=%d\n",
254 pain_point_type, pain_point_priority,
255 pain_point.
col, pain_point.
row);
260 if (!pain_point.
Valid(*ratings)) {
265 pain_point.
col, pain_point.
row,
269 BLOB_CHOICE_LIST *lst = ratings->
get(pain_point.
col, pain_point.
row);
270 if (lst ==
nullptr) {
271 ratings->
put(pain_point.
col, pain_point.
row, classified);
277 BLOB_CHOICE_IT it(lst);
278 it.add_list_before(classified);
280 classified =
nullptr;
285 ratings->
get(pain_point.
col, pain_point.
row),
292 if (classified !=
nullptr && !classified->empty()) {
293 if (pain_point.
col > 0) {
304 (*pending)[pain_point.
col].SetBlobClassified(pain_point.
row);
315 for (
int col = 0; col < best_choice_bundle->
beam.size(); ++col) {
316 best_choice_bundle->
beam[col]->Clear();
320 best_choice_bundle->
best_vse =
nullptr;
322 (*pending)[0].SetColumnClassified();
323 for (
int i = 1; i < pending->
size(); ++i)
324 (*pending)[i].Clear();
331 pain_points->
Clear();
334 blamer_debug, pain_points,
bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const
void SegSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
bool SegSearchDone(int num_futile_classifications)
void print(const UNICHARSET &unicharset) const
int segsearch_debug_level
bool Valid(const MATRIX &m) const
WERD_CHOICE * prev_word_best_choice_
void IncreaseBandSize(int bandwidth)
void SetChopperBlame(const WERD_RES *word, bool debug)
int segsearch_max_pain_points
ViterbiStateEntry * best_vse
Best ViterbiStateEntry and BLOB_CHOICE.
void ResetNGramSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, GenericVector< SegSearchPending > *pending)
bool wordrec_enable_assoc
void GenerateFromPath(float rating_cert_scale, ViterbiStateEntry *vse, WERD_RES *word_res)
static const char * PainPointDescription(LMPainPointsType type)
PointerVector< LanguageModelState > beam
bool updated
Flag to indicate whether anything was changed.
WERD_CHOICE * best_choice
void GenerateFromAmbigs(const DANGERR &fixpt, ViterbiStateEntry *vse, WERD_RES *word_res)
Bundle together all the things pertaining to the best choice/state.
virtual BLOB_CHOICE_LIST * classify_piece(const GenericVector< SEAM * > &seams, int16_t start, int16_t end, const char *description, TWERD *word, BlamerBundle *blamer_bundle)
LMPainPointsType Deque(MATRIX_COORD *pp, float *priority)
bool GuidedSegsearchStillGoing() const
void InitBlamerForSegSearch(WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle, STRING *blamer_debug)
void InitForSegSearch(const WERD_CHOICE *best_choice, MATRIX *ratings, UNICHAR_ID wildcard_id, bool debug, STRING *debug_str, tesseract::LMPainPoints *pain_points, double max_char_wh_ratio, WERD_RES *word_res)
void GenerateInitial(WERD_RES *word_res)
bool GeneratePainPoint(int col, int row, LMPainPointsType pp_type, float special_priority, bool ok_to_extend, float max_char_wh_ratio, WERD_RES *word_res)
double segsearch_max_char_wh_ratio
Struct to store information maintained by various language model components.
DANGERR fixpt
Places to try to fix the word suggested by ambiguity checking.
bool updated
set to true if the entry has just been created/updated
GenericVector< SEAM * > seam_array
void improve_by_chopping(float rating_cert_scale, WERD_RES *word, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle, LMPainPoints *pain_points, GenericVector< SegSearchPending > *pending)
bool wordrec_debug_blamer
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
bool assume_fixed_pitch_char_segment
void init_to_size(int size, const T &t)
void put(ICOORD pos, const T &thing)
DLLSYM void tprintf(const char *format,...)
const UNICHARSET & getUnicharset() const
void InitialSegSearch(WERD_RES *word_res, LMPainPoints *pain_points, GenericVector< SegSearchPending > *pending, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
std::unique_ptr< LanguageModel > language_model_
void DoSegSearch(WERD_RES *word_res)
void ProcessSegSearchPainPoint(float pain_point_priority, const MATRIX_COORD &pain_point, const char *pain_point_type, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle)
bool GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const
static void PrintSeams(const char *label, const GenericVector< SEAM * > &seams)
void FinishSegSearch(const WERD_CHOICE *best_choice, bool debug, STRING *debug_str)
void UpdateSegSearchNodes(float rating_cert_scale, int starting_col, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
void SetupCorrectSegmentation(const TWERD *word, bool debug)
bool Classified(int col, int row, int wildcard_id) const