tesseract  5.0.0-alpha-619-ge9db
tesseract::Wordrec Class Reference

#include <wordrec.h>

Inheritance diagram for tesseract::Wordrec:
tesseract::Classify tesseract::CCStruct tesseract::CCUtil tesseract::Tesseract

Public Member Functions

 Wordrec ()
 
 ~Wordrec () override=default
 
void SaveAltChoices (const LIST &best_choices, WERD_RES *word)
 
void FillLattice (const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
 
void CallFillLattice (const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
 
void SegSearch (WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
 
void InitialSegSearch (WERD_RES *word_res, LMPainPoints *pain_points, GenericVector< SegSearchPending > *pending, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
 
void DoSegSearch (WERD_RES *word_res)
 
void add_seam_to_queue (float new_priority, SEAM *new_seam, SeamQueue *seams)
 
void choose_best_seam (SeamQueue *seam_queue, const SPLIT *split, PRIORITY priority, SEAM **seam_result, TBLOB *blob, SeamPile *seam_pile)
 
void combine_seam (const SeamPile &seam_pile, const SEAM *seam, SeamQueue *seam_queue)
 
SEAM * pick_good_seam (TBLOB *blob)
 
void try_point_pairs (EDGEPT *points[MAX_NUM_POINTS], int16_t num_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob)
 
void try_vertical_splits (EDGEPT *points[MAX_NUM_POINTS], int16_t num_points, EDGEPT_CLIST *new_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob)
 
PRIORITY grade_split_length (SPLIT *split)
 
PRIORITY grade_sharpness (SPLIT *split)
 
bool near_point (EDGEPT *point, EDGEPT *line_pt_0, EDGEPT *line_pt_1, EDGEPT **near_pt)
 
virtual BLOB_CHOICE_LIST * classify_piece (const GenericVector< SEAM * > &seams, int16_t start, int16_t end, const char *description, TWERD *word, BlamerBundle *blamer_bundle)
 
void merge_fragments (MATRIX *ratings, int16_t num_blobs)
 
void get_fragment_lists (int16_t current_frag, int16_t current_row, int16_t start, int16_t num_frag_parts, int16_t num_blobs, MATRIX *ratings, BLOB_CHOICE_LIST *choice_lists)
 
void merge_and_put_fragment_lists (int16_t row, int16_t column, int16_t num_frag_parts, BLOB_CHOICE_LIST *choice_lists, MATRIX *ratings)
 
void fill_filtered_fragment_list (BLOB_CHOICE_LIST *choices, int fragment_pos, int num_frag_parts, BLOB_CHOICE_LIST *filtered_choices)
 
program_editup

Initialize all the things in the program that need to be initialized. init_permute determines whether to initialize the permute functions and Dawg models.

void program_editup (const char *textbase, TessdataManager *init_classifier, TessdataManager *init_dict)
 
cc_recog

Recognize a word.

void cc_recog (WERD_RES *word)
 
program_editdown

This function holds any necessary post processing for the Wise Owl program.

void program_editdown (int32_t elasped_time)
 
set_pass1

Get ready to do some pass 1 stuff.

void set_pass1 ()
 
set_pass2

Get ready to do some pass 2 stuff.

void set_pass2 ()
 
end_recog

Cleanup and exit the recog program.

int end_recog ()
 
call_matcher

Called from Tess with a blob in tess form. The blob may need rotating to the correct orientation for classification.

BLOB_CHOICE_LIST * call_matcher (TBLOB *blob)
 
dict_word()

Test the dictionaries, returning NO_PERM (0) if not found, or one of the PermuterType values if found, according to the dictionary.

int dict_word (const WERD_CHOICE &word)
 
classify_blob

Classify this blob if it is not already recorded in the match table. Attempt to recognize this blob as a character. The recognition rating for this blob will be stored as a part of the blob. This value will also be returned to the caller.

Parameters
blob: Current blob
string: The string to display in ScrollView
color: The colour to use when displayed with ScrollView
BLOB_CHOICE_LIST * classify_blob (TBLOB *blob, const char *string, C_COL color, BlamerBundle *blamer_bundle)
 
point_priority

Assign a priority to an edge point that might be used as part of a split. The argument should be of type EDGEPT.

PRIORITY point_priority (EDGEPT *point)
 
add_point_to_list

Add an edge point to a POINT_GROUP containing a list of other points.

void add_point_to_list (PointHeap *point_heap, EDGEPT *point)
 
bool is_inside_angle (EDGEPT *pt)
 
angle_change

Return the change in angle (degrees) of the line segments between points one and two, and two and three.

int angle_change (EDGEPT *point1, EDGEPT *point2, EDGEPT *point3)
 
pick_close_point

Choose the edge point that is closest to the critical point. This point may not be exactly vertical from the critical point.

EDGEPT * pick_close_point (EDGEPT *critical_point, EDGEPT *vertical_point, int *best_dist)
 
prioritize_points

Find a list of edge points from the outer outline of this blob. For each of these points assign a priority. Sort these points using a heap structure so that they can be visited in order.

void prioritize_points (TESSLINE *outline, PointHeap *points)
 
new_min_point

Found a new minimum point; try to decide whether to save it or not. Return the new value for the local minimum. If a point is saved then the local minimum is reset to nullptr.

void new_min_point (EDGEPT *local_min, PointHeap *points)
 
new_max_point

Found a new maximum point; try to decide whether to save it or not. Return the new value for the local maximum. If a point is saved then the local maximum is reset to nullptr.

void new_max_point (EDGEPT *local_max, PointHeap *points)
 
vertical_projection_point

For one point on the outline, find the corresponding point on the other side of the outline that is a likely projection for a split point. This is done by iterating through the edge points until the X value of the point being looked at is greater than the X value of the split point. Ensure that the point being returned is not right next to the split point. Return the edge point in *best_point as a result, and any points that were newly created are also saved on the new_points list.

void vertical_projection_point (EDGEPT *split_point, EDGEPT *target_point, EDGEPT **best_point, EDGEPT_CLIST *new_points)
 
attempt_blob_chop

Try to split this blob, placing the new piece after this one. Check to make sure that it was successful.

SEAM * attempt_blob_chop (TWERD *word, TBLOB *blob, int32_t blob_number, bool italic_blob, const GenericVector< SEAM * > &seams)
 
SEAM * chop_numbered_blob (TWERD *word, int32_t blob_number, bool italic_blob, const GenericVector< SEAM * > &seams)
 
SEAM * chop_overlapping_blob (const GenericVector< TBOX > &boxes, bool italic_blob, WERD_RES *word_res, int *blob_number)
 
improve_one_blob

Finds the best place to chop, based on the worst blob, fixpt, or next to a fragment, according to the input. Returns the SEAM corresponding to the chop point, if any is found, and the index in the ratings_matrix of the chopped blob. Note that blob_choices is just a copy of the pointers in the leading diagonal of the ratings MATRIX. Although the blob is chopped, the returned SEAM is yet to be inserted into word->seam_array and the resulting blobs are unclassified, so this function can be used by ApplyBox as well as during recognition.

SEAM * improve_one_blob (const GenericVector< BLOB_CHOICE * > &blob_choices, DANGERR *fixpt, bool split_next_to_fragment, bool italic_blob, WERD_RES *word, int *blob_number)
 
chop_one_blob

Start with the current one-blob word and its classification. Find the worst blob and try to divide it up to improve the ratings. Used for testing chopper.

SEAM * chop_one_blob (const GenericVector< TBOX > &boxes, const GenericVector< BLOB_CHOICE * > &blob_choices, WERD_RES *word_res, int *blob_number)
 
chop_word_main

Classify the blobs in this word and permute the results. Find the worst blob in the word and chop it up. Continue this process until a good answer has been found or all the blobs have been chopped up enough. The results are returned in the WERD_RES.

void chop_word_main (WERD_RES *word)
 
improve_by_chopping

Repeatedly chops the worst blob, classifies the new blobs, fixes up all the data, and incrementally runs the segmentation search until a good word is found or no more chops can be found.

void improve_by_chopping (float rating_cert_scale, WERD_RES *word, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle, LMPainPoints *pain_points, GenericVector< SegSearchPending > *pending)
 
int select_blob_to_split (const GenericVector< BLOB_CHOICE * > &blob_choices, float rating_ceiling, bool split_next_to_fragment)
 
int select_blob_to_split_from_fixpt (DANGERR *fixpt)
 
- Public Member Functions inherited from tesseract::Classify
 Classify ()
 
 ~Classify () override
 
virtual Dict & getDict ()
 
const ShapeTable * shape_table () const
 
void SetStaticClassifier (ShapeClassifier *static_classifier)
 
void AddLargeSpeckleTo (int blob_length, BLOB_CHOICE_LIST *choices)
 
bool LargeSpeckle (const TBLOB &blob)
 
ADAPT_TEMPLATES NewAdaptedTemplates (bool InitFromUnicharset)
 
int GetFontinfoId (ADAPT_CLASS Class, uint8_t ConfigId)
 
int PruneClasses (const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uint8_t *normalization_factors, const uint16_t *expected_num_features, GenericVector< CP_RESULT_STRUCT > *results)
 
void ReadNewCutoffs (TFile *fp, uint16_t *Cutoffs)
 
void PrintAdaptedTemplates (FILE *File, ADAPT_TEMPLATES Templates)
 
void WriteAdaptedTemplates (FILE *File, ADAPT_TEMPLATES Templates)
 
ADAPT_TEMPLATES ReadAdaptedTemplates (TFile *File)
 
float ComputeNormMatch (CLASS_ID ClassId, const FEATURE_STRUCT &feature, bool DebugMatch)
 
void FreeNormProtos ()
 
NORM_PROTOS * ReadNormProtos (TFile *fp)
 
void ConvertProto (PROTO Proto, int ProtoId, INT_CLASS Class)
 
INT_TEMPLATES CreateIntTemplates (CLASSES FloatProtos, const UNICHARSET &target_unicharset)
 
void LearnWord (const char *fontname, WERD_RES *word)
 
void LearnPieces (const char *fontname, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word)
 
void InitAdaptiveClassifier (TessdataManager *mgr)
 
void InitAdaptedClass (TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates)
 
void AmbigClassifier (const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, const TBLOB *blob, INT_TEMPLATES templates, ADAPT_CLASS *classes, UNICHAR_ID *ambiguities, ADAPT_RESULTS *results)
 
void MasterMatcher (INT_TEMPLATES templates, int16_t num_features, const INT_FEATURE_STRUCT *features, const uint8_t *norm_factors, ADAPT_CLASS *classes, int debug, int matcher_multiplier, const TBOX &blob_box, const GenericVector< CP_RESULT_STRUCT > &results, ADAPT_RESULTS *final_results)
 
void ExpandShapesAndApplyCorrections (ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, const uint8_t *cn_factors, UnicharRating *int_result, ADAPT_RESULTS *final_results)
 
double ComputeCorrectedRating (bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, int matcher_multiplier, const uint8_t *cn_factors)
 
void ConvertMatchesToChoices (const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices)
 
void AddNewResult (const UnicharRating &new_result, ADAPT_RESULTS *results)
 
int GetAdaptiveFeatures (TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures)
 
void DebugAdaptiveClassifier (TBLOB *Blob, ADAPT_RESULTS *Results)
 
PROTO_ID MakeNewTempProtos (FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS IClass, ADAPT_CLASS Class, BIT_VECTOR TempProtoMask)
 
int MakeNewTemporaryConfig (ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures)
 
void MakePermanent (ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob)
 
void PrintAdaptiveMatchResults (const ADAPT_RESULTS &results)
 
void RemoveExtraPuncs (ADAPT_RESULTS *Results)
 
void RemoveBadMatches (ADAPT_RESULTS *Results)
 
void SetAdaptiveThreshold (float Threshold)
 
void ShowBestMatchFor (int shape_id, const INT_FEATURE_STRUCT *features, int num_features)
 
STRING ClassIDToDebugStr (const INT_TEMPLATES_STRUCT *templates, int class_id, int config_id) const
 
int ClassAndConfigIDToFontOrShapeID (int class_id, int int_result_config) const
 
int ShapeIDToClassID (int shape_id) const
 
UNICHAR_ID * BaselineClassifier (TBLOB *Blob, const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results)
 
int CharNormClassifier (TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results)
 
int CharNormTrainingSample (bool pruner_only, int keep_this, const TrainingSample &sample, GenericVector< UnicharRating > *results)
 
UNICHAR_ID * GetAmbiguities (TBLOB *Blob, CLASS_ID CorrectClass)
 
void DoAdaptiveMatch (TBLOB *Blob, ADAPT_RESULTS *Results)
 
void AdaptToChar (TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, float Threshold, ADAPT_TEMPLATES adaptive_templates)
 
void DisplayAdaptedChar (TBLOB *blob, INT_CLASS_STRUCT *int_class)
 
bool AdaptableWord (WERD_RES *word)
 
void EndAdaptiveClassifier ()
 
void SettupPass1 ()
 
void SettupPass2 ()
 
void AdaptiveClassifier (TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
 
void ClassifyAsNoise (ADAPT_RESULTS *Results)
 
void ResetAdaptiveClassifierInternal ()
 
void SwitchAdaptiveClassifier ()
 
void StartBackupAdaptiveClassifier ()
 
int GetCharNormFeature (const INT_FX_RESULT_STRUCT &fx_info, INT_TEMPLATES templates, uint8_t *pruner_norm_array, uint8_t *char_norm_array)
 
void ComputeCharNormArrays (FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uint8_t *char_norm_array, uint8_t *pruner_array)
 
bool TempConfigReliable (CLASS_ID class_id, const TEMP_CONFIG &config)
 
void UpdateAmbigsGroup (CLASS_ID class_id, TBLOB *Blob)
 
bool AdaptiveClassifierIsFull () const
 
bool AdaptiveClassifierIsEmpty () const
 
bool LooksLikeGarbage (TBLOB *blob)
 
void RefreshDebugWindow (ScrollView **win, const char *msg, int y_offset, const TBOX &wbox)
 
void ClearCharNormArray (uint8_t *char_norm_array)
 
void ComputeIntCharNormArray (const FEATURE_STRUCT &norm_feature, uint8_t *char_norm_array)
 
void ComputeIntFeatures (FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures)
 
INT_TEMPLATES ReadIntTemplates (TFile *fp)
 
void WriteIntTemplates (FILE *File, INT_TEMPLATES Templates, const UNICHARSET &target_unicharset)
 
CLASS_ID GetClassToDebug (const char *Prompt, bool *adaptive_on, bool *pretrained_on, int *shape_id)
 
void ShowMatchDisplay ()
 
UnicityTable< FontInfo > & get_fontinfo_table ()
 
const UnicityTable< FontInfo > & get_fontinfo_table () const
 
UnicityTable< FontSet > & get_fontset_table ()
 
void NormalizeOutlines (LIST Outlines, float *XScale, float *YScale)
 
FEATURE_SET ExtractOutlineFeatures (TBLOB *Blob)
 
FEATURE_SET ExtractPicoFeatures (TBLOB *Blob)
 
FEATURE_SET ExtractIntCNFeatures (const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
 
FEATURE_SET ExtractIntGeoFeatures (const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
 
void LearnBlob (const STRING &fontname, TBLOB *Blob, const DENORM &cn_denorm, const INT_FX_RESULT_STRUCT &fx_info, const char *blob_text)
 
bool WriteTRFile (const STRING &filename)
 
- Public Member Functions inherited from tesseract::CCStruct
 CCStruct ()=default
 
 ~CCStruct () override
 
- Public Member Functions inherited from tesseract::CCUtil
 CCUtil ()
 
virtual ~CCUtil ()
 
void main_setup (const char *argv0, const char *basename)
 CCUtil::main_setup - set location of tessdata and name of image. More...
 
ParamsVectors * params ()
 

Public Attributes

bool merge_fragments_in_matrix = true
 
bool wordrec_enable_assoc = true
 
bool force_word_assoc = false
 
int repair_unchopped_blobs = 1
 
double tessedit_certainty_threshold = -2.25
 
int chop_debug = 0
 
bool chop_enable = 1
 
bool chop_vertical_creep = 0
 
int chop_split_length = 10000
 
int chop_same_distance = 2
 
int chop_min_outline_points = 6
 
int chop_seam_pile_size = 150
 
bool chop_new_seam_pile = 1
 
int chop_inside_angle = -50
 
int chop_min_outline_area = 2000
 
double chop_split_dist_knob = 0.5
 
double chop_overlap_knob = 0.9
 
double chop_center_knob = 0.15
 
int chop_centered_maxwidth = 90
 
double chop_sharpness_knob = 0.06
 
double chop_width_change_knob = 5.0
 
double chop_ok_split = 100.0
 
double chop_good_split = 50.0
 
int chop_x_y_weight = 3
 
bool assume_fixed_pitch_char_segment = false
 
int wordrec_debug_level = 0
 
int wordrec_max_join_chunks = 4
 
bool wordrec_skip_no_truth_words = false
 
bool wordrec_debug_blamer = false
 
bool wordrec_run_blamer = false
 
int segsearch_debug_level = 0
 
int segsearch_max_pain_points = 2000
 
int segsearch_max_futile_classifications = 10
 
double segsearch_max_char_wh_ratio = 2.0
 
bool save_alt_choices = true
 
std::unique_ptr< LanguageModel > language_model_
 
PRIORITY pass2_ok_split
 
WERD_CHOICE * prev_word_best_choice_
 
GenericVector< int > blame_reasons_
 
void(Wordrec::* fill_lattice_ )(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
 
- Public Attributes inherited from tesseract::Classify
bool allow_blob_division = true
 
bool prioritize_division = false
 
bool classify_enable_learning = true
 
int classify_debug_level = 0
 
int classify_norm_method = character
 
double classify_char_norm_range = 0.2
 
double classify_max_rating_ratio = 1.5
 
double classify_max_certainty_margin = 5.5
 
bool tess_cn_matching = 0
 
bool tess_bn_matching = 0
 
bool classify_enable_adaptive_matcher = 1
 
bool classify_use_pre_adapted_templates = 0
 
bool classify_save_adapted_templates = 0
 
bool classify_enable_adaptive_debugger = 0
 
bool classify_nonlinear_norm = 0
 
int matcher_debug_level = 0
 
int matcher_debug_flags = 0
 
int classify_learning_debug_level = 0
 
double matcher_good_threshold = 0.125
 
double matcher_reliable_adaptive_result = 0.0
 
double matcher_perfect_threshold = 0.02
 
double matcher_bad_match_pad = 0.15
 
double matcher_rating_margin = 0.1
 
double matcher_avg_noise_size = 12.0
 
int matcher_permanent_classes_min = 1
 
int matcher_min_examples_for_prototyping = 3
 
int matcher_sufficient_examples_for_prototyping = 5
 
double matcher_clustering_max_angle_delta = 0.015
 
double classify_misfit_junk_penalty = 0.0
 
double rating_scale = 1.5
 
double certainty_scale = 20.0
 
double tessedit_class_miss_scale = 0.00390625
 
double classify_adapted_pruning_factor = 2.5
 
double classify_adapted_pruning_threshold = -1.0
 
int classify_adapt_proto_threshold = 230
 
int classify_adapt_feature_threshold = 230
 
bool disable_character_fragments = true
 
double classify_character_fragments_garbage_certainty_threshold = -3.0
 
bool classify_debug_character_fragments = false
 
bool matcher_debug_separate_windows = false
 
char * classify_learn_debug_str = ""
 
int classify_class_pruner_threshold = 229
 
int classify_class_pruner_multiplier = 15
 
int classify_cp_cutoff_strength = 7
 
int classify_integer_matcher_multiplier = 10
 
bool classify_bln_numeric_mode = 0
 
double speckle_large_max_size = 0.30
 
double speckle_rating_penalty = 10.0
 
INT_TEMPLATES PreTrainedTemplates = nullptr
 
ADAPT_TEMPLATES AdaptedTemplates = nullptr
 
ADAPT_TEMPLATES BackupAdaptedTemplates = nullptr
 
BIT_VECTOR AllProtosOn = nullptr
 
BIT_VECTOR AllConfigsOn = nullptr
 
BIT_VECTOR AllConfigsOff = nullptr
 
BIT_VECTOR TempProtoMask = nullptr
 
NORM_PROTOS * NormProtos = nullptr
 
UnicityTable< FontInfo > fontinfo_table_
 
UnicityTable< FontSet > fontset_table_
 
bool EnableLearning = true
 
- Public Attributes inherited from tesseract::CCUtil
STRING datadir
 
STRING imagebasename
 
STRING lang
 
STRING language_data_path_prefix
 
UNICHARSET unicharset
 
UnicharAmbigs unichar_ambigs
 
STRING imagefile
 
STRING directory
 
int ambigs_debug_level = 0
 
bool use_ambigs_for_adaption = false
 

Protected Member Functions

bool SegSearchDone (int num_futile_classifications)
 
void UpdateSegSearchNodes (float rating_cert_scale, int starting_col, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
 
void ProcessSegSearchPainPoint (float pain_point_priority, const MATRIX_COORD &pain_point, const char *pain_point_type, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle)
 
void ResetNGramSearch (WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, GenericVector< SegSearchPending > *pending)
 
void InitBlamerForSegSearch (WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle, STRING *blamer_debug)
 

Additional Inherited Members

- Static Public Member Functions inherited from tesseract::Classify
static void SetupBLCNDenorms (const TBLOB &blob, bool nonlinear_norm, DENORM *bl_denorm, DENORM *cn_denorm, INT_FX_RESULT_STRUCT *fx_info)
 
static void ExtractFeatures (const TBLOB &blob, bool nonlinear_norm, GenericVector< INT_FEATURE_STRUCT > *bl_features, GenericVector< INT_FEATURE_STRUCT > *cn_features, INT_FX_RESULT_STRUCT *results, GenericVector< int > *outline_cn_counts)
 
- Static Public Attributes inherited from tesseract::CCStruct
static const double kDescenderFraction = 0.25
 
static const double kXHeightFraction = 0.5
 
static const double kAscenderFraction = 0.25
 
static const double kXHeightCapRatio
 
- Protected Attributes inherited from tesseract::Classify
IntegerMatcher im_
 
FEATURE_DEFS_STRUCT feature_defs_
 
ShapeTable * shape_table_ = nullptr
 

Detailed Description

Definition at line 192 of file wordrec.h.

Constructor & Destructor Documentation

◆ Wordrec()

tesseract::Wordrec::Wordrec ( )

Definition at line 47 of file wordrec.cpp.

47  :
48  // control parameters
50  "Merge the fragments in the ratings matrix and delete them"
51  " after merging", params()),
52  BOOL_MEMBER(wordrec_enable_assoc, true, "Associator Enable",
53  params()),
55  "force associator to run regardless of what enable_assoc is."
56  " This is used for CJK where component grouping is necessary.",
57  CCUtil::params()),
58  INT_MEMBER(repair_unchopped_blobs, 1, "Fix blobs that aren't chopped",
59  params()),
60  double_MEMBER(tessedit_certainty_threshold, -2.25, "Good blob limit",
61  params()),
62  INT_MEMBER(chop_debug, 0, "Chop debug",
63  params()),
64  BOOL_MEMBER(chop_enable, 1, "Chop enable",
65  params()),
66  BOOL_MEMBER(chop_vertical_creep, 0, "Vertical creep",
67  params()),
68  INT_MEMBER(chop_split_length, 10000, "Split Length",
69  params()),
70  INT_MEMBER(chop_same_distance, 2, "Same distance",
71  params()),
72  INT_MEMBER(chop_min_outline_points, 6, "Min Number of Points on Outline",
73  params()),
74  INT_MEMBER(chop_seam_pile_size, 150, "Max number of seams in seam_pile",
75  params()),
76  BOOL_MEMBER(chop_new_seam_pile, 1, "Use new seam_pile", params()),
77  INT_MEMBER(chop_inside_angle, -50, "Min Inside Angle Bend",
78  params()),
79  INT_MEMBER(chop_min_outline_area, 2000, "Min Outline Area",
80  params()),
81  double_MEMBER(chop_split_dist_knob, 0.5, "Split length adjustment",
82  params()),
83  double_MEMBER(chop_overlap_knob, 0.9, "Split overlap adjustment",
84  params()),
85  double_MEMBER(chop_center_knob, 0.15, "Split center adjustment",
86  params()),
87  INT_MEMBER(chop_centered_maxwidth, 90, "Width of (smaller) chopped blobs "
88  "above which we don't care that a chop is not near the center.",
89  params()),
90  double_MEMBER(chop_sharpness_knob, 0.06, "Split sharpness adjustment",
91  params()),
92  double_MEMBER(chop_width_change_knob, 5.0, "Width change adjustment",
93  params()),
94  double_MEMBER(chop_ok_split, 100.0, "OK split limit",
95  params()),
96  double_MEMBER(chop_good_split, 50.0, "Good split limit",
97  params()),
98  INT_MEMBER(chop_x_y_weight, 3, "X / Y length weight",
99  params()),
101  "include fixed-pitch heuristics in char segmentation",
102  params()),
104  "Debug level for wordrec", params()),
106  "Max number of broken pieces to associate", params()),
108  "Only run OCR for words that had truth recorded in BlamerBundle",
109  params()),
111  "Print blamer debug messages", params()),
113  "Try to set the blame for errors", params()),
115  "SegSearch debug level", params()),
117  "Maximum number of pain points stored in the queue",
118  params()),
120  "Maximum number of pain point classifications per chunk that"
121  " did not result in finding a better word choice.",
122  params()),
124  "Maximum character width-to-height ratio", params()),
126  "Save alternative paths found during chopping"
127  " and segmentation search",
128  params()),
129  pass2_ok_split(0.0f) {
130  prev_word_best_choice_ = nullptr;
131  language_model_.reset(new LanguageModel(&get_fontinfo_table(),
132  &(getDict())));
133  fill_lattice_ = nullptr;
134 }

◆ ~Wordrec()

tesseract::Wordrec::~Wordrec ( )
override default

Member Function Documentation

◆ add_point_to_list()

void tesseract::Wordrec::add_point_to_list ( PointHeap *  point_heap,
EDGEPT *  point
)

Definition at line 75 of file chop.cpp.

75  {
76  if (point_heap->size() < MAX_NUM_POINTS - 2) {
77  PointPair pair(point_priority(point), point);
78  point_heap->Push(&pair);
79  }
80 
81 #ifndef GRAPHICS_DISABLED
82  if (chop_debug > 2)
83  mark_outline(point);
84 #endif
85 }

◆ add_seam_to_queue()

void tesseract::Wordrec::add_seam_to_queue ( float  new_priority,
SEAM *  new_seam,
SeamQueue *  seams
)

Definition at line 62 of file findseam.cpp.

66  {
67  if (new_seam == nullptr) return;
68  if (chop_debug) {
69  tprintf("Pushing new seam with priority %g :", new_priority);
70  new_seam->Print("seam: ");
71  }
72  if (seams->size() >= MAX_NUM_SEAMS) {
73  SeamPair old_pair(0, nullptr);
74  if (seams->PopWorst(&old_pair) && old_pair.key() <= new_priority) {
75  if (chop_debug) {
76  tprintf("Old seam staying with priority %g\n", old_pair.key());
77  }
78  delete new_seam;
79  seams->Push(&old_pair);
80  return;
81  } else if (chop_debug) {
82  tprintf("New seam with priority %g beats old worst seam with %g\n",
83  new_priority, old_pair.key());
84  }
85  }

◆ angle_change()

int tesseract::Wordrec::angle_change ( EDGEPT *  point1,
EDGEPT *  point2,
EDGEPT *  point3
)

Definition at line 99 of file chop.cpp.

99  {
100  VECTOR vector1;
101  VECTOR vector2;
102 
103  int angle;
104 
105  /* Compute angle */
106  vector1.x = point2->pos.x - point1->pos.x;
107  vector1.y = point2->pos.y - point1->pos.y;
108  vector2.x = point3->pos.x - point2->pos.x;
109  vector2.y = point3->pos.y - point2->pos.y;
110  /* Use cross product */
111  float length = std::sqrt(static_cast<float>(vector1.length()) * vector2.length());
112  if (static_cast<int>(length) == 0)
113  return (0);
114  angle = static_cast<int>(floor(asin(vector1.cross(vector2) /
115  length) / M_PI * 180.0 + 0.5));
116 
117  /* Use dot product */
118  if (vector1.dot(vector2) < 0)
119  angle = 180 - angle;
120  /* Adjust angle */
121  if (angle > 180)
122  angle -= 360;
123  if (angle <= -180)
124  angle += 360;
125  return (angle);
126 }

◆ attempt_blob_chop()

SEAM * tesseract::Wordrec::attempt_blob_chop ( TWERD *  word,
TBLOB *  blob,
int32_t  blob_number,
bool  italic_blob,
const GenericVector< SEAM * > &  seams 
)

Definition at line 209 of file chopper.cpp.

212  {
214  preserve_outline_tree (blob->outlines);
215  TBLOB *other_blob = TBLOB::ShallowCopy(*blob); /* Make new blob */
216  // Insert it into the word.
217  word->blobs.insert(other_blob, blob_number + 1);
218 
219  SEAM *seam = nullptr;
220  if (prioritize_division) {
221  TPOINT location;
222  if (divisible_blob(blob, italic_blob, &location)) {
223  seam = new SEAM(0.0f, location);
224  }
225  }
226  if (seam == nullptr)
227  seam = pick_good_seam(blob);
228  if (chop_debug) {
229  if (seam != nullptr)
230  seam->Print("Good seam picked=");
231  else
232  tprintf("\n** no seam picked *** \n");
233  }
234  if (seam) {
235  seam->ApplySeam(italic_blob, blob, other_blob);
236  }
237 
238  seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob,
239  seams, seam);
240  if (seam == nullptr) {
242  restore_outline_tree(blob->outlines);
244  // If the blob can simply be divided into outlines, then do that.
245  TPOINT location;
246  if (divisible_blob(blob, italic_blob, &location)) {
247  other_blob = TBLOB::ShallowCopy(*blob); /* Make new blob */
248  word->blobs.insert(other_blob, blob_number + 1);
249  seam = new SEAM(0.0f, location);
250  seam->ApplySeam(italic_blob, blob, other_blob);
251  seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob,
252  seams, seam);
253  }
254  }
255  }
256  if (seam != nullptr) {
257  // Make sure this seam doesn't get chopped again.
258  seam->Finalize();
259  }
260  return seam;

◆ call_matcher()

BLOB_CHOICE_LIST * tesseract::Wordrec::call_matcher ( TBLOB *  blob)

Definition at line 154 of file tface.cpp.

◆ CallFillLattice()

void tesseract::Wordrec::CallFillLattice ( const MATRIX &  ratings,
const WERD_CHOICE_LIST &  best_choices,
const UNICHARSET &  unicharset,
BlamerBundle *  blamer_bundle
)
inline

Definition at line 259 of file wordrec.h.

262  {
263  (this->*fill_lattice_)(ratings, best_choices, unicharset, blamer_bundle);
264  }

◆ cc_recog()

void tesseract::Wordrec::cc_recog ( WERD_RES *  word)

Definition at line 139 of file tface.cpp.

140  {
141  // Rotate the blob for classification if necessary.
142  TBLOB* rotated_blob = tessblob->ClassifyNormalizeIfNeeded();
143  if (rotated_blob == nullptr) {
144  rotated_blob = tessblob;
145  }

◆ choose_best_seam()

void tesseract::Wordrec::choose_best_seam ( SeamQueue *  seam_queue,
const SPLIT *  split,
PRIORITY  priority,
SEAM **  seam_result,
TBLOB *  blob,
SeamPile *  seam_pile
)

Definition at line 100 of file findseam.cpp.

106  {
107  SEAM *seam;
108  char str[80];
109  float my_priority;
110  /* Add seam of split */
111  my_priority = priority;
112  if (split != nullptr) {
113  TPOINT split_point = split->point1->pos;
114  split_point += split->point2->pos;
115  split_point /= 2;
116  seam = new SEAM(my_priority, split_point, *split);
117  if (chop_debug > 1) seam->Print("Partial priority ");
118  add_seam_to_queue(my_priority, seam, seam_queue);
119 
120  if (my_priority > chop_good_split)
121  return;
122  }
123 
124  TBOX bbox = blob->bounding_box();
125  /* Queue loop */
126  while (!seam_queue->empty()) {
127  SeamPair seam_pair;
128  seam_queue->Pop(&seam_pair);
129  seam = seam_pair.extract_data();
130  /* Set full priority */
131  my_priority = seam->FullPriority(bbox.left(), bbox.right(),
134  if (chop_debug) {
135  sprintf (str, "Full my_priority %0.0f, ", my_priority);
136  seam->Print(str);
137  }
138 
139  if ((*seam_result == nullptr || (*seam_result)->priority() > my_priority) &&
140  my_priority < chop_ok_split) {
141  /* No crossing */
142  if (seam->IsHealthy(*blob, chop_min_outline_points,
144  delete *seam_result;
145  *seam_result = new SEAM(*seam);
146  (*seam_result)->set_priority(my_priority);
147  } else {
148  delete seam;
149  seam = nullptr;
150  my_priority = BAD_PRIORITY;
151  }
152  }
153 
154  if (my_priority < chop_good_split) {
155  delete seam;
156  return; /* Made good answer */
157  }
158 
159  if (seam) {
160  /* Combine with others */
161  if (seam_pile->size() < chop_seam_pile_size) {
162  combine_seam(*seam_pile, seam, seam_queue);
163  SeamDecPair pair(seam_pair.key(), seam);
164  seam_pile->Push(&pair);
165  } else if (chop_new_seam_pile &&
166  seam_pile->size() == chop_seam_pile_size &&
167  seam_pile->PeekTop().key() > seam_pair.key()) {
168  combine_seam(*seam_pile, seam, seam_queue);
169  SeamDecPair pair;
170  seam_pile->Pop(&pair); // pop the worst.
171  // Replace the seam in pair (deleting the old one) with
172  // the new seam and score, then push back into the heap.
173  pair.set_key(seam_pair.key());
174  pair.set_data(seam);
175  seam_pile->Push(&pair);
176  } else {
177  delete seam;
178  }
179  }
180 
181  my_priority = seam_queue->empty() ? NO_FULL_PRIORITY
182  : seam_queue->PeekTop().key();
183  if ((my_priority > chop_ok_split) ||

◆ chop_numbered_blob()

SEAM * tesseract::Wordrec::chop_numbered_blob ( TWERD *  word,
int32_t  blob_number,
bool  italic_blob,
const GenericVector< SEAM * > &  seams 
)

Definition at line 263 of file chopper.cpp.

266  {
267  return attempt_blob_chop(word, word->blobs[blob_number], blob_number,
268  italic_blob, seams);

◆ chop_one_blob()

SEAM * tesseract::Wordrec::chop_one_blob ( const GenericVector< TBOX > &  boxes,
const GenericVector< BLOB_CHOICE * > &  blob_choices,
WERD_RES *  word_res,
int *  blob_number 
)

Definition at line 369 of file chopper.cpp.

373  {
374  if (prioritize_division) {
375  return chop_overlapping_blob(boxes, true, word_res, blob_number);
376  } else {
377  return improve_one_blob(blob_choices, nullptr, false, true, word_res,
378  blob_number);
379  }

◆ chop_overlapping_blob()

SEAM * tesseract::Wordrec::chop_overlapping_blob ( const GenericVector< TBOX > &  boxes,
bool  italic_blob,
WERD_RES *  word_res,
int *  blob_number 
)

Definition at line 271 of file chopper.cpp.

274  {
275  TWERD *word = word_res->chopped_word;
276  for (*blob_number = 0; *blob_number < word->NumBlobs(); ++*blob_number) {
277  TBLOB *blob = word->blobs[*blob_number];
278  TPOINT topleft, botright;
279  topleft.x = blob->bounding_box().left();
280  topleft.y = blob->bounding_box().top();
281  botright.x = blob->bounding_box().right();
282  botright.y = blob->bounding_box().bottom();
283 
284  TPOINT original_topleft, original_botright;
285  word_res->denorm.DenormTransform(nullptr, topleft, &original_topleft);
286  word_res->denorm.DenormTransform(nullptr, botright, &original_botright);
287 
288  TBOX original_box = TBOX(original_topleft.x, original_botright.y,
289  original_botright.x, original_topleft.y);
290 
291  bool almost_equal_box = false;
292  int num_overlap = 0;
293  for (int i = 0; i < boxes.size(); i++) {
294  if (original_box.overlap_fraction(boxes[i]) > 0.125)
295  num_overlap++;
296  if (original_box.almost_equal(boxes[i], 3))
297  almost_equal_box = true;
298  }
299 
300  TPOINT location;
301  if (divisible_blob(blob, italic_blob, &location) ||
302  (!almost_equal_box && num_overlap > 1)) {
303  SEAM *seam = attempt_blob_chop(word, blob, *blob_number,
304  italic_blob, word_res->seam_array);
305  if (seam != nullptr)
306  return seam;
307  }
308  }
309 
310  *blob_number = -1;
311  return nullptr;

◆ chop_word_main()

void tesseract::Wordrec::chop_word_main ( WERD_RES *  word)

Definition at line 389 of file chopper.cpp.

390  {
391  int num_blobs = word->chopped_word->NumBlobs();
392  if (word->ratings == nullptr) {
393  word->ratings = new MATRIX(num_blobs, wordrec_max_join_chunks);
394  }
395  if (word->ratings->get(0, 0) == nullptr) {
396  // Run initial classification.
397  for (int b = 0; b < num_blobs; ++b) {
398  BLOB_CHOICE_LIST* choices = classify_piece(word->seam_array, b, b,
399  "Initial:", word->chopped_word,
400  word->blamer_bundle);
401  word->ratings->put(b, b, choices);
402  }
403  } else {
404  // Blobs have been pre-classified. Set matrix cell for all blob choices
405  for (int col = 0; col < word->ratings->dimension(); ++col) {
406  for (int row = col; row < word->ratings->dimension() &&
407  row < col + word->ratings->bandwidth(); ++row) {
408  BLOB_CHOICE_LIST* choices = word->ratings->get(col, row);
409  if (choices != nullptr) {
410  BLOB_CHOICE_IT bc_it(choices);
411  for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) {
412  bc_it.data()->set_matrix_cell(col, row);
413  }
414  }
415  }
416  }
417  }
418 
419  // Run Segmentation Search.
420  BestChoiceBundle best_choice_bundle(word->ratings->dimension());
421  SegSearch(word, &best_choice_bundle, word->blamer_bundle);
422 
423  if (word->best_choice == nullptr) {
424  // SegSearch found no valid paths, so just use the leading diagonal.
426  }
427  word->RebuildBestState();
428  // If we finished without a hyphen at the end of the word, let the next word
429  // be found in the dictionary.
430  if (word->word->flag(W_EOL) &&
431  !getDict().has_hyphen_end(*word->best_choice)) {
432  getDict().reset_hyphen_vars(true);
433  }
434 
435  if (word->blamer_bundle != nullptr && this->fill_lattice_ != nullptr) {
436  CallFillLattice(*word->ratings, word->best_choices,
437  *word->uch_set, word->blamer_bundle);
438  }
439  if (wordrec_debug_level > 0) {
440  tprintf("Final Ratings Matrix:\n");
441  word->ratings->print(getDict().getUnicharset());
442  }
443  word->FilterWordChoices(getDict().stopper_debug_level);

◆ classify_blob()

BLOB_CHOICE_LIST * tesseract::Wordrec::classify_blob ( TBLOB *  blob,
const char *  string,
C_COL  color,
BlamerBundle *  blamer_bundle 
)

Definition at line 52 of file wordclass.cpp.

55  {
56 #ifndef GRAPHICS_DISABLED
58  display_blob(blob, color);
59 #endif
60  // TODO(rays) collapse with call_matcher and move all to wordrec.cpp.
61  BLOB_CHOICE_LIST* choices = call_matcher(blob);
62  // If a blob with the same bounding box as one of the truth character
63  // bounding boxes is not classified as the corresponding truth character
64  // blame character classifier for incorrect answer.
65  if (blamer_bundle != nullptr) {
66  blamer_bundle->BlameClassifier(getDict().getUnicharset(),
67  blob->bounding_box(),
68  *choices,
70  }
71  #ifndef GRAPHICS_DISABLED
72  if (classify_debug_level && string)
73  print_ratings_list(string, choices, getDict().getUnicharset());
74 
77 #endif
78 
79  return choices;

◆ classify_piece()

BLOB_CHOICE_LIST * tesseract::Wordrec::classify_piece ( const GenericVector< SEAM * > &  seams,
int16_t  start,
int16_t  end,
const char *  description,
TWERD *  word,
BlamerBundle *  blamer_bundle 
)
virtual

Definition at line 52 of file pieces.cpp.

54  {
55  if (end > start) SEAM::JoinPieces(seams, word->blobs, start, end);
56  BLOB_CHOICE_LIST *choices = classify_blob(word->blobs[start], description,
57  White, blamer_bundle);
58  // Set the matrix_cell_ entries in all the BLOB_CHOICES.
59  BLOB_CHOICE_IT bc_it(choices);
60  for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) {
61  bc_it.data()->set_matrix_cell(start, end);
62  }
63 
64  if (end > start) SEAM::BreakPieces(seams, word->blobs, start, end);
65 
66  return (choices);
67 }
68 
69 template<class BLOB_CHOICE>
70 int SortByUnicharID(const void *void1, const void *void2) {

◆ combine_seam()

void tesseract::Wordrec::combine_seam ( const SeamPile &  seam_pile,
const SEAM *  seam,
SeamQueue *  seam_queue 
)

Definition at line 192 of file findseam.cpp.

198  {
199  for (int x = 0; x < seam_pile.size(); ++x) {
200  const SEAM *this_one = seam_pile.get(x).data();
201  if (seam->CombineableWith(*this_one, SPLIT_CLOSENESS, chop_ok_split)) {
202  SEAM *new_one = new SEAM(*seam);
203  new_one->CombineWith(*this_one);

◆ dict_word()

int tesseract::Wordrec::dict_word ( const WERD_CHOICE word)

Definition at line 103 of file tface.cpp.

◆ DoSegSearch()

void tesseract::Wordrec::DoSegSearch ( WERD_RES *  word_res)

Definition at line 35 of file segsearch.cpp.

35  {
36  BestChoiceBundle best_choice_bundle(word_res->ratings->dimension());
37  // Run Segmentation Search.
38  SegSearch(word_res, &best_choice_bundle, nullptr);
39 }

◆ end_recog()

int tesseract::Wordrec::end_recog ( )

Definition at line 76 of file tface.cpp.

◆ fill_filtered_fragment_list()

void tesseract::Wordrec::fill_filtered_fragment_list ( BLOB_CHOICE_LIST *  choices,
int  fragment_pos,
int  num_frag_parts,
BLOB_CHOICE_LIST *  filtered_choices 
)

Definition at line 100 of file pieces.cpp.

101  {
102  BLOB_CHOICE_IT filtered_choices_it(filtered_choices);
103  BLOB_CHOICE_IT choices_it(choices);
104 
105  for (choices_it.mark_cycle_pt(); !choices_it.cycled_list();
106  choices_it.forward()) {
107  UNICHAR_ID choice_unichar_id = choices_it.data()->unichar_id();
108  const CHAR_FRAGMENT *frag = unicharset.get_fragment(choice_unichar_id);
109 
110  if (frag != nullptr && frag->get_pos() == fragment_pos &&
111  frag->get_total() == num_frag_parts) {
112  // Recover the unichar_id of the unichar that this fragment is
113  // a part of
114  auto *b = new BLOB_CHOICE(*choices_it.data());
115  int original_unichar = unicharset.unichar_to_id(frag->get_unichar());
116  b->set_unichar_id(original_unichar);
117  filtered_choices_it.add_to_end(b);
118  }
119  }
120 
121  filtered_choices->sort(SortByUnicharID<BLOB_CHOICE>);
122 }
123 
124 

◆ FillLattice()

void tesseract::Wordrec::FillLattice ( const MATRIX &  ratings,
const WERD_CHOICE_LIST &  best_choices,
const UNICHARSET &  unicharset,
BlamerBundle *  blamer_bundle 
)

◆ get_fragment_lists()

void tesseract::Wordrec::get_fragment_lists ( int16_t  current_frag,
int16_t  current_row,
int16_t  start,
int16_t  num_frag_parts,
int16_t  num_blobs,
MATRIX *  ratings,
BLOB_CHOICE_LIST *  choice_lists 
)

Definition at line 274 of file pieces.cpp.

277  {
278  if (current_frag == num_frag_parts) {
279  merge_and_put_fragment_lists(start, current_row - 1, num_frag_parts,
280  choice_lists, ratings);
281  return;
282  }
283 
284  for (int16_t x = current_row; x < num_blobs; x++) {
285  BLOB_CHOICE_LIST *choices = ratings->get(current_row, x);
286  if (choices == nullptr)
287  continue;
288 
289  fill_filtered_fragment_list(choices, current_frag, num_frag_parts,
290  &choice_lists[current_frag]);
291  if (!choice_lists[current_frag].empty()) {
292  get_fragment_lists(current_frag + 1, x + 1, start, num_frag_parts,
293  num_blobs, ratings, choice_lists);
294  choice_lists[current_frag].clear();
295  }
296  }
297 }

◆ grade_sharpness()

PRIORITY tesseract::Wordrec::grade_sharpness ( SPLIT *  split)

Definition at line 81 of file gradechop.cpp.

◆ grade_split_length()

PRIORITY tesseract::Wordrec::grade_split_length ( SPLIT *  split)

Definition at line 59 of file gradechop.cpp.

68  {
69  PRIORITY grade;
70 
71  grade = point_priority (split->point1) + point_priority (split->point2);
72 

◆ improve_by_chopping()

void tesseract::Wordrec::improve_by_chopping ( float  rating_cert_scale,
WERD_RES *  word,
BestChoiceBundle *  best_choice_bundle,
BlamerBundle *  blamer_bundle,
LMPainPoints *  pain_points,
GenericVector< SegSearchPending > *  pending 
)

Definition at line 452 of file chopper.cpp.

458  {
459  int blob_number;
460  do { // improvement loop.
461  // Make a simple vector of BLOB_CHOICEs to make it easy to pick which
462  // one to chop.
463  GenericVector<BLOB_CHOICE*> blob_choices;
464  int num_blobs = word->ratings->dimension();
465  for (int i = 0; i < num_blobs; ++i) {
466  BLOB_CHOICE_LIST* choices = word->ratings->get(i, i);
467  if (choices == nullptr || choices->empty()) {
468  blob_choices.push_back(nullptr);
469  } else {
470  BLOB_CHOICE_IT bc_it(choices);
471  blob_choices.push_back(bc_it.data());
472  }
473  }
474  SEAM* seam = improve_one_blob(blob_choices, &best_choice_bundle->fixpt,
475  false, false, word, &blob_number);
476  if (seam == nullptr) break;
477  // A chop has been made. We have to correct all the data structures to
478  // take into account the extra bottom-level blob.
479  // Put the seam into the seam_array and correct everything else on the
480  // word: ratings matrix (including matrix location in the BLOB_CHOICES),
481  // states in WERD_CHOICEs, and blob widths.
482  word->InsertSeam(blob_number, seam);
483  // Insert a new entry in the beam array.
484  best_choice_bundle->beam.insert(new LanguageModelState, blob_number);
485  // Fixpts are outdated, but will get recalculated.
486  best_choice_bundle->fixpt.clear();
487  // Remap existing pain points.
488  pain_points->RemapForSplit(blob_number);
489  // Insert a new pending at the chop point.
490  pending->insert(SegSearchPending(), blob_number);
491 
492  // Classify the two newly created blobs using ProcessSegSearchPainPoint,
493  // as that updates the pending correctly and adds new pain points.
494  MATRIX_COORD pain_point(blob_number, blob_number);
495  ProcessSegSearchPainPoint(0.0f, pain_point, "Chop1", pending, word,
496  pain_points, blamer_bundle);
497  pain_point.col = blob_number + 1;
498  pain_point.row = blob_number + 1;
499  ProcessSegSearchPainPoint(0.0f, pain_point, "Chop2", pending, word,
500  pain_points, blamer_bundle);
501  if (language_model_->language_model_ngram_on) {
502  // N-gram evaluation depends on the number of blobs in a chunk, so we
503  // have to re-evaluate everything in the word.
504  ResetNGramSearch(word, best_choice_bundle, pending);
505  blob_number = 0;
506  }
507  // Run language model incrementally. (Except with the n-gram model on.)
508  UpdateSegSearchNodes(rating_cert_scale, blob_number, pending,
509  word, pain_points, best_choice_bundle, blamer_bundle);
510  } while (!language_model_->AcceptableChoiceFound() &&
511  word->ratings->dimension() < kMaxNumChunks);
512 
513  // If after running only the chopper best_choice is incorrect and no blame
514  // has been yet set, blame the classifier if best_choice is classifier's
515  // top choice and is a dictionary word (i.e. language model could not have
516  // helped). Otherwise blame the tradeoff between the classifier and
517  // the old language model (permuters).
518  if (word->blamer_bundle != nullptr &&
520  !word->blamer_bundle->ChoiceIsCorrect(word->best_choice)) {
521  bool valid_permuter = word->best_choice != nullptr &&
524  getDict().getUnicharset(),
525  valid_permuter,
527  }

◆ improve_one_blob()

SEAM * tesseract::Wordrec::improve_one_blob ( const GenericVector< BLOB_CHOICE * > &  blob_choices,
DANGERR *  fixpt,
bool  split_next_to_fragment,
bool  italic_blob,
WERD_RES *  word,
int *  blob_number 
)

Definition at line 325 of file chopper.cpp.

331  {
332  float rating_ceiling = FLT_MAX;
333  SEAM *seam = nullptr;
334  do {
335  *blob_number = select_blob_to_split_from_fixpt(fixpt);
336  if (chop_debug) tprintf("blob_number from fixpt = %d\n", *blob_number);
337  bool split_point_from_dict = (*blob_number != -1);
338  if (split_point_from_dict) {
339  fixpt->clear();
340  } else {
341  *blob_number = select_blob_to_split(blob_choices, rating_ceiling,
342  split_next_to_fragment);
343  }
344  if (chop_debug) tprintf("blob_number = %d\n", *blob_number);
345  if (*blob_number == -1)
346  return nullptr;
347 
348  // TODO(rays) it may eventually help to allow italic_blob to be true,
349  seam = chop_numbered_blob(word->chopped_word, *blob_number, italic_blob,
350  word->seam_array);
351  if (seam != nullptr)
352  return seam; // Success!
353  if (blob_choices[*blob_number] == nullptr)
354  return nullptr;
355  if (!split_point_from_dict) {
356  // We chopped the worst rated blob, try something else next time.
357  rating_ceiling = blob_choices[*blob_number]->rating();
358  }
359  } while (true);
360  return seam;

◆ InitBlamerForSegSearch()

void tesseract::Wordrec::InitBlamerForSegSearch ( WERD_RES *  word_res,
LMPainPoints *  pain_points,
BlamerBundle *  blamer_bundle,
STRING blamer_debug 
)
protected

Definition at line 327 of file segsearch.cpp.

330  {
331  pain_points->Clear(); // Clear pain points heap.
332  blamer_bundle->InitForSegSearch(word_res->best_choice, word_res->ratings,
333  getDict().WildcardID(), wordrec_debug_blamer,
334  blamer_debug, pain_points,
335  segsearch_max_char_wh_ratio, word_res);
336 }

◆ InitialSegSearch()

void tesseract::Wordrec::InitialSegSearch ( WERD_RES *  word_res,
LMPainPoints *  pain_points,
GenericVector< SegSearchPending > *  pending,
BestChoiceBundle *  best_choice_bundle,
BlamerBundle *  blamer_bundle 
)

Definition at line 135 of file segsearch.cpp.

138  {
139  if (segsearch_debug_level > 0) {
140  tprintf("Starting SegSearch on ratings matrix%s:\n",
141  wordrec_enable_assoc ? " (with assoc)" : "");
142  word_res->ratings->print(getDict().getUnicharset());
143  }
144 
145  pain_points->GenerateInitial(word_res);
146 
147  // Compute scaling factor that will help us recover blob outline length
148  // from classifier rating and certainty for the blob.
149  float rating_cert_scale = -1.0 * getDict().certainty_scale / rating_scale;
150 
153  segsearch_max_char_wh_ratio, rating_cert_scale);
154 
155  // Initialize blamer-related information: map character boxes recorded in
156  // blamer_bundle->norm_truth_word to the corresponding i,j indices in the
157  // ratings matrix. We expect this step to succeed, since when running the
158  // chopper we checked that the correct chops are present.
159  if (blamer_bundle != nullptr) {
160  blamer_bundle->SetupCorrectSegmentation(word_res->chopped_word,
162  }
163 
164  // pending[col] tells whether there is update work to do to combine
165  // best_choice_bundle->beam[col - 1] with some BLOB_CHOICEs in matrix[col, *].
166  // As the language model state is updated, pending entries are modified to
167  // minimize duplication of work. It is important that during the update the
168  // children are considered in the non-decreasing order of their column, since
169  // this guarantees that all the parents would be up to date before an update
170  // of a child is done.
171  pending->init_to_size(word_res->ratings->dimension(), SegSearchPending());
172 
173  // Search the ratings matrix for the initial best path.
174  (*pending)[0].SetColumnClassified();
175  UpdateSegSearchNodes(rating_cert_scale, 0, pending, word_res,
176  pain_points, best_choice_bundle, blamer_bundle);
177 }

◆ is_inside_angle()

bool tesseract::Wordrec::is_inside_angle ( EDGEPT *  pt)

Definition at line 89 of file chop.cpp.

89  {
90  return angle_change(pt->prev, pt, pt->next) < chop_inside_angle;
91 }

◆ merge_and_put_fragment_lists()

void tesseract::Wordrec::merge_and_put_fragment_lists ( int16_t  row,
int16_t  column,
int16_t  num_frag_parts,
BLOB_CHOICE_LIST *  choice_lists,
MATRIX *  ratings 
)

Definition at line 132 of file pieces.cpp.

134  {
135  auto *choice_lists_it = new BLOB_CHOICE_IT[num_frag_parts];
136 
137  for (int i = 0; i < num_frag_parts; i++) {
138  choice_lists_it[i].set_to_list(&choice_lists[i]);
139  choice_lists_it[i].mark_cycle_pt();
140  }
141 
142  BLOB_CHOICE_LIST *merged_choice = ratings->get(row, column);
143  if (merged_choice == nullptr)
144  merged_choice = new BLOB_CHOICE_LIST;
145 
146  bool end_of_list = false;
147  BLOB_CHOICE_IT merged_choice_it(merged_choice);
148  while (!end_of_list) {
149  // Find the maximum unichar_id of the current entry the iterators
150  // are pointing at
151  UNICHAR_ID max_unichar_id = choice_lists_it[0].data()->unichar_id();
152  for (int i = 0; i < num_frag_parts; i++) {
153  UNICHAR_ID unichar_id = choice_lists_it[i].data()->unichar_id();
154  if (max_unichar_id < unichar_id) {
155  max_unichar_id = unichar_id;
156  }
157  }
158 
159  // Move the each iterators until it gets to an entry that has a
160  // value greater than or equal to max_unichar_id
161  for (int i = 0; i < num_frag_parts; i++) {
162  UNICHAR_ID unichar_id = choice_lists_it[i].data()->unichar_id();
163  while (!choice_lists_it[i].cycled_list() &&
164  unichar_id < max_unichar_id) {
165  choice_lists_it[i].forward();
166  unichar_id = choice_lists_it[i].data()->unichar_id();
167  }
168  if (choice_lists_it[i].cycled_list()) {
169  end_of_list = true;
170  break;
171  }
172  }
173 
174  if (end_of_list)
175  break;
176 
177  // Checks if the fragments are parts of the same character
178  UNICHAR_ID first_unichar_id = choice_lists_it[0].data()->unichar_id();
179  bool same_unichar = true;
180  for (int i = 1; i < num_frag_parts; i++) {
181  UNICHAR_ID unichar_id = choice_lists_it[i].data()->unichar_id();
182  if (unichar_id != first_unichar_id) {
183  same_unichar = false;
184  break;
185  }
186  }
187 
188  if (same_unichar) {
189  // Add the merged character to the result
190  UNICHAR_ID merged_unichar_id = first_unichar_id;
191  GenericVector<ScoredFont> merged_fonts =
192  choice_lists_it[0].data()->fonts();
193  float merged_min_xheight = choice_lists_it[0].data()->min_xheight();
194  float merged_max_xheight = choice_lists_it[0].data()->max_xheight();
195  float positive_yshift = 0, negative_yshift = 0;
196  int merged_script_id = choice_lists_it[0].data()->script_id();
197  BlobChoiceClassifier classifier = choice_lists_it[0].data()->classifier();
198 
199  float merged_rating = 0, merged_certainty = 0;
200  for (int i = 0; i < num_frag_parts; i++) {
201  float rating = choice_lists_it[i].data()->rating();
202  float certainty = choice_lists_it[i].data()->certainty();
203 
204  if (i == 0 || certainty < merged_certainty)
205  merged_certainty = certainty;
206  merged_rating += rating;
207 
208  choice_lists_it[i].forward();
209  if (choice_lists_it[i].cycled_list())
210  end_of_list = true;
211  IntersectRange(choice_lists_it[i].data()->min_xheight(),
212  choice_lists_it[i].data()->max_xheight(),
213  &merged_min_xheight, &merged_max_xheight);
214  float yshift = choice_lists_it[i].data()->yshift();
215  if (yshift > positive_yshift) positive_yshift = yshift;
216  if (yshift < negative_yshift) negative_yshift = yshift;
217  // Use the min font rating over the parts.
218  // TODO(rays) font lists are unsorted. Need to be faster?
219  const GenericVector<ScoredFont>& frag_fonts =
220  choice_lists_it[i].data()->fonts();
221  for (int f = 0; f < frag_fonts.size(); ++f) {
222  int merged_f = 0;
223  for (merged_f = 0; merged_f < merged_fonts.size() &&
224  merged_fonts[merged_f].fontinfo_id != frag_fonts[f].fontinfo_id;
225  ++merged_f) {}
226  if (merged_f == merged_fonts.size()) {
227  merged_fonts.push_back(frag_fonts[f]);
228  } else if (merged_fonts[merged_f].score > frag_fonts[f].score) {
229  merged_fonts[merged_f].score = frag_fonts[f].score;
230  }
231  }
232  }
233 
234  float merged_yshift = positive_yshift != 0
235  ? (negative_yshift != 0 ? 0 : positive_yshift)
236  : negative_yshift;
237  auto* choice = new BLOB_CHOICE(merged_unichar_id,
238  merged_rating,
239  merged_certainty,
240  merged_script_id,
241  merged_min_xheight,
242  merged_max_xheight,
243  merged_yshift,
244  classifier);
245  choice->set_fonts(merged_fonts);
246  merged_choice_it.add_to_end(choice);
247  }
248  }
249 
251  print_ratings_list("Merged Fragments", merged_choice,
252  unicharset);
253 
254  if (merged_choice->empty())
255  delete merged_choice;
256  else
257  ratings->put(row, column, merged_choice);
258 
259  delete [] choice_lists_it;
260 }
261 

◆ merge_fragments()

void tesseract::Wordrec::merge_fragments ( MATRIX *  ratings,
int16_t  num_blobs 
)

Definition at line 305 of file pieces.cpp.

306  {
307  BLOB_CHOICE_LIST choice_lists[CHAR_FRAGMENT::kMaxChunks];
308  for (int16_t start = 0; start < num_blobs; start++) {
309  for (int frag_parts = 2; frag_parts <= CHAR_FRAGMENT::kMaxChunks;
310  frag_parts++) {
311  get_fragment_lists(0, start, start, frag_parts, num_blobs,
312  ratings, choice_lists);
313  }
314  }
315 
316  // Delete fragments from the rating matrix
317  for (int16_t x = 0; x < num_blobs; x++) {
318  for (int16_t y = x; y < num_blobs; y++) {
319  BLOB_CHOICE_LIST *choices = ratings->get(x, y);
320  if (choices != nullptr) {
321  BLOB_CHOICE_IT choices_it(choices);
322  for (choices_it.mark_cycle_pt(); !choices_it.cycled_list();
323  choices_it.forward()) {
324  UNICHAR_ID choice_unichar_id = choices_it.data()->unichar_id();
325  const CHAR_FRAGMENT *frag =
326  unicharset.get_fragment(choice_unichar_id);
327  if (frag != nullptr)
328  delete choices_it.extract();
329  }
330  }
331  }
332  }

◆ near_point()

bool tesseract::Wordrec::near_point ( EDGEPT *  point,
EDGEPT *  line_pt_0,
EDGEPT *  line_pt_1,
EDGEPT **  near_pt 
)

Definition at line 50 of file outlines.cpp.

54  {
55  /* Slope and intercept */
56  slope = (y0 - y1) / (x0 - x1);
57  intercept = y1 - x1 * slope;
58 
59  /* Find perpendicular */
60  p.x = static_cast<int16_t>((point->pos.x + (point->pos.y - intercept) * slope) /
61  (slope * slope + 1));
62  p.y = static_cast<int16_t>(slope * p.x + intercept);
63  }
64 
65  if (is_on_line (p, line_pt_0->pos, line_pt_1->pos) &&
66  (!same_point (p, line_pt_0->pos)) && (!same_point (p, line_pt_1->pos))) {
67  /* Intersection on line */
68  *near_pt = make_edgept(p.x, p.y, line_pt_1, line_pt_0);
69  return true;
70  } else { /* Intersection not on line */
71  *near_pt = closest(point, line_pt_0, line_pt_1);
72  return false;
73  }
74 }
75 
76 } // namespace tesseract

◆ new_max_point()

void tesseract::Wordrec::new_max_point ( EDGEPT *  local_max,
PointHeap *  points 
)

Definition at line 255 of file chop.cpp.

255  {
256  int16_t dir;
257 
258  dir = direction (local_max);
259 
260  if (dir > 0) {
261  add_point_to_list(points, local_max);
262  return;
263  }
264 
265  if (dir == 0 && point_priority (local_max) < 0) {
266  add_point_to_list(points, local_max);
267  return;
268  }
269 }

◆ new_min_point()

void tesseract::Wordrec::new_min_point ( EDGEPT *  local_min,
PointHeap *  points 
)

Definition at line 231 of file chop.cpp.

231  {
232  int16_t dir;
233 
234  dir = direction (local_min);
235 
236  if (dir < 0) {
237  add_point_to_list(points, local_min);
238  return;
239  }
240 
241  if (dir == 0 && point_priority (local_min) < 0) {
242  add_point_to_list(points, local_min);
243  return;
244  }
245 }

◆ pick_close_point()

EDGEPT * tesseract::Wordrec::pick_close_point ( EDGEPT *  critical_point,
EDGEPT *  vertical_point,
int *  best_dist 
)

Definition at line 134 of file chop.cpp.

136  {
137  EDGEPT *best_point = nullptr;
138  int this_distance;
139  int found_better;
140 
141  do {
142  found_better = false;
143 
144  this_distance = edgept_dist (critical_point, vertical_point);
145  if (this_distance <= *best_dist) {
146 
147  if (!(same_point (critical_point->pos, vertical_point->pos) ||
148  same_point (critical_point->pos, vertical_point->next->pos) ||
149  (best_point && same_point (best_point->pos, vertical_point->pos)) ||
150  is_exterior_point (critical_point, vertical_point))) {
151  *best_dist = this_distance;
152  best_point = vertical_point;
154  found_better = true;
155  }
156  }
157  vertical_point = vertical_point->next;
158  }
159  while (found_better == true);
160 
161  return (best_point);
162 }

◆ pick_good_seam()

SEAM * tesseract::Wordrec::pick_good_seam ( TBLOB *  blob)

Definition at line 210 of file findseam.cpp.

216  {
217  SeamPile seam_pile(chop_seam_pile_size);
218  EDGEPT *points[MAX_NUM_POINTS];
219  EDGEPT_CLIST new_points;
220  SEAM *seam = nullptr;
221  TESSLINE *outline;
222  int16_t num_points = 0;
223 
224 #ifndef GRAPHICS_DISABLED
225  if (chop_debug > 2)
226  wordrec_display_splits.set_value(true);
227 
228  draw_blob_edges(blob);
229 #endif
230 
231  PointHeap point_heap(MAX_NUM_POINTS);
232  for (outline = blob->outlines; outline; outline = outline->next)
233  prioritize_points(outline, &point_heap);
234 
235  while (!point_heap.empty() && num_points < MAX_NUM_POINTS) {
236  points[num_points++] = point_heap.PeekTop().data;
237  point_heap.Pop(nullptr);
238  }
239 
240  /* Initialize queue */
241  SeamQueue seam_queue(MAX_NUM_SEAMS);
242 
243  try_point_pairs(points, num_points, &seam_queue, &seam_pile, &seam, blob);
244  try_vertical_splits(points, num_points, &new_points,
245  &seam_queue, &seam_pile, &seam, blob);
246 
247  if (seam == nullptr) {
248  choose_best_seam(&seam_queue, nullptr, BAD_PRIORITY, &seam, blob, &seam_pile);
249  } else if (seam->priority() > chop_good_split) {
250  choose_best_seam(&seam_queue, nullptr, seam->priority(), &seam, blob,
251  &seam_pile);
252  }
253 
254  EDGEPT_C_IT it(&new_points);
255  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
256  EDGEPT *inserted_point = it.data();
257  if (seam == nullptr || !seam->UsesPoint(inserted_point)) {
258  for (outline = blob->outlines; outline; outline = outline->next) {
259  if (outline->loop == inserted_point) {
260  outline->loop = outline->loop->next;
261  }
262  }
263  remove_edgept(inserted_point);
264  }
265  }
266 
267  if (seam) {
268  if (seam->priority() > chop_ok_split) {
269  delete seam;
270  seam = nullptr;
271  }
272 #ifndef GRAPHICS_DISABLED
273  else if (wordrec_display_splits) {
274  seam->Mark(edge_window);
275  if (chop_debug > 2) {
278  }
279  }
280 #endif
281  }

◆ point_priority()

PRIORITY tesseract::Wordrec::point_priority ( EDGEPT *  point)

Definition at line 65 of file chop.cpp.

65  {
66  return static_cast<PRIORITY>(angle_change(point->prev, point, point->next));
67 }

◆ prioritize_points()

void tesseract::Wordrec::prioritize_points ( TESSLINE *  outline,
PointHeap *  points 
)

Definition at line 172 of file chop.cpp.

172  {
173  EDGEPT *this_point;
174  EDGEPT *local_min = nullptr;
175  EDGEPT *local_max = nullptr;
176 
177  this_point = outline->loop;
178  local_min = this_point;
179  local_max = this_point;
180  do {
181  if (this_point->vec.y < 0) {
182  /* Look for minima */
183  if (local_max != nullptr)
184  new_max_point(local_max, points);
185  else if (is_inside_angle (this_point))
186  add_point_to_list(points, this_point);
187  local_max = nullptr;
188  local_min = this_point->next;
189  }
190  else if (this_point->vec.y > 0) {
191  /* Look for maxima */
192  if (local_min != nullptr)
193  new_min_point(local_min, points);
194  else if (is_inside_angle (this_point))
195  add_point_to_list(points, this_point);
196  local_min = nullptr;
197  local_max = this_point->next;
198  }
199  else {
200  /* Flat area */
201  if (local_max != nullptr) {
202  if (local_max->prev->vec.y != 0) {
203  new_max_point(local_max, points);
204  }
205  local_max = this_point->next;
206  local_min = nullptr;
207  }
208  else {
209  if (local_min->prev->vec.y != 0) {
210  new_min_point(local_min, points);
211  }
212  local_min = this_point->next;
213  local_max = nullptr;
214  }
215  }
216 
217  /* Next point */
218  this_point = this_point->next;
219  }
220  while (this_point != outline->loop);
221 }

◆ ProcessSegSearchPainPoint()

void tesseract::Wordrec::ProcessSegSearchPainPoint ( float  pain_point_priority,
const MATRIX_COORD pain_point,
const char *  pain_point_type,
GenericVector< SegSearchPending > *  pending,
WERD_RES *  word_res,
LMPainPoints *  pain_points,
BlamerBundle *  blamer_bundle 
)
protected

Definition at line 247 of file segsearch.cpp.

251  {
252  if (segsearch_debug_level > 0) {
253  tprintf("Classifying pain point %s priority=%.4f, col=%d, row=%d\n",
254  pain_point_type, pain_point_priority,
255  pain_point.col, pain_point.row);
256  }
257  ASSERT_HOST(pain_points != nullptr);
258  MATRIX *ratings = word_res->ratings;
259  // Classify blob [pain_point.col pain_point.row]
260  if (!pain_point.Valid(*ratings)) {
261  ratings->IncreaseBandSize(pain_point.row + 1 - pain_point.col);
262  }
263  ASSERT_HOST(pain_point.Valid(*ratings));
264  BLOB_CHOICE_LIST *classified = classify_piece(word_res->seam_array,
265  pain_point.col, pain_point.row,
266  pain_point_type,
267  word_res->chopped_word,
268  blamer_bundle);
269  BLOB_CHOICE_LIST *lst = ratings->get(pain_point.col, pain_point.row);
270  if (lst == nullptr) {
271  ratings->put(pain_point.col, pain_point.row, classified);
272  } else {
273  // We can not delete old BLOB_CHOICEs, since they might contain
274  // ViterbiStateEntries that are parents of other "active" entries.
275  // Thus if the matrix cell already contains classifications we add
276  // the new ones to the beginning of the list.
277  BLOB_CHOICE_IT it(lst);
278  it.add_list_before(classified);
279  delete classified; // safe to delete, since empty after add_list_before()
280  classified = nullptr;
281  }
282 
283  if (segsearch_debug_level > 0) {
284  print_ratings_list("Updated ratings matrix with a new entry:",
285  ratings->get(pain_point.col, pain_point.row),
286  getDict().getUnicharset());
287  ratings->print(getDict().getUnicharset());
288  }
289 
290  // Insert initial "pain points" to join the newly classified blob
291  // with its left and right neighbors.
292  if (classified != nullptr && !classified->empty()) {
293  if (pain_point.col > 0) {
294  pain_points->GeneratePainPoint(
295  pain_point.col - 1, pain_point.row, LM_PPTYPE_SHAPE, 0.0,
296  true, segsearch_max_char_wh_ratio, word_res);
297  }
298  if (pain_point.row + 1 < ratings->dimension()) {
299  pain_points->GeneratePainPoint(
300  pain_point.col, pain_point.row + 1, LM_PPTYPE_SHAPE, 0.0,
301  true, segsearch_max_char_wh_ratio, word_res);
302  }
303  }
304  (*pending)[pain_point.col].SetBlobClassified(pain_point.row);
305 }

◆ program_editdown()

void tesseract::Wordrec::program_editdown ( int32_t  elasped_time)

Definition at line 89 of file tface.cpp.

89  {
90  return getDict().valid_word(word);
91 }
92 
93 
94 #ifndef DISABLED_LEGACY_ENGINE

◆ program_editup()

void tesseract::Wordrec::program_editup ( const char *  textbase,
TessdataManager *  init_classifier,
TessdataManager *  init_dict 
)

Definition at line 54 of file tface.cpp.

62  {
63  program_editdown (0);
64 
65  return (0);
66 }
67 
68 

◆ ResetNGramSearch()

void tesseract::Wordrec::ResetNGramSearch ( WERD_RES *  word_res,
BestChoiceBundle *  best_choice_bundle,
GenericVector< SegSearchPending > *  pending 
)
protected

Definition at line 310 of file segsearch.cpp.

312  {
313  // TODO(rays) More refactoring required here.
314  // Delete existing viterbi states.
315  for (int col = 0; col < best_choice_bundle->beam.size(); ++col) {
316  best_choice_bundle->beam[col]->Clear();
317  }
318  // Reset best_choice_bundle.
319  word_res->ClearWordChoices();
320  best_choice_bundle->best_vse = nullptr;
321  // Clear out all existing pendings and add a new one for the first column.
322  (*pending)[0].SetColumnClassified();
323  for (int i = 1; i < pending->size(); ++i)
324  (*pending)[i].Clear();
325 }

◆ SaveAltChoices()

void tesseract::Wordrec::SaveAltChoices ( const LIST &  best_choices,
WERD_RES *  word 
)

◆ SegSearch()

void tesseract::Wordrec::SegSearch ( WERD_RES *  word_res,
BestChoiceBundle *  best_choice_bundle,
BlamerBundle *  blamer_bundle 
)

Definition at line 41 of file segsearch.cpp.

43  {
44  LMPainPoints pain_points(segsearch_max_pain_points,
48  // Compute scaling factor that will help us recover blob outline length
49  // from classifier rating and certainty for the blob.
50  float rating_cert_scale = -1.0 * getDict().certainty_scale / rating_scale;
52  InitialSegSearch(word_res, &pain_points, &pending, best_choice_bundle,
53  blamer_bundle);
54 
55  if (!SegSearchDone(0)) { // find a better choice
56  if (chop_enable && word_res->chopped_word != nullptr) {
57  improve_by_chopping(rating_cert_scale, word_res, best_choice_bundle,
58  blamer_bundle, &pain_points, &pending);
59  }
60  if (chop_debug) SEAM::PrintSeams("Final seam list:", word_res->seam_array);
61 
62  if (blamer_bundle != nullptr &&
63  !blamer_bundle->ChoiceIsCorrect(word_res->best_choice)) {
64  blamer_bundle->SetChopperBlame(word_res, wordrec_debug_blamer);
65  }
66  }
67  // Keep trying to find a better path by fixing the "pain points".
68 
69  MATRIX_COORD pain_point;
70  float pain_point_priority;
71  int num_futile_classifications = 0;
72  STRING blamer_debug;
73  while (wordrec_enable_assoc &&
74  (!SegSearchDone(num_futile_classifications) ||
75  (blamer_bundle != nullptr &&
76  blamer_bundle->GuidedSegsearchStillGoing()))) {
77  // Get the next valid "pain point".
78  bool found_nothing = true;
79  LMPainPointsType pp_type;
80  while ((pp_type = pain_points.Deque(&pain_point, &pain_point_priority)) !=
81  LM_PPTYPE_NUM) {
82  if (!pain_point.Valid(*word_res->ratings)) {
83  word_res->ratings->IncreaseBandSize(
84  pain_point.row - pain_point.col + 1);
85  }
86  if (pain_point.Valid(*word_res->ratings) &&
87  !word_res->ratings->Classified(pain_point.col, pain_point.row,
88  getDict().WildcardID())) {
89  found_nothing = false;
90  break;
91  }
92  }
93  if (found_nothing) {
94  if (segsearch_debug_level > 0) tprintf("Pain points queue is empty\n");
95  break;
96  }
97  ProcessSegSearchPainPoint(pain_point_priority, pain_point,
99  &pending, word_res, &pain_points, blamer_bundle);
100 
101  UpdateSegSearchNodes(rating_cert_scale, pain_point.col, &pending,
102  word_res, &pain_points, best_choice_bundle,
103  blamer_bundle);
104  if (!best_choice_bundle->updated) ++num_futile_classifications;
105 
106  if (segsearch_debug_level > 0) {
107  tprintf("num_futile_classifications %d\n", num_futile_classifications);
108  }
109 
110  best_choice_bundle->updated = false; // reset updated
111 
112  // See if it's time to terminate SegSearch or time for starting a guided
113  // search for the true path to find the blame for the incorrect best_choice.
114  if (SegSearchDone(num_futile_classifications) &&
115  blamer_bundle != nullptr &&
116  blamer_bundle->GuidedSegsearchNeeded(word_res->best_choice)) {
117  InitBlamerForSegSearch(word_res, &pain_points, blamer_bundle,
118  &blamer_debug);
119  }
120  } // end while loop exploring alternative paths
121  if (blamer_bundle != nullptr) {
122  blamer_bundle->FinishSegSearch(word_res->best_choice,
123  wordrec_debug_blamer, &blamer_debug);
124  }
125 
126  if (segsearch_debug_level > 0) {
127  tprintf("Done with SegSearch (AcceptableChoiceFound: %d)\n",
128  language_model_->AcceptableChoiceFound());
129  }
130 }

◆ SegSearchDone()

bool tesseract::Wordrec::SegSearchDone ( int  num_futile_classifications)
inlineprotected

Definition at line 486 of file wordrec.h.

486  {
487  return (language_model_->AcceptableChoiceFound() ||
488  num_futile_classifications >=
490  }

◆ select_blob_to_split()

int tesseract::Wordrec::select_blob_to_split ( const GenericVector< BLOB_CHOICE * > &  blob_choices,
float  rating_ceiling,
bool  split_next_to_fragment 
)

Definition at line 535 of file chopper.cpp.

539  {
540  BLOB_CHOICE *blob_choice;
541  int x;
542  float worst = -FLT_MAX;
543  int worst_index = -1;
544  float worst_near_fragment = -FLT_MAX;
545  int worst_index_near_fragment = -1;
546  const CHAR_FRAGMENT **fragments = nullptr;
547 
548  if (chop_debug) {
549  if (rating_ceiling < FLT_MAX)
550  tprintf("rating_ceiling = %8.4f\n", rating_ceiling);
551  else
552  tprintf("rating_ceiling = No Limit\n");
553  }
554 
555  if (split_next_to_fragment && blob_choices.size() > 0) {
556  fragments = new const CHAR_FRAGMENT *[blob_choices.size()];
557  if (blob_choices[0] != nullptr) {
558  fragments[0] = getDict().getUnicharset().get_fragment(
559  blob_choices[0]->unichar_id());
560  } else {
561  fragments[0] = nullptr;
562  }
563  }
564 
565  for (x = 0; x < blob_choices.size(); ++x) {
566  if (blob_choices[x] == nullptr) {
567  delete[] fragments;
568  return x;
569  } else {
570  blob_choice = blob_choices[x];
571  // Populate fragments for the following position.
572  if (split_next_to_fragment && x+1 < blob_choices.size()) {
573  if (blob_choices[x + 1] != nullptr) {
574  fragments[x + 1] = getDict().getUnicharset().get_fragment(
575  blob_choices[x + 1]->unichar_id());
576  } else {
577  fragments[x + 1] = nullptr;
578  }
579  }
580  if (blob_choice->rating() < rating_ceiling &&
581  blob_choice->certainty() < tessedit_certainty_threshold) {
582  // Update worst and worst_index.
583  if (blob_choice->rating() > worst) {
584  worst_index = x;
585  worst = blob_choice->rating();
586  }
587  if (split_next_to_fragment) {
588  // Update worst_near_fragment and worst_index_near_fragment.
589  bool expand_following_fragment =
590  (x + 1 < blob_choices.size() &&
591  fragments[x+1] != nullptr && !fragments[x+1]->is_beginning());
592  bool expand_preceding_fragment =
593  (x > 0 && fragments[x-1] != nullptr && !fragments[x-1]->is_ending());
594  if ((expand_following_fragment || expand_preceding_fragment) &&
595  blob_choice->rating() > worst_near_fragment) {
596  worst_index_near_fragment = x;
597  worst_near_fragment = blob_choice->rating();
598  if (chop_debug) {
599  tprintf("worst_index_near_fragment=%d"
600  " expand_following_fragment=%d"
601  " expand_preceding_fragment=%d\n",
602  worst_index_near_fragment,
603  expand_following_fragment,
604  expand_preceding_fragment);
605  }
606  }
607  }
608  }
609  }
610  }
611  delete[] fragments;
612  // TODO(daria): maybe a threshold of badness for
613  // worst_near_fragment would be useful.
614  return worst_index_near_fragment != -1 ?

◆ select_blob_to_split_from_fixpt()

int tesseract::Wordrec::select_blob_to_split_from_fixpt ( DANGERR *  fixpt)

Definition at line 622 of file chopper.cpp.

625  {
626  if (!fixpt)
627  return -1;
628  for (int i = 0; i < fixpt->size(); i++) {
629  if ((*fixpt)[i].begin + 1 == (*fixpt)[i].end &&
630  (*fixpt)[i].dangerous &&
631  (*fixpt)[i].correct_is_ngram) {
632  return (*fixpt)[i].begin;
633  }

◆ set_pass1()

void tesseract::Wordrec::set_pass1 ( )

Definition at line 115 of file tface.cpp.

◆ set_pass2()

void tesseract::Wordrec::set_pass2 ( )

Definition at line 127 of file tface.cpp.

◆ try_point_pairs()

void tesseract::Wordrec::try_point_pairs ( EDGEPT *  points[MAX_NUM_POINTS],
int16_t  num_points,
SeamQueue *  seam_queue,
SeamPile *  seam_pile,
SEAM **  seam,
TBLOB *  blob 
)

Definition at line 290 of file findseam.cpp.

302  {
303  int16_t x;
304  int16_t y;
305  PRIORITY priority;
306 
307  for (x = 0; x < num_points; x++) {
308  for (y = x + 1; y < num_points; y++) {
309  if (points[y] &&
310  points[x]->WeightedDistance(*points[y], chop_x_y_weight) <
312  points[x] != points[y]->next && points[y] != points[x]->next &&
313  !is_exterior_point(points[x], points[y]) &&
314  !is_exterior_point(points[y], points[x])) {
315  SPLIT split(points[x], points[y]);

◆ try_vertical_splits()

void tesseract::Wordrec::try_vertical_splits ( EDGEPT *  points[MAX_NUM_POINTS],
int16_t  num_points,
EDGEPT_CLIST *  new_points,
SeamQueue *  seam_queue,
SeamPile *  seam_pile,
SEAM **  seam,
TBLOB *  blob 
)

Definition at line 327 of file findseam.cpp.

341  {
342  EDGEPT *vertical_point = nullptr;
343  int16_t x;
344  PRIORITY priority;
345  TESSLINE *outline;
346 
347  for (x = 0; x < num_points; x++) {
348  vertical_point = nullptr;
349  for (outline = blob->outlines; outline; outline = outline->next) {
350  vertical_projection_point(points[x], outline->loop,
351  &vertical_point, new_points);
352  }
353 
354  if (vertical_point && points[x] != vertical_point->next &&
355  vertical_point != points[x]->next &&

◆ UpdateSegSearchNodes()

void tesseract::Wordrec::UpdateSegSearchNodes ( float  rating_cert_scale,
int  starting_col,
GenericVector< SegSearchPending > *  pending,
WERD_RES *  word_res,
LMPainPoints *  pain_points,
BestChoiceBundle *  best_choice_bundle,
BlamerBundle *  blamer_bundle 
)
protected

Definition at line 179 of file segsearch.cpp.

186  {
187  MATRIX *ratings = word_res->ratings;
188  ASSERT_HOST(ratings->dimension() == pending->size());
189  ASSERT_HOST(ratings->dimension() == best_choice_bundle->beam.size());
190  for (int col = starting_col; col < ratings->dimension(); ++col) {
191  if (!(*pending)[col].WorkToDo()) continue;
192  int first_row = col;
193  int last_row = std::min(ratings->dimension() - 1,
194  col + ratings->bandwidth() - 1);
195  if ((*pending)[col].SingleRow() >= 0) {
196  first_row = last_row = (*pending)[col].SingleRow();
197  }
198  if (segsearch_debug_level > 0) {
199  tprintf("\n\nUpdateSegSearchNodes: col=%d, rows=[%d,%d], alljust=%d\n",
200  col, first_row, last_row,
201  (*pending)[col].IsRowJustClassified(INT32_MAX));
202  }
203  // Iterate over the pending list for this column.
204  for (int row = first_row; row <= last_row; ++row) {
205  // Update language model state of this child+parent pair.
206  BLOB_CHOICE_LIST *current_node = ratings->get(col, row);
207  LanguageModelState *parent_node =
208  col == 0 ? nullptr : best_choice_bundle->beam[col - 1];
209  if (current_node != nullptr &&
210  language_model_->UpdateState((*pending)[col].IsRowJustClassified(row),
211  col, row, current_node, parent_node,
212  pain_points, word_res,
213  best_choice_bundle, blamer_bundle) &&
214  row + 1 < ratings->dimension()) {
215  // Since the language model state of this entry changed, process all
216  // the child column.
217  (*pending)[row + 1].RevisitWholeColumn();
218  if (segsearch_debug_level > 0) {
219  tprintf("Added child col=%d to pending\n", row + 1);
220  }
221  } // end if UpdateState.
222  } // end for row.
223  } // end for col.
224  if (best_choice_bundle->best_vse != nullptr) {
225  ASSERT_HOST(word_res->StatesAllValid());
226  if (best_choice_bundle->best_vse->updated) {
227  pain_points->GenerateFromPath(rating_cert_scale,
228  best_choice_bundle->best_vse, word_res);
229  if (!best_choice_bundle->fixpt.empty()) {
230  pain_points->GenerateFromAmbigs(best_choice_bundle->fixpt,
231  best_choice_bundle->best_vse, word_res);
232  }
233  }
234  }
235  // The segsearch is completed. Reset all updated flags on all VSEs and reset
236  // all pendings.
237  for (int col = 0; col < pending->size(); ++col) {
238  (*pending)[col].Clear();
239  ViterbiStateEntry_IT
240  vse_it(&best_choice_bundle->beam[col]->viterbi_state_entries);
241  for (vse_it.mark_cycle_pt(); !vse_it.cycled_list(); vse_it.forward()) {
242  vse_it.data()->updated = false;
243  }
244  }
245 }

◆ vertical_projection_point()

void tesseract::Wordrec::vertical_projection_point ( EDGEPT *  split_point,
EDGEPT *  target_point,
EDGEPT **  best_point,
EDGEPT_CLIST *  new_points 
)

Definition at line 284 of file chop.cpp.

286  {
287  EDGEPT *p; /* Iterator */
288  EDGEPT *this_edgept; /* Iterator */
289  EDGEPT_C_IT new_point_it(new_points);
290  int x = split_point->pos.x; /* X value of vertical */
291  int best_dist = LARGE_DISTANCE;/* Best point found */
292 
293  if (*best_point != nullptr)
294  best_dist = edgept_dist(split_point, *best_point);
295 
296  p = target_point;
297  /* Look at each edge point */
298  do {
299  if (((p->pos.x <= x && x <= p->next->pos.x) ||
300  (p->next->pos.x <= x && x <= p->pos.x)) &&
301  !same_point(split_point->pos, p->pos) &&
302  !same_point(split_point->pos, p->next->pos) &&
303  !p->IsChopPt() &&
304  (*best_point == nullptr || !same_point((*best_point)->pos, p->pos))) {
305 
306  if (near_point(split_point, p, p->next, &this_edgept)) {
307  new_point_it.add_before_then_move(this_edgept);
308  }
309 
310  if (*best_point == nullptr)
311  best_dist = edgept_dist (split_point, this_edgept);
312 
313  this_edgept =
314  pick_close_point(split_point, this_edgept, &best_dist);
315  if (this_edgept)
316  *best_point = this_edgept;
317  }
318 
319  p = p->next;
320  }
321  while (p != target_point);
322 }

Member Data Documentation

◆ assume_fixed_pitch_char_segment

bool tesseract::Wordrec::assume_fixed_pitch_char_segment = false

"include fixed-pitch heuristics in char segmentation"

Definition at line 225 of file wordrec.h.

◆ blame_reasons_

GenericVector<int> tesseract::Wordrec::blame_reasons_

Definition at line 478 of file wordrec.h.

◆ chop_center_knob

double tesseract::Wordrec::chop_center_knob = 0.15

"Split center adjustment"

Definition at line 216 of file wordrec.h.

◆ chop_centered_maxwidth

int tesseract::Wordrec::chop_centered_maxwidth = 90

"Width of (smaller) chopped blobs " "above which we don't care that a chop is not near the center."

Definition at line 218 of file wordrec.h.

◆ chop_debug

int tesseract::Wordrec::chop_debug = 0

"Chop debug"

Definition at line 204 of file wordrec.h.

◆ chop_enable

bool tesseract::Wordrec::chop_enable = 1

"Chop enable"

Definition at line 205 of file wordrec.h.

◆ chop_good_split

double tesseract::Wordrec::chop_good_split = 50.0

"Good split limit"

Definition at line 222 of file wordrec.h.

◆ chop_inside_angle

int tesseract::Wordrec::chop_inside_angle = -50

"Min Inside Angle Bend"

Definition at line 212 of file wordrec.h.

◆ chop_min_outline_area

int tesseract::Wordrec::chop_min_outline_area = 2000

"Min Outline Area"

Definition at line 213 of file wordrec.h.

◆ chop_min_outline_points

int tesseract::Wordrec::chop_min_outline_points = 6

"Min Number of Points on Outline"

Definition at line 209 of file wordrec.h.

◆ chop_new_seam_pile

bool tesseract::Wordrec::chop_new_seam_pile = 1

"Use new seam_pile"

Definition at line 211 of file wordrec.h.

◆ chop_ok_split

double tesseract::Wordrec::chop_ok_split = 100.0

"OK split limit"

Definition at line 221 of file wordrec.h.

◆ chop_overlap_knob

double tesseract::Wordrec::chop_overlap_knob = 0.9

"Split overlap adjustment"

Definition at line 215 of file wordrec.h.

◆ chop_same_distance

int tesseract::Wordrec::chop_same_distance = 2

"Same distance"

Definition at line 208 of file wordrec.h.

◆ chop_seam_pile_size

int tesseract::Wordrec::chop_seam_pile_size = 150

"Max number of seams in seam_pile"

Definition at line 210 of file wordrec.h.

◆ chop_sharpness_knob

double tesseract::Wordrec::chop_sharpness_knob = 0.06

"Split sharpness adjustment"

Definition at line 219 of file wordrec.h.

◆ chop_split_dist_knob

double tesseract::Wordrec::chop_split_dist_knob = 0.5

"Split length adjustment"

Definition at line 214 of file wordrec.h.

◆ chop_split_length

int tesseract::Wordrec::chop_split_length = 10000

"Split Length"

Definition at line 207 of file wordrec.h.

◆ chop_vertical_creep

bool tesseract::Wordrec::chop_vertical_creep = 0

"Vertical creep"

Definition at line 206 of file wordrec.h.

◆ chop_width_change_knob

double tesseract::Wordrec::chop_width_change_knob = 5.0

"Width change adjustment"

Definition at line 220 of file wordrec.h.

◆ chop_x_y_weight

int tesseract::Wordrec::chop_x_y_weight = 3

"X / Y length weight"

Definition at line 223 of file wordrec.h.

◆ fill_lattice_

void(Wordrec::* tesseract::Wordrec::fill_lattice_) (const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)

Definition at line 480 of file wordrec.h.

◆ force_word_assoc

bool tesseract::Wordrec::force_word_assoc = false

"force associator to run regardless of what enable_assoc is." "This is used for CJK where component grouping is necessary."

Definition at line 201 of file wordrec.h.

◆ language_model_

std::unique_ptr<LanguageModel> tesseract::Wordrec::language_model_

Definition at line 471 of file wordrec.h.

◆ merge_fragments_in_matrix

bool tesseract::Wordrec::merge_fragments_in_matrix = true

"Merge the fragments in the ratings matrix and delete them " "after merging"

Definition at line 197 of file wordrec.h.

◆ pass2_ok_split

PRIORITY tesseract::Wordrec::pass2_ok_split

Definition at line 472 of file wordrec.h.

◆ prev_word_best_choice_

WERD_CHOICE* tesseract::Wordrec::prev_word_best_choice_

Definition at line 476 of file wordrec.h.

◆ repair_unchopped_blobs

int tesseract::Wordrec::repair_unchopped_blobs = 1

"Fix blobs that aren't chopped"

Definition at line 202 of file wordrec.h.

◆ save_alt_choices

bool tesseract::Wordrec::save_alt_choices = true

"Save alternative paths found during chopping " "and segmentation search"

Definition at line 242 of file wordrec.h.

◆ segsearch_debug_level

int tesseract::Wordrec::segsearch_debug_level = 0

"SegSearch debug level"

Definition at line 233 of file wordrec.h.

◆ segsearch_max_char_wh_ratio

double tesseract::Wordrec::segsearch_max_char_wh_ratio = 2.0

"Maximum character width-to-height ratio"

Definition at line 239 of file wordrec.h.

◆ segsearch_max_futile_classifications

int tesseract::Wordrec::segsearch_max_futile_classifications = 10

"Maximum number of pain point classifications per word."

Definition at line 237 of file wordrec.h.

◆ segsearch_max_pain_points

int tesseract::Wordrec::segsearch_max_pain_points = 2000

"Maximum number of pain points stored in the queue"

Definition at line 235 of file wordrec.h.

◆ tessedit_certainty_threshold

double tesseract::Wordrec::tessedit_certainty_threshold = -2.25

"Good blob limit"

Definition at line 203 of file wordrec.h.

◆ wordrec_debug_blamer

bool tesseract::Wordrec::wordrec_debug_blamer = false

"Print blamer debug messages"

Definition at line 231 of file wordrec.h.

◆ wordrec_debug_level

int tesseract::Wordrec::wordrec_debug_level = 0

"Debug level for wordrec"

Definition at line 226 of file wordrec.h.

◆ wordrec_enable_assoc

bool tesseract::Wordrec::wordrec_enable_assoc = true

"Associator Enable"

Definition at line 198 of file wordrec.h.

◆ wordrec_max_join_chunks

int tesseract::Wordrec::wordrec_max_join_chunks = 4

"Max number of broken pieces to associate"

Definition at line 228 of file wordrec.h.

◆ wordrec_run_blamer

bool tesseract::Wordrec::wordrec_run_blamer = false

"Try to set the blame for errors"

Definition at line 232 of file wordrec.h.

◆ wordrec_skip_no_truth_words

bool tesseract::Wordrec::wordrec_skip_no_truth_words = false

"Only run OCR for words that had truth recorded in BlamerBundle"

Definition at line 230 of file wordrec.h.


The documentation for this class was generated from the following files:
tesseract::GenericHeap
Definition: genericheap.h:58
TBOX
Definition: cleanapi_test.cc:19
tesseract::Wordrec::point_priority
PRIORITY point_priority(EDGEPT *point)
Definition: chop.cpp:65
tesseract::Wordrec::chop_ok_split
double chop_ok_split
Definition: wordrec.h:221
TBLOB::ClassifyNormalizeIfNeeded
TBLOB * ClassifyNormalizeIfNeeded() const
Definition: blobs.cpp:345
WERD_RES::FakeWordFromRatings
void FakeWordFromRatings(PermuterType permuter)
Definition: pageres.cpp:894
SPLIT_CLOSENESS
#define SPLIT_CLOSENESS
Definition: findseam.cpp:44
tesseract::Wordrec::merge_and_put_fragment_lists
void merge_and_put_fragment_lists(int16_t row, int16_t column, int16_t num_frag_parts, BLOB_CHOICE_LIST *choice_lists, MATRIX *ratings)
Definition: pieces.cpp:132
BlamerBundle::BlameClassifierOrLangModel
void BlameClassifierOrLangModel(const WERD_RES *word, const UNICHARSET &unicharset, bool valid_permuter, bool debug)
Definition: blamer.cpp:375
tesseract::GenericHeap::Pop
bool Pop(Pair *entry)
Definition: genericheap.h:118
BlamerBundle::ChoiceIsCorrect
bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const
Definition: blamer.cpp:117
tesseract::Wordrec::wordrec_skip_no_truth_words
bool wordrec_skip_no_truth_words
Definition: wordrec.h:230
MAX_NUM_POINTS
#define MAX_NUM_POINTS
Definition: chop.h:31
BlobChoiceClassifier
BlobChoiceClassifier
Definition: ratngs.h:41
tesseract::Wordrec::chop_new_seam_pile
bool chop_new_seam_pile
Definition: wordrec.h:211
tesseract::Wordrec::get_fragment_lists
void get_fragment_lists(int16_t current_frag, int16_t current_row, int16_t start, int16_t num_frag_parts, int16_t num_blobs, MATRIX *ratings, BLOB_CHOICE_LIST *choice_lists)
Definition: pieces.cpp:274
tesseract::Wordrec::improve_one_blob
SEAM * improve_one_blob(const GenericVector< BLOB_CHOICE * > &blob_choices, DANGERR *fixpt, bool split_next_to_fragment, bool italic_blob, WERD_RES *word, int *blob_number)
Definition: chopper.cpp:325
WERD::flag
bool flag(WERD_FLAGS mask) const
Definition: werd.h:116
CHAR_FRAGMENT::get_pos
int get_pos() const
Definition: unicharset.h:71
SEAM::ApplySeam
void ApplySeam(bool italic_blob, TBLOB *blob, TBLOB *other_blob) const
Definition: seam.cpp:116
TPOINT
Definition: blobs.h:49
blob_window
ScrollView * blob_window
Definition: render.cpp:31
CHAR_FRAGMENT::kMaxChunks
static const int kMaxChunks
Definition: unicharset.h:55
TESSLINE::loop
EDGEPT * loop
Definition: blobs.h:278
BlamerBundle::BlameClassifier
void BlameClassifier(const UNICHARSET &unicharset, const TBOX &blob_box, const BLOB_CHOICE_LIST &choices, bool debug)
Definition: blamer.cpp:263
SPLIT::point2
EDGEPT * point2
Definition: split.h:101
TBLOB::ShallowCopy
static TBLOB * ShallowCopy(const TBLOB &src)
Definition: blobs.cpp:334
tesseract::Wordrec::segsearch_max_futile_classifications
int segsearch_max_futile_classifications
Definition: wordrec.h:237
tesseract::Wordrec::try_point_pairs
void try_point_pairs(EDGEPT *points[MAX_NUM_POINTS], int16_t num_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob)
Definition: findseam.cpp:290
tesseract::Classify::prioritize_division
bool prioritize_division
Definition: classify.h:428
tesseract::Wordrec::SegSearch
void SegSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
Definition: segsearch.cpp:41
tesseract::Wordrec::SegSearchDone
bool SegSearchDone(int num_futile_classifications)
Definition: wordrec.h:486
tesseract::GenericHeap::PeekTop
const Pair & PeekTop() const
Definition: genericheap.h:108
draw_blob_edges
void draw_blob_edges(TBLOB *blob)
Definition: plotedges.cpp:65
same_point
#define same_point(p1, p2)
Definition: outlines.h:42
TWERD
Definition: blobs.h:416
tesseract::Wordrec::chop_centered_maxwidth
int chop_centered_maxwidth
Definition: wordrec.h:218
ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:87
tesseract::KDPtrPairDec
Definition: kdpair.h:162
wordrec_blob_pause
bool wordrec_blob_pause
Definition: render.cpp:39
INT_MEMBER
#define INT_MEMBER(name, val, comment, vec)
Definition: params.h:312
TBLOB::outlines
TESSLINE * outlines
Definition: blobs.h:398
GenericVector::insert
void insert(const T &t, int index)
Definition: genericvector.h:750
WERD_RES::denorm
DENORM denorm
Definition: pageres.h:195
IntersectRange
void IntersectRange(const T &lower1, const T &upper1, T *lower2, T *upper2)
Definition: helpers.h:143
tesseract::Wordrec::try_vertical_splits
void try_vertical_splits(EDGEPT *points[MAX_NUM_POINTS], int16_t num_points, EDGEPT_CLIST *new_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob)
Definition: findseam.cpp:327
tesseract::KDPtrPair::extract_data
Data * extract_data()
Definition: kdpair.h:131
tesseract::Wordrec::chop_seam_pile_size
int chop_seam_pile_size
Definition: wordrec.h:210
tesseract::Wordrec::chop_width_change_knob
double chop_width_change_knob
Definition: wordrec.h:220
BLOB_CHOICE::certainty
float certainty() const
Definition: ratngs.h:81
tesseract::Wordrec::pick_close_point
EDGEPT * pick_close_point(EDGEPT *critical_point, EDGEPT *vertical_point, int *best_dist)
Definition: chop.cpp:134
divisible_blob
bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT *location)
Definition: blobs.cpp:910
MATRIX::print
void print(const UNICHARSET &unicharset) const
Definition: matrix.cpp:110
tesseract::Wordrec::segsearch_debug_level
int segsearch_debug_level
Definition: wordrec.h:233
tesseract::KDPtrPair::set_data
void set_data(Data *new_data)
Definition: kdpair.h:126
tesseract::Wordrec::program_editdown
void program_editdown(int32_t elasped_time)
Definition: tface.cpp:89
TPOINT::length
int length() const
Definition: blobs.h:87
CHAR_FRAGMENT::get_unichar
const char * get_unichar() const
Definition: unicharset.h:70
tesseract::Wordrec::chop_debug
int chop_debug
Definition: wordrec.h:204
tesseract::Wordrec::chop_overlapping_blob
SEAM * chop_overlapping_blob(const GenericVector< TBOX > &boxes, bool italic_blob, WERD_RES *word_res, int *blob_number)
Definition: chopper.cpp:271
MATRIX_COORD::Valid
bool Valid(const MATRIX &m) const
Definition: matrix.h:614
tesseract::Wordrec::chop_vertical_creep
bool chop_vertical_creep
Definition: wordrec.h:206
MATRIX
Definition: matrix.h:574
TESSLINE
Definition: blobs.h:201
TBOX::top
int16_t top() const
Definition: rect.h:57
STRING
Definition: strngs.h:45
WERD_CHOICE::permuter
uint8_t permuter() const
Definition: ratngs.h:334
tesseract::Wordrec::prev_word_best_choice_
WERD_CHOICE * prev_word_best_choice_
Definition: wordrec.h:476
tesseract::GenericHeap::PopWorst
bool PopWorst(Pair *entry)
Definition: genericheap.h:140
tesseract::Wordrec::save_alt_choices
bool save_alt_choices
Definition: wordrec.h:242
MATRIX::IncreaseBandSize
void IncreaseBandSize(int bandwidth)
Definition: matrix.cpp:47
BlamerBundle::SetChopperBlame
void SetChopperBlame(const WERD_RES *word, bool debug)
Definition: blamer.cpp:316
tesseract::Wordrec::attempt_blob_chop
SEAM * attempt_blob_chop(TWERD *word, TBLOB *blob, int32_t blob_number, bool italic_blob, const GenericVector< SEAM * > &seams)
Definition: chopper.cpp:209
WERD_RES::ratings
MATRIX * ratings
Definition: pageres.h:231
tesseract::Wordrec::force_word_assoc
bool force_word_assoc
Definition: wordrec.h:201
tesseract::Wordrec::chop_split_dist_knob
double chop_split_dist_knob
Definition: wordrec.h:214
tesseract::Wordrec::segsearch_max_pain_points
int segsearch_max_pain_points
Definition: wordrec.h:235
TESSLINE::next
TESSLINE * next
Definition: blobs.h:279
tesseract::Dict::valid_word_permuter
static bool valid_word_permuter(uint8_t perm, bool numbers_ok)
Check all the DAWGs to see if this word is in any of them.
Definition: dict.h:474
tesseract::Wordrec::angle_change
int angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3)
Definition: chop.cpp:99
tesseract::CCUtil::unicharset
UNICHARSET unicharset
Definition: ccutil.h:57
SEAM
Definition: seam.h:36
WERD_RES::uch_set
const UNICHARSET * uch_set
Definition: pageres.h:197
tesseract::Wordrec::is_inside_angle
bool is_inside_angle(EDGEPT *pt)
Definition: chop.cpp:89
tesseract::Classify::getDict
virtual Dict & getDict()
Definition: classify.h:107
WERD_RES::blamer_bundle
BlamerBundle * blamer_bundle
Definition: pageres.h:246
tesseract::Wordrec::ResetNGramSearch
void ResetNGramSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, GenericVector< SegSearchPending > *pending)
Definition: segsearch.cpp:310
tesseract::Wordrec::add_seam_to_queue
void add_seam_to_queue(float new_priority, SEAM *new_seam, SeamQueue *seams)
Definition: findseam.cpp:62
tesseract::GenericHeap::size
int size() const
Definition: genericheap.h:71
EDGEPT::prev
EDGEPT * prev
Definition: blobs.h:191
tesseract::Classify::get_fontinfo_table
UnicityTable< FontInfo > & get_fontinfo_table()
Definition: classify.h:386
tesseract::Wordrec::tessedit_certainty_threshold
double tessedit_certainty_threshold
Definition: wordrec.h:203
is_on_line
#define is_on_line(p, p0, p1)
Definition: outlines.h:107
TPOINT::dot
int dot(const TPOINT &other) const
Definition: blobs.h:82
tesseract::Wordrec::wordrec_enable_assoc
bool wordrec_enable_assoc
Definition: wordrec.h:198
remove_edgept
void remove_edgept(EDGEPT *point)
Definition: split.cpp:196
tesseract::Wordrec::vertical_projection_point
void vertical_projection_point(EDGEPT *split_point, EDGEPT *target_point, EDGEPT **best_point, EDGEPT_CLIST *new_points)
Definition: chop.cpp:284
tesseract::Wordrec::near_point
bool near_point(EDGEPT *point, EDGEPT *line_pt_0, EDGEPT *line_pt_1, EDGEPT **near_pt)
Definition: outlines.cpp:50
GenericVector::push_back
int push_back(T object)
Definition: genericvector.h:799
tesseract::Wordrec::new_max_point
void new_max_point(EDGEPT *local_max, PointHeap *points)
Definition: chop.cpp:255
tesseract::LMPainPoints::PainPointDescription
static const char * PainPointDescription(LMPainPointsType type)
Definition: lm_pain_points.h:65
WERD_RES::best_choice
WERD_CHOICE * best_choice
Definition: pageres.h:235
tesseract::LM_PPTYPE_NUM
Definition: lm_pain_points.h:46
WERD_RES::InsertSeam
void InsertSeam(int blob_number, SEAM *seam)
Definition: pageres.cpp:414
TPOINT::x
int16_t x
Definition: blobs.h:91
display_blob
void display_blob(TBLOB *blob, C_COL color)
Definition: render.cpp:49
DENORM::DenormTransform
void DenormTransform(const DENORM *last_denorm, const TPOINT &pt, TPOINT *original) const
Definition: normalis.cpp:389
tesseract::Wordrec::classify_piece
virtual BLOB_CHOICE_LIST * classify_piece(const GenericVector< SEAM * > &seams, int16_t start, int16_t end, const char *description, TWERD *word, BlamerBundle *blamer_bundle)
Definition: pieces.cpp:52
tesseract::Wordrec::chop_overlap_knob
double chop_overlap_knob
Definition: wordrec.h:215
tesseract::Wordrec::chop_enable
bool chop_enable
Definition: wordrec.h:205
tesseract::Wordrec::fill_filtered_fragment_list
void fill_filtered_fragment_list(BLOB_CHOICE_LIST *choices, int fragment_pos, int num_frag_parts, BLOB_CHOICE_LIST *filtered_choices)
Definition: pieces.cpp:100
UNICHARSET::unichar_to_id
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:209
LARGE_DISTANCE
#define LARGE_DISTANCE
Definition: outlines.h:30
is_exterior_point
#define is_exterior_point(edge, point)
Definition: outlines.h:86
tesseract::Wordrec::chop_same_distance
int chop_same_distance
Definition: wordrec.h:208
W_EOL
end of line
Definition: werd.h:47
tesseract::Wordrec::wordrec_run_blamer
bool wordrec_run_blamer
Definition: wordrec.h:232
BlamerBundle::GuidedSegsearchStillGoing
bool GuidedSegsearchStillGoing() const
Definition: blamer.cpp:512
tesseract::Wordrec::InitBlamerForSegSearch
void InitBlamerForSegSearch(WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle, STRING *blamer_debug)
Definition: segsearch.cpp:327
TPOINT::y
int16_t y
Definition: blobs.h:92
GENERIC_2D_ARRAY::get
T get(ICOORD pos) const
Definition: matrix.h:227
TWERD::blobs
GenericVector< TBLOB * > blobs
Definition: blobs.h:457
tesseract::Wordrec::wordrec_debug_level
int wordrec_debug_level
Definition: wordrec.h:226
tesseract::Wordrec::fill_lattice_
void(Wordrec::* fill_lattice_)(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
Definition: wordrec.h:480
BlamerBundle::InitForSegSearch
void InitForSegSearch(const WERD_CHOICE *best_choice, MATRIX *ratings, UNICHAR_ID wildcard_id, bool debug, STRING *debug_str, tesseract::LMPainPoints *pain_points, double max_char_wh_ratio, WERD_RES *word_res)
Definition: blamer.cpp:478
tesseract::Wordrec::chop_center_knob
double chop_center_knob
Definition: wordrec.h:216
tesseract::Dict::certainty_scale
double certainty_scale
Definition: dict.h:627
tesseract::SortByUnicharID
int SortByUnicharID(const void *void1, const void *void2)
Definition: pieces.cpp:73
tesseract::Wordrec::pick_good_seam
SEAM * pick_good_seam(TBLOB *blob)
Definition: findseam.cpp:210
CHAR_FRAGMENT::is_ending
bool is_ending() const
Definition: unicharset.h:108
make_edgept
EDGEPT * make_edgept(int x, int y, EDGEPT *next, EDGEPT *prev)
Definition: split.cpp:136
double_MEMBER
#define double_MEMBER(name, val, comment, vec)
Definition: params.h:321
edgept_dist
#define edgept_dist(p1, p2)
Definition: outlines.h:77
tesseract::Wordrec::segsearch_max_char_wh_ratio
double segsearch_max_char_wh_ratio
Definition: wordrec.h:239
TBOX::bottom
int16_t bottom() const
Definition: rect.h:64
WERD_RES::FilterWordChoices
void FilterWordChoices(int debug_level)
Definition: pageres.cpp:509
TBOX::overlap_fraction
double overlap_fraction(const TBOX &box) const
Definition: rect.h:381
tesseract::Wordrec::repair_unchopped_blobs
int repair_unchopped_blobs
Definition: wordrec.h:202
tesseract::Wordrec::select_blob_to_split_from_fixpt
int select_blob_to_split_from_fixpt(DANGERR *fixpt)
Definition: chopper.cpp:622
SEAM::BreakPieces
static void BreakPieces(const GenericVector< SEAM * > &seams, const GenericVector< TBLOB * > &blobs, int first, int last)
Definition: seam.cpp:186
tesseract::Wordrec::prioritize_points
void prioritize_points(TESSLINE *outline, PointHeap *points)
Definition: chop.cpp:172
WERD_RES::chopped_word
TWERD * chopped_word
Definition: pageres.h:206
SEAM::Print
void Print(const char *label) const
Definition: seam.cpp:152
IRR_CORRECT
Definition: blamer.h:54
tesseract::Wordrec::add_point_to_list
void add_point_to_list(PointHeap *point_heap, EDGEPT *point)
Definition: chop.cpp:75
tesseract::Wordrec::choose_best_seam
void choose_best_seam(SeamQueue *seam_queue, const SPLIT *split, PRIORITY priority, SEAM **seam_result, TBLOB *blob, SeamPile *seam_pile)
Definition: findseam.cpp:100
tesseract::Wordrec::new_min_point
void new_min_point(EDGEPT *local_min, PointHeap *points)
Definition: chop.cpp:231
tesseract::CCUtil::params
ParamsVectors * params()
Definition: ccutil.h:51
CHAR_FRAGMENT::get_total
int get_total() const
Definition: unicharset.h:72
EDGEPT::vec
VECTOR vec
Definition: blobs.h:185
WERD_RES::best_choices
WERD_CHOICE_LIST best_choices
Definition: pageres.h:243
tesseract::LM_PPTYPE_SHAPE
Definition: lm_pain_points.h:44
TOP_CHOICE_PERM
Definition: ratngs.h:233
WERD_RES::seam_array
GenericVector< SEAM * > seam_array
Definition: pageres.h:208
BLOB_CHOICE::rating
float rating() const
Definition: ratngs.h:78
tesseract::KDPtrPairInc
Definition: kdpair.h:145
tesseract::Wordrec::chop_numbered_blob
SEAM * chop_numbered_blob(TWERD *word, int32_t blob_number, bool italic_blob, const GenericVector< SEAM * > &seams)
Definition: chopper.cpp:263
UNICHAR_ID
int UNICHAR_ID
Definition: unichar.h:36
TBLOB::bounding_box
TBOX bounding_box() const
Definition: blobs.cpp:466
GenericVector
Definition: baseapi.h:40
tesseract::KDPairInc
Definition: kdpair.h:51
wordrec_display_splits
bool wordrec_display_splits
Definition: split.cpp:39
CHAR_FRAGMENT
Definition: unicharset.h:48
tesseract::Wordrec::chop_inside_angle
int chop_inside_angle
Definition: wordrec.h:212
TPOINT::cross
int cross(const TPOINT &other) const
Definition: blobs.h:77
tesseract::Wordrec::improve_by_chopping
void improve_by_chopping(float rating_cert_scale, WERD_RES *word, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle, LMPainPoints *pain_points, GenericVector< SegSearchPending > *pending)
Definition: chopper.cpp:452
MAX_NUM_SEAMS
#define MAX_NUM_SEAMS
Definition: findseam.cpp:46
tesseract::GenericHeap::Push
void Push(Pair *entry)
Definition: genericheap.h:95
NO_FULL_PRIORITY
#define NO_FULL_PRIORITY
Definition: findseam.cpp:48
tesseract::Wordrec::select_blob_to_split
int select_blob_to_split(const GenericVector< BLOB_CHOICE * > &blob_choices, float rating_ceiling, bool split_next_to_fragment)
Definition: chopper.cpp:535
WERD_RES::RebuildBestState
void RebuildBestState()
Definition: pageres.cpp:804
tesseract::Wordrec::wordrec_debug_blamer
bool wordrec_debug_blamer
Definition: wordrec.h:231
BandTriMatrix::bandwidth
int bandwidth() const
Definition: matrix.h:534
SEAM::UsesPoint
bool UsesPoint(const EDGEPT *point) const
Definition: seam.h:80
SEAM::Mark
void Mark(ScrollView *window) const
Definition: seam.cpp:178
tesseract::Dict::has_hyphen_end
bool has_hyphen_end(const UNICHARSET *unicharset, UNICHAR_ID unichar_id, bool first_pos) const
Check whether the word has a hyphen at the end.
Definition: dict.h:152
BLOB_CHOICE
Definition: ratngs.h:49
MATRIX_COORD
Definition: matrix.h:604
TBLOB
Definition: blobs.h:282
tesseract::Dict::reset_hyphen_vars
void reset_hyphen_vars(bool last_word_on_line)
Definition: hyphen.cpp:42
TBOX::left
int16_t left() const
Definition: rect.h:71
tesseract::KDPtrPair::set_key
void set_key(const Key &new_key)
Definition: kdpair.h:119
tesseract::Wordrec::call_matcher
BLOB_CHOICE_LIST * call_matcher(TBLOB *blob)
Definition: tface.cpp:154
tesseract::Wordrec::classify_blob
BLOB_CHOICE_LIST * classify_blob(TBLOB *blob, const char *string, C_COL color, BlamerBundle *blamer_bundle)
Definition: wordclass.cpp:52
edge_window_wait
#define edge_window_wait()
Definition: plotedges.h:52
SPLIT
Definition: split.h:34
MATRIX_COORD::col
int col
Definition: matrix.h:632
GenericVector::clear
void clear()
Definition: genericvector.h:857
BAD_PRIORITY
#define BAD_PRIORITY
Definition: findseam.cpp:50
SEAM::CombineableWith
bool CombineableWith(const SEAM &other, int max_x_dist, float max_total_priority) const
Definition: seam.cpp:38
White
Definition: callcpp.h:28
print_ratings_list
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
Definition: ratngs.cpp:835
TBOX::right
int16_t right() const
Definition: rect.h:78
TBOX::almost_equal
bool almost_equal(const TBOX &box, int tolerance) const
Definition: rect.cpp:250
tesseract::Wordrec::assume_fixed_pitch_char_segment
bool assume_fixed_pitch_char_segment
Definition: wordrec.h:225
GenericVector::init_to_size
void init_to_size(int size, const T &t)
Definition: genericvector.h:706
SEAM::IsHealthy
bool IsHealthy(const TBLOB &blob, int min_points, int min_area) const
Definition: seam.cpp:64
GENERIC_2D_ARRAY::put
void put(ICOORD pos, const T &thing)
Definition: matrix.h:219
tesseract::Wordrec::pass2_ok_split
PRIORITY pass2_ok_split
Definition: wordrec.h:472
update_edge_window
#define update_edge_window()
Definition: plotedges.h:41
EDGEPT
Definition: blobs.h:97
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
tesseract::Dict::getUnicharset
const UNICHARSET & getUnicharset() const
Definition: dict.h:101
UNICHARSET::get_fragment
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:724
SEAM::priority
float priority() const
Definition: seam.h:57
WERD_RES::ClearWordChoices
void ClearWordChoices()
Definition: pageres.cpp:1125
SEAM::Finalize
void Finalize()
Definition: seam.h:108
MATRIX_COORD::row
int row
Definition: matrix.h:633
tesseract::Classify::rating_scale
double rating_scale
Definition: classify.h:472
tesseract::Wordrec::chop_min_outline_points
int chop_min_outline_points
Definition: wordrec.h:209
tesseract::Dict::valid_word
int valid_word(const WERD_CHOICE &word, bool numbers_ok) const
Definition: dict.cpp:778
tesseract::Wordrec::InitialSegSearch
void InitialSegSearch(WERD_RES *word_res, LMPainPoints *pain_points, GenericVector< SegSearchPending > *pending, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
Definition: segsearch.cpp:135
WERD_RES::word
WERD * word
Definition: pageres.h:180
EDGEPT::IsChopPt
bool IsChopPt() const
Definition: blobs.h:180
tesseract::LMPainPointsType
LMPainPointsType
Definition: lm_pain_points.h:40
SEAM::JoinPieces
static void JoinPieces(const GenericVector< SEAM * > &seams, const GenericVector< TBLOB * > &blobs, int first, int last)
Definition: seam.cpp:208
BlamerBundle::incorrect_result_reason
IncorrectResultReason incorrect_result_reason() const
Definition: blamer.h:121
CHAR_FRAGMENT::is_beginning
bool is_beginning() const
Definition: unicharset.h:105
tesseract::Wordrec::language_model_
std::unique_ptr< LanguageModel > language_model_
Definition: wordrec.h:471
tesseract::Wordrec::chop_min_outline_area
int chop_min_outline_area
Definition: wordrec.h:213
SEAM::CombineWith
void CombineWith(const SEAM &other)
Definition: seam.cpp:52
mark_outline
void mark_outline(EDGEPT *edgept)
Definition: plotedges.cpp:81
BOOL_MEMBER
#define BOOL_MEMBER(name, val, comment, vec)
Definition: params.h:315
tesseract::Wordrec::chop_x_y_weight
int chop_x_y_weight
Definition: wordrec.h:223
tesseract::Wordrec::ProcessSegSearchPainPoint
void ProcessSegSearchPainPoint(float pain_point_priority, const MATRIX_COORD &pain_point, const char *pain_point_type, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle)
Definition: segsearch.cpp:247
BlamerBundle::GuidedSegsearchNeeded
bool GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const
Definition: blamer.cpp:469
tesseract::Classify::allow_blob_division
bool allow_blob_division
Definition: classify.h:423
GenericVector::size
int size() const
Definition: genericvector.h:71
window_wait
char window_wait(ScrollView *win)
Definition: callcpp.cpp:103
tesseract::GenericHeap::get
const Pair & get(int index) const
Definition: genericheap.h:87
tesseract::Wordrec::wordrec_max_join_chunks
int wordrec_max_join_chunks
Definition: wordrec.h:228
tesseract::Wordrec::chop_split_length
int chop_split_length
Definition: wordrec.h:207
tesseract::GenericHeap::empty
bool empty() const
Definition: genericheap.h:68
tesseract::Wordrec::CallFillLattice
void CallFillLattice(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
Definition: wordrec.h:259
tesseract::Wordrec::chop_sharpness_knob
double chop_sharpness_knob
Definition: wordrec.h:219
closest
#define closest(test_p, p1, p2)
Definition: outlines.h:62
SEAM::PrintSeams
static void PrintSeams(const char *label, const GenericVector< SEAM * > &seams)
Definition: seam.cpp:165
tesseract::Wordrec::merge_fragments_in_matrix
bool merge_fragments_in_matrix
Definition: wordrec.h:197
tesseract::Wordrec::combine_seam
void combine_seam(const SeamPile &seam_pile, const SEAM *seam, SeamQueue *seam_queue)
Definition: findseam.cpp:192
PRIORITY
float PRIORITY
Definition: seam.h:34
wordrec_display_all_blobs
bool wordrec_display_all_blobs
Definition: render.cpp:37
BlamerBundle::FinishSegSearch
void FinishSegSearch(const WERD_CHOICE *best_choice, bool debug, STRING *debug_str)
Definition: blamer.cpp:517
SPLIT::point1
EDGEPT * point1
Definition: split.h:100
tesseract::Classify::classify_debug_level
int classify_debug_level
Definition: classify.h:430
edge_window
ScrollView * edge_window
Definition: plotedges.cpp:33
tesseract::Wordrec::UpdateSegSearchNodes
void UpdateSegSearchNodes(float rating_cert_scale, int starting_col, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
Definition: segsearch.cpp:179
EDGEPT::pos
TPOINT pos
Definition: blobs.h:184
TWERD::NumBlobs
int NumBlobs() const
Definition: blobs.h:446
EDGEPT::next
EDGEPT * next
Definition: blobs.h:190
tesseract::Wordrec::chop_good_split
double chop_good_split
Definition: wordrec.h:222
BandTriMatrix::dimension
int dimension() const
Definition: matrix.h:532
BlamerBundle::SetupCorrectSegmentation
void SetupCorrectSegmentation(const TWERD *word, bool debug)
Definition: blamer.cpp:413
TBOX
Definition: rect.h:33
tesseract::KDPtrPair::key
const Key & key() const
Definition: kdpair.h:116
MATRIX::Classified
bool Classified(int col, int row, int wildcard_id) const
Definition: matrix.cpp:34
SEAM::FullPriority
float FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth, double center_knob, double width_change_knob) const
Definition: seam.cpp:237