tesseract  4.0.0-1-g2a2b
wordrec.cpp
Go to the documentation of this file.
1 // File: wordrec.cpp
3 // Description: wordrec class.
4 // Author: Samuel Charron
5 //
6 // (C) Copyright 2006, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
18 
19 #include "wordrec.h"
20 
21 #ifdef DISABLED_LEGACY_ENGINE
22 
23 #include "params.h"
24 
25 
26 namespace tesseract {
28  // control parameters
29 
30  BOOL_MEMBER(wordrec_debug_blamer, false,
31  "Print blamer debug messages", params()),
32 
33  BOOL_MEMBER(wordrec_run_blamer, false,
34  "Try to set the blame for errors", params()) {
35  prev_word_best_choice_ = nullptr;
36 }
37 
38 } // namespace tesseract
39 
40 #else // DISABLED_LEGACY_ENGINE not defined
41 
42 #include "language_model.h"
43 #include "params.h"
44 
45 
46 namespace tesseract {
48  // control parameters
49  BOOL_MEMBER(merge_fragments_in_matrix, TRUE,
50  "Merge the fragments in the ratings matrix and delete them"
51  " after merging", params()),
52  BOOL_MEMBER(wordrec_no_block, FALSE, "Don't output block information",
53  params()),
54  BOOL_MEMBER(wordrec_enable_assoc, TRUE, "Associator Enable",
55  params()),
56  BOOL_MEMBER(force_word_assoc, FALSE,
57  "force associator to run regardless of what enable_assoc is."
58  " This is used for CJK where component grouping is necessary.",
59  CCUtil::params()),
60  double_MEMBER(wordrec_worst_state, 1.0, "Worst segmentation state",
61  params()),
62  BOOL_MEMBER(fragments_guide_chopper, FALSE,
63  "Use information from fragments to guide chopping process",
64  params()),
65  INT_MEMBER(repair_unchopped_blobs, 1, "Fix blobs that aren't chopped",
66  params()),
67  double_MEMBER(tessedit_certainty_threshold, -2.25, "Good blob limit",
68  params()),
69  INT_MEMBER(chop_debug, 0, "Chop debug",
70  params()),
71  BOOL_MEMBER(chop_enable, 1, "Chop enable",
72  params()),
73  BOOL_MEMBER(chop_vertical_creep, 0, "Vertical creep",
74  params()),
75  INT_MEMBER(chop_split_length, 10000, "Split Length",
76  params()),
77  INT_MEMBER(chop_same_distance, 2, "Same distance",
78  params()),
79  INT_MEMBER(chop_min_outline_points, 6, "Min Number of Points on Outline",
80  params()),
81  INT_MEMBER(chop_seam_pile_size, 150, "Max number of seams in seam_pile",
82  params()),
83  BOOL_MEMBER(chop_new_seam_pile, 1, "Use new seam_pile", params()),
84  INT_MEMBER(chop_inside_angle, -50, "Min Inside Angle Bend",
85  params()),
86  INT_MEMBER(chop_min_outline_area, 2000, "Min Outline Area",
87  params()),
88  double_MEMBER(chop_split_dist_knob, 0.5, "Split length adjustment",
89  params()),
90  double_MEMBER(chop_overlap_knob, 0.9, "Split overlap adjustment",
91  params()),
92  double_MEMBER(chop_center_knob, 0.15, "Split center adjustment",
93  params()),
94  INT_MEMBER(chop_centered_maxwidth, 90, "Width of (smaller) chopped blobs "
95  "above which we don't care that a chop is not near the center.",
96  params()),
97  double_MEMBER(chop_sharpness_knob, 0.06, "Split sharpness adjustment",
98  params()),
99  double_MEMBER(chop_width_change_knob, 5.0, "Width change adjustment",
100  params()),
101  double_MEMBER(chop_ok_split, 100.0, "OK split limit",
102  params()),
103  double_MEMBER(chop_good_split, 50.0, "Good split limit",
104  params()),
105  INT_MEMBER(chop_x_y_weight, 3, "X / Y length weight",
106  params()),
107  INT_MEMBER(segment_adjust_debug, 0, "Segmentation adjustment debug",
108  params()),
109  BOOL_MEMBER(assume_fixed_pitch_char_segment, FALSE,
110  "include fixed-pitch heuristics in char segmentation",
111  params()),
112  INT_MEMBER(wordrec_debug_level, 0,
113  "Debug level for wordrec", params()),
114  INT_MEMBER(wordrec_max_join_chunks, 4,
115  "Max number of broken pieces to associate", params()),
116  BOOL_MEMBER(wordrec_skip_no_truth_words, false,
117  "Only run OCR for words that had truth recorded in BlamerBundle",
118  params()),
119  BOOL_MEMBER(wordrec_debug_blamer, false,
120  "Print blamer debug messages", params()),
121  BOOL_MEMBER(wordrec_run_blamer, false,
122  "Try to set the blame for errors", params()),
123  INT_MEMBER(segsearch_debug_level, 0,
124  "SegSearch debug level", params()),
125  INT_MEMBER(segsearch_max_pain_points, 2000,
126  "Maximum number of pain points stored in the queue",
127  params()),
128  INT_MEMBER(segsearch_max_futile_classifications, 20,
129  "Maximum number of pain point classifications per chunk that"
130  " did not result in finding a better word choice.",
131  params()),
132  double_MEMBER(segsearch_max_char_wh_ratio, 2.0,
133  "Maximum character width-to-height ratio", params()),
134  BOOL_MEMBER(save_alt_choices, true,
135  "Save alternative paths found during chopping"
136  " and segmentation search",
137  params()),
138  pass2_ok_split(0.0f) {
139  prev_word_best_choice_ = nullptr;
141  &(getDict())));
142  fill_lattice_ = nullptr;
143 }
144 
145 } // namespace tesseract
146 
147 #endif // DISABLED_LEGACY_ENGINE
#define TRUE
Definition: capi.h:51
WERD_CHOICE * prev_word_best_choice_
Definition: wordrec.h:481
UnicityTable< FontInfo > & get_fontinfo_table()
Definition: classify.h:386
#define INT_MEMBER(name, val, comment, vec)
Definition: params.h:288
#define BOOL_MEMBER(name, val, comment, vec)
Definition: params.h:291
void(Wordrec::* fill_lattice_)(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
Definition: wordrec.h:485
#define double_MEMBER(name, val, comment, vec)
Definition: params.h:297
std::unique_ptr< LanguageModel > language_model_
Definition: wordrec.h:476
#define FALSE
Definition: capi.h:52
virtual Dict & getDict()
Definition: classify.h:107