22 #include "config_auto.h" 36 : ccstruct_(ccstruct),
37 use_cjk_fp_model_(false),
40 "Script has no xheight, so use a single mode",
43 BOOL_MEMBER(tosp_old_to_method, false,
"Space stats use prechopping?",
46 "Constrain relative values of inter and intra-word gaps for " 50 "Block stats to use fixed pitch rows?", ccstruct_->params()),
52 "Force word breaks on punct to break long lines in non-space " 55 BOOL_MEMBER(tosp_use_pre_chopping, false,
"Space stats use prechopping?",
57 BOOL_MEMBER(tosp_old_to_bug_fix, false,
"Fix suspected bug in old code",
59 BOOL_MEMBER(tosp_block_use_cert_spaces, true,
"Only stat OBVIOUS spaces",
61 BOOL_MEMBER(tosp_row_use_cert_spaces, true,
"Only stat OBVIOUS spaces",
63 BOOL_MEMBER(tosp_narrow_blobs_not_cert, true,
"Only stat OBVIOUS spaces",
65 BOOL_MEMBER(tosp_row_use_cert_spaces1, true,
"Only stat OBVIOUS spaces",
68 "Use row alone when inadequate cert spaces",
70 BOOL_MEMBER(tosp_only_small_gaps_for_kern, false,
"Better guess",
72 BOOL_MEMBER(tosp_all_flips_fuzzy, false,
"Pass ANY flip to context?",
75 "Don't restrict kn->sp fuzzy limit to tables",
78 "Use within xht gap for wd breaks", ccstruct_->params()),
79 BOOL_MEMBER(tosp_use_xht_gaps, true,
"Use within xht gap for wd breaks",
82 "Only use within xht gap for wd breaks", ccstruct_->params()),
84 "Don't chng kn to space next to punct", ccstruct_->params()),
85 BOOL_MEMBER(tosp_flip_fuzz_kn_to_sp, true,
"Default flip",
87 BOOL_MEMBER(tosp_flip_fuzz_sp_to_kn, true,
"Default flip",
89 BOOL_MEMBER(tosp_improve_thresh, false,
"Enable improvement heuristic",
91 INT_MEMBER(tosp_debug_level, 0,
"Debug data", ccstruct_->params()),
92 INT_MEMBER(tosp_enough_space_samples_for_median, 3,
93 "or should we use mean", ccstruct_->params()),
95 "No.samples reqd to reestimate for row", ccstruct_->params()),
97 "No.gaps reqd with 1 large gap to treat as a table",
100 "No.gaps reqd with few cert spaces to use certs",
101 ccstruct_->params()),
102 INT_MEMBER(tosp_sanity_method, 1,
"How to avoid being silly",
103 ccstruct_->params()),
105 "Factor for defining space threshold in terms of space and " 107 ccstruct_->params()),
108 double_MEMBER(tosp_threshold_bias1, 0,
"how far between kern and space?",
109 ccstruct_->params()),
110 double_MEMBER(tosp_threshold_bias2, 0,
"how far between kern and space?",
111 ccstruct_->params()),
112 double_MEMBER(tosp_narrow_fraction, 0.3,
"Fract of xheight for narrow",
113 ccstruct_->params()),
115 "narrow if w/h less than this", ccstruct_->params()),
116 double_MEMBER(tosp_wide_fraction, 0.52,
"Fract of xheight for wide",
117 ccstruct_->params()),
118 double_MEMBER(tosp_wide_aspect_ratio, 0.0,
"wide if w/h less than this",
119 ccstruct_->params()),
121 "Fract of xheight for fuzz sp", ccstruct_->params()),
123 "Fract of xheight for fuzz sp", ccstruct_->params()),
125 "Fract of xheight for fuzz sp", ccstruct_->params()),
126 double_MEMBER(tosp_gap_factor, 0.83,
"gap ratio to flip sp->kern",
127 ccstruct_->params()),
128 double_MEMBER(tosp_kern_gap_factor1, 2.0,
"gap ratio to flip kern->sp",
129 ccstruct_->params()),
130 double_MEMBER(tosp_kern_gap_factor2, 1.3,
"gap ratio to flip kern->sp",
131 ccstruct_->params()),
132 double_MEMBER(tosp_kern_gap_factor3, 2.5,
"gap ratio to flip kern->sp",
133 ccstruct_->params()),
135 ccstruct_->params()),
136 double_MEMBER(tosp_ignore_very_big_gaps, 3.5,
"xht multiplier",
137 ccstruct_->params()),
138 double_MEMBER(tosp_rep_space, 1.6,
"rep gap multiplier for space",
139 ccstruct_->params()),
141 "Fract of kerns reqd for isolated row stats",
142 ccstruct_->params()),
144 "Min difference of kn & sp in table", ccstruct_->params()),
146 "Expect spaces bigger than this", ccstruct_->params()),
148 "Fuzzy if less than this", ccstruct_->params()),
149 double_MEMBER(tosp_fuzzy_kn_fraction, 0.5,
"New fuzzy kn alg",
150 ccstruct_->params()),
151 double_MEMBER(tosp_fuzzy_sp_fraction, 0.5,
"New fuzzy sp alg",
152 ccstruct_->params()),
154 "Don't trust spaces less than this time kn",
155 ccstruct_->params()),
157 "Thresh guess - mult kn by this", ccstruct_->params()),
159 "Thresh guess - mult xht by this", ccstruct_->params()),
161 "Multiplier on kn to limit thresh", ccstruct_->params()),
163 "Don't autoflip kn to sp when large separation",
164 ccstruct_->params()),
166 "Limit use of xht gap with large kns", ccstruct_->params()),
168 "Limit use of xht gap with odd small kns",
169 ccstruct_->params()),
171 "Don't reduce box if the top left is non blank",
172 ccstruct_->params()),
174 "Don't let sp minus kn get too small", ccstruct_->params()),
176 "How wide fuzzies need context", ccstruct_->params()),
178 BOOL_MEMBER(textord_no_rejects, false,
"Don't remove noise blobs",
179 ccstruct_->params()),
180 BOOL_MEMBER(textord_show_blobs, false,
"Display unsorted blobs",
181 ccstruct_->params()),
182 BOOL_MEMBER(textord_show_boxes, false,
"Display unsorted blobs",
183 ccstruct_->params()),
184 INT_MEMBER(textord_max_noise_size, 7,
"Pixel size of noise",
185 ccstruct_->params()),
186 INT_MEMBER(textord_baseline_debug, 0,
"Baseline debug level",
187 ccstruct_->params()),
188 double_MEMBER(textord_blob_size_bigile, 95,
"Percentile for large blobs",
189 ccstruct_->params()),
191 "Fraction of bounding box for noise", ccstruct_->params()),
193 "Percentile for small blobs", ccstruct_->params()),
195 "Ile of sizes for xheight guess", ccstruct_->params()),
197 "Ile of sizes for xheight guess", ccstruct_->params()),
198 INT_MEMBER(textord_noise_sizefraction, 10,
"Fraction of size for maxima",
199 ccstruct_->params()),
201 "Fraction of x for big t count", ccstruct_->params()),
202 INT_MEMBER(textord_noise_translimit, 16,
"Transitions for normal blob",
203 ccstruct_->params()),
205 "Dot to norm ratio for deletion", ccstruct_->params()),
206 BOOL_MEMBER(textord_noise_rejwords, true,
"Reject noise-like words",
207 ccstruct_->params()),
208 BOOL_MEMBER(textord_noise_rejrows, true,
"Reject noise-like rows",
209 ccstruct_->params()),
211 "xh fract height error for norm blobs",
212 ccstruct_->params()),
214 "xh fract width error for norm blobs", ccstruct_->params()),
216 "Height fraction to discard outlines as speckle noise",
217 ccstruct_->params()),
218 INT_MEMBER(textord_noise_sncount, 1,
"super norm blobs to save row",
219 ccstruct_->params()),
221 "Dot to norm ratio for deletion", ccstruct_->params()),
222 BOOL_MEMBER(textord_noise_debug, false,
"Debug row garbage detector",
223 ccstruct_->params()),
224 double_MEMBER(textord_blshift_maxshift, 0.00,
"Max baseline shift",
225 ccstruct_->params()),
227 "Min size of baseline shift", ccstruct_->params()) {}
231 int width,
int height, Pix* binary_pix,
232 Pix* thresholds_pix, Pix* grey_pix,
233 bool use_box_bottoms, BLOBNBOX_LIST* diacritic_blobs,
234 BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks) {
235 page_tr_.
set_x(width);
236 page_tr_.
set_y(height);
237 if (to_blocks->empty()) {
240 TO_BLOCK_IT it(to_blocks);
241 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
256 const FCOORD anticlockwise90(0.0f, 1.0f);
257 const FCOORD clockwise90(0.0f, -1.0f);
258 TO_BLOCK_IT it(to_blocks);
259 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
266 to_block->
rotate(anticlockwise90);
274 TO_BLOCK_IT to_block_it(to_blocks);
275 TO_BLOCK* to_block = to_block_it.data();
280 gradient =
make_rows(page_tr_, to_blocks);
284 to_block, to_blocks);
297 make_words(
this, page_tr_, gradient, blocks, to_blocks);
302 TO_BLOCK* to_block = to_block_it.data();
308 TransferDiacriticsToBlockGroups(diacritic_blobs, blocks);
311 BLOCK_IT b_it(blocks);
312 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
313 b_it.data()->compute_row_margins();
315 #ifndef GRAPHICS_DISABLED 328 float row_total_conf = 0.0f;
329 int row_word_count = 0;
331 float best_conf = 0.0f;
337 row_total_conf /= row_word_count;
338 if (best_row ==
nullptr || best_conf < row_total_conf) {
340 best_conf = row_total_conf;
342 row_total_conf = 0.0f;
348 if (it.
row() != best_row)
void set_poly_block(POLY_BLOCK *blk)
set the poly block
bool PSM_SPARSE(int pageseg_mode)
void set_x(int16_t xin)
rewrite function
Textord(CCStruct *ccstruct)
ROW_LIST * row_list()
get rows
#define INT_MEMBER(name, val, comment, vec)
void find_components(Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
void rotate(const FCOORD &rotation)
void set_re_rotation(const FCOORD &rotation)
#define BOOL_MEMBER(name, val, comment, vec)
float make_rows(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
ROW_RES * next_row() const
float make_single_row(ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks)
#define double_MEMBER(name, val, comment, vec)
WERD_RES * restart_page()
int textord_baseline_debug
void make_words(tesseract::Textord *textord, ICOORD page_tr, float gradient, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST *real_rows)
void set_classify_rotation(const FCOORD &rotation)
bool textord_show_final_rows
bool PSM_WORD_FIND_ENABLED(int pageseg_mode)
void ComputeEdgeOffsets(Pix *thresholds, Pix *grey)
void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, bool testing_on)
void ComputeStraightBaselines(bool use_box_bottoms)
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
void TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height, Pix *binary_pix, Pix *thresholds_pix, Pix *grey_pix, bool use_box_bottoms, BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
void ComputeBaselineSplinesAndXheights(const ICOORD &page_tr, bool enable_splines, bool remove_noise, bool show_final_rows, Textord *textord)
void CleanupSingleRowResult(PageSegMode pageseg_mode, PAGE_RES *page_res)
void set_y(int16_t yin)
rewrite function
bool PSM_LINE_FIND_ENABLED(int pageseg_mode)
WERD_CHOICE * best_choice
Treat the image as a single character.