22 #include "config_auto.h"
36 : ccstruct_(ccstruct), use_cjk_fp_model_(false),
39 "Script has no xheight, so use a single mode",
42 BOOL_MEMBER(tosp_old_to_method, false,
"Space stats use prechopping?",
45 "Constrain relative values of inter and intra-word gaps for "
49 "Block stats to use fixed pitch rows?",
52 "Force word breaks on punct to break long lines in non-space "
56 "Space stats use prechopping?",
58 BOOL_MEMBER(tosp_old_to_bug_fix, false,
"Fix suspected bug in old code",
61 "Only stat OBVIOUS spaces",
63 BOOL_MEMBER(tosp_row_use_cert_spaces, true,
"Only stat OBVIOUS spaces",
66 "Only stat OBVIOUS spaces",
68 BOOL_MEMBER(tosp_row_use_cert_spaces1, true,
"Only stat OBVIOUS spaces",
71 "Use row alone when inadequate cert spaces",
73 BOOL_MEMBER(tosp_only_small_gaps_for_kern, false,
"Better guess",
75 BOOL_MEMBER(tosp_all_flips_fuzzy, false,
"Pass ANY flip to context?",
78 "Dont restrict kn->sp fuzzy limit to tables",
81 "Use within xht gap for wd breaks",
83 BOOL_MEMBER(tosp_use_xht_gaps, true,
"Use within xht gap for wd breaks",
86 "Only use within xht gap for wd breaks",
89 "Dont chng kn to space next to punct",
91 BOOL_MEMBER(tosp_flip_fuzz_kn_to_sp, true,
"Default flip",
93 BOOL_MEMBER(tosp_flip_fuzz_sp_to_kn, true,
"Default flip",
95 BOOL_MEMBER(tosp_improve_thresh, false,
"Enable improvement heuristic",
99 INT_MEMBER(tosp_enough_space_samples_for_median, 3,
100 "or should we use mean",
101 ccstruct_->params()),
103 "No.samples reqd to reestimate for row",
104 ccstruct_->params()),
106 "No.gaps reqd with 1 large gap to treat as a table",
107 ccstruct_->params()),
109 "No.gaps reqd with few cert spaces to use certs",
110 ccstruct_->params()),
111 INT_MEMBER(tosp_sanity_method, 1,
"How to avoid being silly",
112 ccstruct_->params()),
114 "Factor for defining space threshold in terms of space and "
116 ccstruct_->params()),
118 "how far between kern and space?",
119 ccstruct_->params()),
121 "how far between kern and space?",
122 ccstruct_->params()),
123 double_MEMBER(tosp_narrow_fraction, 0.3,
"Fract of xheight for narrow",
124 ccstruct_->params()),
126 "narrow if w/h less than this",
127 ccstruct_->params()),
128 double_MEMBER(tosp_wide_fraction, 0.52,
"Fract of xheight for wide",
129 ccstruct_->params()),
130 double_MEMBER(tosp_wide_aspect_ratio, 0.0,
"wide if w/h less than this",
131 ccstruct_->params()),
133 "Fract of xheight for fuzz sp",
134 ccstruct_->params()),
136 "Fract of xheight for fuzz sp",
137 ccstruct_->params()),
139 "Fract of xheight for fuzz sp",
140 ccstruct_->params()),
141 double_MEMBER(tosp_gap_factor, 0.83,
"gap ratio to flip sp->kern",
142 ccstruct_->params()),
143 double_MEMBER(tosp_kern_gap_factor1, 2.0,
"gap ratio to flip kern->sp",
144 ccstruct_->params()),
145 double_MEMBER(tosp_kern_gap_factor2, 1.3,
"gap ratio to flip kern->sp",
146 ccstruct_->params()),
147 double_MEMBER(tosp_kern_gap_factor3, 2.5,
"gap ratio to flip kern->sp",
148 ccstruct_->params()),
150 ccstruct_->params()),
151 double_MEMBER(tosp_ignore_very_big_gaps, 3.5,
"xht multiplier",
152 ccstruct_->params()),
153 double_MEMBER(tosp_rep_space, 1.6,
"rep gap multiplier for space",
154 ccstruct_->params()),
156 "Fract of kerns reqd for isolated row stats",
157 ccstruct_->params()),
159 "Min difference of kn & sp in table",
160 ccstruct_->params()),
162 "Expect spaces bigger than this",
163 ccstruct_->params()),
165 "Fuzzy if less than this",
166 ccstruct_->params()),
167 double_MEMBER(tosp_fuzzy_kn_fraction, 0.5,
"New fuzzy kn alg",
168 ccstruct_->params()),
169 double_MEMBER(tosp_fuzzy_sp_fraction, 0.5,
"New fuzzy sp alg",
170 ccstruct_->params()),
172 "Dont trust spaces less than this time kn",
173 ccstruct_->params()),
175 "Thresh guess - mult kn by this",
176 ccstruct_->params()),
178 "Thresh guess - mult xht by this",
179 ccstruct_->params()),
181 "Multiplier on kn to limit thresh",
182 ccstruct_->params()),
184 "Dont autoflip kn to sp when large separation",
185 ccstruct_->params()),
187 "Limit use of xht gap with large kns",
188 ccstruct_->params()),
190 "Limit use of xht gap with odd small kns",
191 ccstruct_->params()),
193 "Dont reduce box if the top left is non blank",
194 ccstruct_->params()),
196 "Dont let sp minus kn get too small",
197 ccstruct_->params()),
199 "How wide fuzzies need context",
200 ccstruct_->params()),
202 BOOL_MEMBER(textord_no_rejects, false,
"Don't remove noise blobs",
203 ccstruct_->params()),
204 BOOL_MEMBER(textord_show_blobs, false,
"Display unsorted blobs",
205 ccstruct_->params()),
206 BOOL_MEMBER(textord_show_boxes, false,
"Display unsorted blobs",
207 ccstruct_->params()),
208 INT_MEMBER(textord_max_noise_size, 7,
"Pixel size of noise",
209 ccstruct_->params()),
210 INT_MEMBER(textord_baseline_debug, 0,
"Baseline debug level",
211 ccstruct_->params()),
212 double_MEMBER(textord_blob_size_bigile, 95,
"Percentile for large blobs",
213 ccstruct_->params()),
215 "Fraction of bounding box for noise",
216 ccstruct_->params()),
218 "Percentile for small blobs",
219 ccstruct_->params()),
221 "Ile of sizes for xheight guess",
222 ccstruct_->params()),
224 "Ile of sizes for xheight guess",
225 ccstruct_->params()),
227 "Fraction of size for maxima",
228 ccstruct_->params()),
230 "Fraction of x for big t count",
231 ccstruct_->params()),
232 INT_MEMBER(textord_noise_translimit, 16,
"Transitions for normal blob",
233 ccstruct_->params()),
235 "Dot to norm ratio for deletion",
236 ccstruct_->params()),
237 BOOL_MEMBER(textord_noise_rejwords, true,
"Reject noise-like words",
238 ccstruct_->params()),
239 BOOL_MEMBER(textord_noise_rejrows, true,
"Reject noise-like rows",
240 ccstruct_->params()),
242 "xh fract height error for norm blobs",
243 ccstruct_->params()),
245 "xh fract width error for norm blobs",
246 ccstruct_->params()),
248 "Height fraction to discard outlines as speckle noise",
249 ccstruct_->params()),
250 INT_MEMBER(textord_noise_sncount, 1,
"super norm blobs to save row",
251 ccstruct_->params()),
253 "Dot to norm ratio for deletion",
254 ccstruct_->params()),
255 BOOL_MEMBER(textord_noise_debug, false,
"Debug row garbage detector",
256 ccstruct_->params()),
257 double_MEMBER(textord_blshift_maxshift, 0.00,
"Max baseline shift",
258 ccstruct_->params()),
260 "Min size of baseline shift",
261 ccstruct_->params()) {
269 int width,
int height, Pix* binary_pix,
270 Pix* thresholds_pix, Pix* grey_pix,
271 bool use_box_bottoms, BLOBNBOX_LIST* diacritic_blobs,
272 BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks) {
273 page_tr_.
set_x(width);
274 page_tr_.
set_y(height);
275 if (to_blocks->empty()) {
278 TO_BLOCK_IT it(to_blocks);
279 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
294 const FCOORD anticlockwise90(0.0f, 1.0f);
295 const FCOORD clockwise90(0.0f, -1.0f);
296 TO_BLOCK_IT it(to_blocks);
297 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
304 to_block->
rotate(anticlockwise90);
312 TO_BLOCK_IT to_block_it(to_blocks);
313 TO_BLOCK* to_block = to_block_it.data();
318 gradient =
make_rows(page_tr_, to_blocks);
322 to_block, to_blocks);
334 make_words(
this, page_tr_, gradient, blocks, to_blocks);
339 TO_BLOCK* to_block = to_block_it.data();
345 TransferDiacriticsToBlockGroups(diacritic_blobs, blocks);
348 BLOCK_IT b_it(blocks);
349 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
350 b_it.data()->compute_row_margins();
352 #ifndef GRAPHICS_DISABLED
365 float row_total_conf = 0.0f;
366 int row_word_count = 0;
368 float best_conf = 0.0f;
374 row_total_conf /= row_word_count;
375 if (best_row ==
NULL || best_conf < row_total_conf) {
377 best_conf = row_total_conf;
379 row_total_conf = 0.0f;
385 if (it.
row() != best_row)
void set_x(inT16 xin)
rewrite function
void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST *real_rows)
bool PSM_SPARSE(int pageseg_mode)
WERD_CHOICE * best_choice
ROW_RES * next_row() const
void set_poly_block(POLY_BLOCK *blk)
set the poly block
int textord_baseline_debug
#define BOOL_MEMBER(name, val, comment, vec)
void set_re_rotation(const FCOORD &rotation)
float make_single_row(ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks)
void TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height, Pix *binary_pix, Pix *thresholds_pix, Pix *grey_pix, bool use_box_bottoms, BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
bool PSM_LINE_FIND_ENABLED(int pageseg_mode)
Treat the image as a single character.
void ComputeStraightBaselines(bool use_box_bottoms)
WERD_RES * restart_page()
void ComputeEdgeOffsets(Pix *thresholds, Pix *grey)
void ComputeBaselineSplinesAndXheights(const ICOORD &page_tr, bool enable_splines, bool remove_noise, bool show_final_rows, Textord *textord)
bool PSM_WORD_FIND_ENABLED(int pageseg_mode)
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
#define INT_MEMBER(name, val, comment, vec)
void rotate(const FCOORD &rotation)
float make_rows(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, BOOL8 testing_on)
bool textord_show_final_rows
void set_y(inT16 yin)
rewrite function
#define double_MEMBER(name, val, comment, vec)
void CleanupSingleRowResult(PageSegMode pageseg_mode, PAGE_RES *page_res)
void find_components(Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
ROW_LIST * row_list()
get rows
Textord(CCStruct *ccstruct)
void set_classify_rotation(const FCOORD &rotation)
void make_words(tesseract::Textord *textord, ICOORD page_tr, float gradient, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)