22 #include "config_auto.h"
36 : ccstruct_(ccstruct),
37 use_cjk_fp_model_(false),
40 "Script has no xheight, so use a single mode",
43 BOOL_MEMBER(tosp_old_to_method, false,
"Space stats use prechopping?",
46 "Constrain relative values of inter and intra-word gaps for "
50 "Block stats to use fixed pitch rows?", ccstruct_->params()),
52 "Force word breaks on punct to break long lines in non-space "
55 BOOL_MEMBER(tosp_use_pre_chopping, false,
"Space stats use prechopping?",
57 BOOL_MEMBER(tosp_old_to_bug_fix, false,
"Fix suspected bug in old code",
59 BOOL_MEMBER(tosp_block_use_cert_spaces, true,
"Only stat OBVIOUS spaces",
61 BOOL_MEMBER(tosp_row_use_cert_spaces, true,
"Only stat OBVIOUS spaces",
63 BOOL_MEMBER(tosp_narrow_blobs_not_cert, true,
"Only stat OBVIOUS spaces",
65 BOOL_MEMBER(tosp_row_use_cert_spaces1, true,
"Only stat OBVIOUS spaces",
68 "Use row alone when inadequate cert spaces",
70 BOOL_MEMBER(tosp_only_small_gaps_for_kern, false,
"Better guess",
72 BOOL_MEMBER(tosp_all_flips_fuzzy, false,
"Pass ANY flip to context?",
75 "Don't restrict kn->sp fuzzy limit to tables",
78 "Use within xht gap for wd breaks", ccstruct_->params()),
79 BOOL_MEMBER(tosp_use_xht_gaps, true,
"Use within xht gap for wd breaks",
82 "Only use within xht gap for wd breaks", ccstruct_->params()),
84 "Don't chng kn to space next to punct", ccstruct_->params()),
85 BOOL_MEMBER(tosp_flip_fuzz_kn_to_sp, true,
"Default flip",
87 BOOL_MEMBER(tosp_flip_fuzz_sp_to_kn, true,
"Default flip",
89 BOOL_MEMBER(tosp_improve_thresh, false,
"Enable improvement heuristic",
91 INT_MEMBER(tosp_debug_level, 0,
"Debug data", ccstruct_->params()),
92 INT_MEMBER(tosp_enough_space_samples_for_median, 3,
93 "or should we use mean", ccstruct_->params()),
95 "No.samples reqd to reestimate for row", ccstruct_->params()),
97 "No.gaps reqd with 1 large gap to treat as a table",
100 "No.gaps reqd with few cert spaces to use certs",
101 ccstruct_->params()),
102 INT_MEMBER(tosp_sanity_method, 1,
"How to avoid being silly",
103 ccstruct_->params()),
105 "Factor for defining space threshold in terms of space and "
107 ccstruct_->params()),
108 double_MEMBER(tosp_threshold_bias1, 0,
"how far between kern and space?",
109 ccstruct_->params()),
110 double_MEMBER(tosp_threshold_bias2, 0,
"how far between kern and space?",
111 ccstruct_->params()),
112 double_MEMBER(tosp_narrow_fraction, 0.3,
"Fract of xheight for narrow",
113 ccstruct_->params()),
115 "narrow if w/h less than this", ccstruct_->params()),
116 double_MEMBER(tosp_wide_fraction, 0.52,
"Fract of xheight for wide",
117 ccstruct_->params()),
118 double_MEMBER(tosp_wide_aspect_ratio, 0.0,
"wide if w/h less than this",
119 ccstruct_->params()),
121 "Fract of xheight for fuzz sp", ccstruct_->params()),
123 "Fract of xheight for fuzz sp", ccstruct_->params()),
125 "Fract of xheight for fuzz sp", ccstruct_->params()),
126 double_MEMBER(tosp_gap_factor, 0.83,
"gap ratio to flip sp->kern",
127 ccstruct_->params()),
128 double_MEMBER(tosp_kern_gap_factor1, 2.0,
"gap ratio to flip kern->sp",
129 ccstruct_->params()),
130 double_MEMBER(tosp_kern_gap_factor2, 1.3,
"gap ratio to flip kern->sp",
131 ccstruct_->params()),
132 double_MEMBER(tosp_kern_gap_factor3, 2.5,
"gap ratio to flip kern->sp",
133 ccstruct_->params()),
135 ccstruct_->params()),
136 double_MEMBER(tosp_ignore_very_big_gaps, 3.5,
"xht multiplier",
137 ccstruct_->params()),
138 double_MEMBER(tosp_rep_space, 1.6,
"rep gap multiplier for space",
139 ccstruct_->params()),
141 "Fract of kerns reqd for isolated row stats",
142 ccstruct_->params()),
144 "Min difference of kn & sp in table", ccstruct_->params()),
146 "Expect spaces bigger than this", ccstruct_->params()),
148 "Fuzzy if less than this", ccstruct_->params()),
149 double_MEMBER(tosp_fuzzy_kn_fraction, 0.5,
"New fuzzy kn alg",
150 ccstruct_->params()),
151 double_MEMBER(tosp_fuzzy_sp_fraction, 0.5,
"New fuzzy sp alg",
152 ccstruct_->params()),
154 "Don't trust spaces less than this time kn",
155 ccstruct_->params()),
157 "Thresh guess - mult kn by this", ccstruct_->params()),
159 "Thresh guess - mult xht by this", ccstruct_->params()),
161 "Multiplier on kn to limit thresh", ccstruct_->params()),
163 "Don't autoflip kn to sp when large separation",
164 ccstruct_->params()),
166 "Limit use of xht gap with large kns", ccstruct_->params()),
168 "Limit use of xht gap with odd small kns",
169 ccstruct_->params()),
171 "Don't reduce box if the top left is non blank",
172 ccstruct_->params()),
174 "Don't let sp minus kn get too small", ccstruct_->params()),
176 "How wide fuzzies need context", ccstruct_->params()),
178 BOOL_MEMBER(textord_no_rejects, false,
"Don't remove noise blobs",
179 ccstruct_->params()),
180 BOOL_MEMBER(textord_show_blobs, false,
"Display unsorted blobs",
181 ccstruct_->params()),
182 BOOL_MEMBER(textord_show_boxes, false,
"Display unsorted blobs",
183 ccstruct_->params()),
184 INT_MEMBER(textord_max_noise_size, 7,
"Pixel size of noise",
185 ccstruct_->params()),
186 INT_MEMBER(textord_baseline_debug, 0,
"Baseline debug level",
187 ccstruct_->params()),
189 "Fraction of bounding box for noise", ccstruct_->params()),
191 "Ile of sizes for xheight guess", ccstruct_->params()),
193 "Ile of sizes for xheight guess", ccstruct_->params()),
194 INT_MEMBER(textord_noise_sizefraction, 10,
"Fraction of size for maxima",
195 ccstruct_->params()),
197 "Fraction of x for big t count", ccstruct_->params()),
198 INT_MEMBER(textord_noise_translimit, 16,
"Transitions for normal blob",
199 ccstruct_->params()),
201 "Dot to norm ratio for deletion", ccstruct_->params()),
202 BOOL_MEMBER(textord_noise_rejwords, true,
"Reject noise-like words",
203 ccstruct_->params()),
204 BOOL_MEMBER(textord_noise_rejrows, true,
"Reject noise-like rows",
205 ccstruct_->params()),
207 "xh fract height error for norm blobs",
208 ccstruct_->params()),
210 "xh fract width error for norm blobs", ccstruct_->params()),
212 "Height fraction to discard outlines as speckle noise",
213 ccstruct_->params()),
214 INT_MEMBER(textord_noise_sncount, 1,
"super norm blobs to save row",
215 ccstruct_->params()),
217 "Dot to norm ratio for deletion", ccstruct_->params()),
218 BOOL_MEMBER(textord_noise_debug, false,
"Debug row garbage detector",
219 ccstruct_->params()),
220 double_MEMBER(textord_blshift_maxshift, 0.00,
"Max baseline shift",
221 ccstruct_->params()),
223 "Min size of baseline shift", ccstruct_->params()) {}
227 int width,
int height, Pix* binary_pix,
228 Pix* thresholds_pix, Pix* grey_pix,
229 bool use_box_bottoms, BLOBNBOX_LIST* diacritic_blobs,
230 BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks) {
231 page_tr_.
set_x(width);
232 page_tr_.
set_y(height);
233 if (to_blocks->empty()) {
236 TO_BLOCK_IT it(to_blocks);
237 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
252 const FCOORD anticlockwise90(0.0f, 1.0f);
253 const FCOORD clockwise90(0.0f, -1.0f);
254 TO_BLOCK_IT it(to_blocks);
255 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
262 to_block->
rotate(anticlockwise90);
270 TO_BLOCK_IT to_block_it(to_blocks);
271 TO_BLOCK* to_block = to_block_it.data();
276 gradient =
make_rows(page_tr_, to_blocks);
280 to_block, to_blocks);
293 make_words(
this, page_tr_, gradient, blocks, to_blocks);
298 TO_BLOCK* to_block = to_block_it.data();
304 TransferDiacriticsToBlockGroups(diacritic_blobs, blocks);
307 BLOCK_IT b_it(blocks);
308 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
309 b_it.data()->compute_row_margins();
311 #ifndef GRAPHICS_DISABLED
324 float row_total_conf = 0.0f;
325 int row_word_count = 0;
327 float best_conf = 0.0f;
333 row_total_conf /= row_word_count;
334 if (best_row ==
nullptr || best_conf < row_total_conf) {
336 best_conf = row_total_conf;
338 row_total_conf = 0.0f;
344 if (it.
row() != best_row)