21 #ifndef TESSERACT_TEXTORD_TEXTORD_H__
22 #define TESSERACT_TEXTORD_TEXTORD_H__
46 int height = bounding_box_.
height();
47 bounding_box_.
pad(height, height);
70 explicit Textord(
CCStruct* ccstruct);
83 int height, Pix *binary_pix, Pix *thresholds_pix,
84 Pix *grey_pix,
bool use_box_bottoms,
85 BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks,
86 TO_BLOCK_LIST *to_blocks);
93 return use_cjk_fp_model_;
96 use_cjk_fp_model_ = flag;
102 TO_BLOCK_LIST *blocks
111 void find_components(Pix* pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks);
112 void filter_blobs(
ICOORD page_tr, TO_BLOCK_LIST *blocks,
BOOL8 testing_on);
121 bool use_cjk_fp_model_;
126 int width,
int height, TO_BLOCK_LIST* to_blocks);
128 void MakeBlockRows(
int min_spacing,
int max_spacing,
133 void compute_block_xheight(
TO_BLOCK *block,
float gradient);
134 void compute_row_xheight(
TO_ROW *row,
137 int block_line_size);
138 void make_spline_rows(
TO_BLOCK *block,
143 void make_old_baselines(
TO_BLOCK *block,
146 void correlate_lines(
TO_BLOCK *block,
float gradient);
147 void correlate_neighbours(
TO_BLOCK *block,
150 int correlate_with_stats(
TO_ROW **rows,
153 void find_textlines(
TO_BLOCK *block,
159 void block_spacing_stats(
TO_BLOCK *block,
161 BOOL8 &old_text_ord_proportional,
163 inT16 &block_space_gap_width,
165 inT16 &block_non_space_gap_width
167 void row_spacing_stats(
TO_ROW *row,
172 inT16 block_space_gap_width,
174 inT16 block_non_space_gap_width
176 void old_to_method(
TO_ROW *row,
177 STATS *all_gap_stats,
178 STATS *space_gap_stats,
179 STATS *small_gap_stats,
180 inT16 block_space_gap_width,
182 inT16 block_non_space_gap_width
186 STATS *all_gap_stats,
187 BOOL8 suspected_table,
191 void improve_row_threshold(
TO_ROW *row,
STATS *all_gap_stats);
196 inT16 real_current_gap,
197 inT16 within_xht_current_gap,
203 BOOL8& prev_gap_was_a_space,
204 BOOL8& break_at_next_gap);
208 void peek_at_next_gap(
TO_ROW *row,
212 inT16 &next_within_xht_gap);
213 void mark_gap(
TBOX blob,
216 inT16 prev_blob_width,
218 inT16 next_blob_width,
220 float find_mean_blob_spacing(
WERD *word);
232 float filter_noise_blobs(BLOBNBOX_LIST *src_list,
233 BLOBNBOX_LIST *noise_list,
234 BLOBNBOX_LIST *small_list,
235 BLOBNBOX_LIST *large_list);
240 void cleanup_nontext_block(
BLOCK* block);
241 void cleanup_blocks(
bool clean_noise, BLOCK_LIST *blocks);
242 BOOL8 clean_noise_from_row(
ROW *row);
243 void clean_noise_from_words(
ROW *row);
246 void clean_small_noise_from_words(
ROW *row);
250 void TransferDiacriticsToBlockGroups(BLOBNBOX_LIST* diacritic_blobs,
255 void TransferDiacriticsToWords(BLOBNBOX_LIST *diacritic_blobs,
256 const FCOORD &rotation, WordGrid *word_grid);
261 "Script has no xheight, so use a single mode for horizontal text");
263 BOOL_VAR_H(tosp_old_to_method,
false,
"Space stats use prechopping?");
264 BOOL_VAR_H(tosp_old_to_constrain_sp_kn,
false,
265 "Constrain relative values of inter and intra-word gaps for "
268 "Block stats to use fixed pitch rows?");
269 BOOL_VAR_H(tosp_force_wordbreak_on_punct,
false,
270 "Force word breaks on punct to break long lines in non-space "
273 "Space stats use prechopping?");
275 "Fix suspected bug in old code");
277 "Only stat OBVIOUS spaces");
279 "Only stat OBVIOUS spaces");
281 "Only stat OBVIOUS spaces");
283 "Only stat OBVIOUS spaces");
284 BOOL_VAR_H(tosp_recovery_isolated_row_stats,
true,
285 "Use row alone when inadequate cert spaces");
286 BOOL_VAR_H(tosp_only_small_gaps_for_kern,
false,
"Better guess");
287 BOOL_VAR_H(tosp_all_flips_fuzzy,
false,
"Pass ANY flip to context?");
289 "Dont restrict kn->sp fuzzy limit to tables");
291 "Use within xht gap for wd breaks");
293 "Use within xht gap for wd breaks");
295 "Only use within xht gap for wd breaks");
297 "Dont chng kn to space next to punct");
301 "Enable improvement heuristic");
303 INT_VAR_H(tosp_enough_space_samples_for_median, 3,
304 "or should we use mean");
306 "No.samples reqd to reestimate for row");
308 "No.gaps reqd with 1 large gap to treat as a table");
310 "No.gaps reqd with few cert spaces to use certs");
311 INT_VAR_H(tosp_sanity_method, 1,
"How to avoid being silly");
313 "Factor for defining space threshold in terms of space and "
316 "how far between kern and space?");
318 "how far between kern and space?");
320 "Fract of xheight for narrow");
322 "narrow if w/h less than this");
325 "wide if w/h less than this");
327 "Fract of xheight for fuzz sp");
329 "Fract of xheight for fuzz sp");
331 "Fract of xheight for fuzz sp");
334 "gap ratio to flip kern->sp");
336 "gap ratio to flip kern->sp");
338 "gap ratio to flip kern->sp");
343 "Fract of kerns reqd for isolated row stats");
345 "Min difference of kn & sp in table");
347 "Expect spaces bigger than this");
349 "Fuzzy if less than this");
353 "Dont trust spaces less than this time kn");
355 "Thresh guess - mult kn by this");
357 "Thresh guess - mult xht by this");
359 "Multiplier on kn to limit thresh");
361 "Dont autoflip kn to sp when large separation");
363 "Limit use of xht gap with large kns");
365 "Limit use of xht gap with odd small kns");
367 "Dont reduce box if the top left is non blank");
369 "Dont let sp minus kn get too small");
371 "How wide fuzzies need context");
373 BOOL_VAR_H(textord_no_rejects,
false,
"Don't remove noise blobs");
374 BOOL_VAR_H(textord_show_blobs,
false,
"Display unsorted blobs");
376 INT_VAR_H(textord_max_noise_size, 7,
"Pixel size of noise");
377 INT_VAR_H(textord_baseline_debug, 0,
"Baseline debug level");
378 double_VAR_H(textord_blob_size_bigile, 95,
"Percentile for large blobs");
380 "Fraction of bounding box for noise");
381 double_VAR_H(textord_blob_size_smallile, 20,
"Percentile for small blobs");
382 double_VAR_H(textord_initialx_ile, 0.75,
"Ile of sizes for xheight guess");
383 double_VAR_H(textord_initialasc_ile, 0.90,
"Ile of sizes for xheight guess");
384 INT_VAR_H(textord_noise_sizefraction, 10,
"Fraction of size for maxima");
385 double_VAR_H(textord_noise_sizelimit, 0.5,
"Fraction of x for big t count");
386 INT_VAR_H(textord_noise_translimit, 16,
"Transitions for normal blob");
387 double_VAR_H(textord_noise_normratio, 2.0,
"Dot to norm ratio for deletion");
388 BOOL_VAR_H(textord_noise_rejwords,
true,
"Reject noise-like words");
389 BOOL_VAR_H(textord_noise_rejrows,
true,
"Reject noise-like rows");
390 double_VAR_H(textord_noise_syfract, 0.2,
"xh fract error for norm blobs");
392 "xh fract width error for norm blobs");
394 "Height fraction to discard outlines as speckle noise");
395 INT_VAR_H(textord_noise_sncount, 1,
"super norm blobs to save row");
396 double_VAR_H(textord_noise_rowratio, 6.0,
"Dot to norm ratio for deletion");
399 double_VAR_H(textord_blshift_xfraction, 9.99,
"Min size of baseline shift");
403 #endif // TESSERACT_TEXTORD_TEXTORD_H__
void set_use_cjk_fp_model(bool flag)
#define CLISTIZEH(CLASSNAME)
TBOX true_bounding_box() const
#define INT_VAR_H(name, val, comment)
C_BLOB_LIST * RejBlobs() const
void pad(int xpad, int ypad)
bool use_cjk_fp_model() const
#define double_VAR_H(name, val, comment)
const TBOX & bounding_box() const
TBOX true_bounding_box() const
const WERD * word() const
#define BOOL_VAR_H(name, val, comment)
C_BLOB_LIST * rej_cblob_list()