tesseract  4.0.0-1-g2a2b
tesseract::Textord Class Reference

#include <textord.h>

Public Member Functions

 Textord (CCStruct *ccstruct)
 
 ~Textord ()=default
 
void TextordPage (PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height, Pix *binary_pix, Pix *thresholds_pix, Pix *grey_pix, bool use_box_bottoms, BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
 
void CleanupSingleRowResult (PageSegMode pageseg_mode, PAGE_RES *page_res)
 
bool use_cjk_fp_model () const
 
void set_use_cjk_fp_model (bool flag)
 
void to_spacing (ICOORD page_tr, TO_BLOCK_LIST *blocks)
 
ROW * make_prop_words (TO_ROW *row, FCOORD rotation)
 
ROW * make_blob_words (TO_ROW *row, FCOORD rotation)
 
void find_components (Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
 
void filter_blobs (ICOORD page_tr, TO_BLOCK_LIST *blocks, bool testing_on)
 
void compute_block_xheight (TO_BLOCK *block, float gradient)
 
void make_spline_rows (TO_BLOCK *block, float gradient, bool testing_on)
 
compute_row_xheight

Estimate the xheight of this row. Compute the ascender rise and descender drop at the same time. Set xheight_evidence to the number of blobs with the chosen xheight that appear in this row.

void compute_row_xheight (TO_ROW *row, const FCOORD &rotation, float gradient, int block_line_size)
 

Public Attributes

bool textord_single_height_mode = false
 
bool tosp_old_to_method = false
 
bool tosp_old_to_constrain_sp_kn = false
 
bool tosp_only_use_prop_rows = true
 
bool tosp_force_wordbreak_on_punct = false
 
bool tosp_use_pre_chopping = false
 
bool tosp_old_to_bug_fix = false
 
bool tosp_block_use_cert_spaces = true
 
bool tosp_row_use_cert_spaces = true
 
bool tosp_narrow_blobs_not_cert = true
 
bool tosp_row_use_cert_spaces1 = true
 
bool tosp_recovery_isolated_row_stats = true
 
bool tosp_only_small_gaps_for_kern = false
 
bool tosp_all_flips_fuzzy = false
 
bool tosp_fuzzy_limit_all = true
 
bool tosp_stats_use_xht_gaps = true
 
bool tosp_use_xht_gaps = true
 
bool tosp_only_use_xht_gaps = false
 
bool tosp_rule_9_test_punct = false
 
bool tosp_flip_fuzz_kn_to_sp = true
 
bool tosp_flip_fuzz_sp_to_kn = true
 
bool tosp_improve_thresh = false
 
int tosp_debug_level = 0
 
int tosp_enough_space_samples_for_median = 3
 
int tosp_redo_kern_limit = 10
 
int tosp_few_samples = 40
 
int tosp_short_row = 20
 
int tosp_sanity_method = 1
 
double tosp_old_sp_kn_th_factor = 2.0
 
double tosp_threshold_bias1 = 0
 
double tosp_threshold_bias2 = 0
 
double tosp_narrow_fraction = 0.3
 
double tosp_narrow_aspect_ratio = 0.48
 
double tosp_wide_fraction = 0.52
 
double tosp_wide_aspect_ratio = 0.0
 
double tosp_fuzzy_space_factor = 0.6
 
double tosp_fuzzy_space_factor1 = 0.5
 
double tosp_fuzzy_space_factor2 = 0.72
 
double tosp_gap_factor = 0.83
 
double tosp_kern_gap_factor1 = 2.0
 
double tosp_kern_gap_factor2 = 1.3
 
double tosp_kern_gap_factor3 = 2.5
 
double tosp_ignore_big_gaps = -1
 
double tosp_ignore_very_big_gaps = 3.5
 
double tosp_rep_space = 1.6
 
double tosp_enough_small_gaps = 0.65
 
double tosp_table_kn_sp_ratio = 2.25
 
double tosp_table_xht_sp_ratio = 0.33
 
double tosp_table_fuzzy_kn_sp_ratio = 3.0
 
double tosp_fuzzy_kn_fraction = 0.5
 
double tosp_fuzzy_sp_fraction = 0.5
 
double tosp_min_sane_kn_sp = 1.5
 
double tosp_init_guess_kn_mult = 2.2
 
double tosp_init_guess_xht_mult = 0.28
 
double tosp_max_sane_kn_thresh = 5.0
 
double tosp_flip_caution = 0.0
 
double tosp_large_kerning = 0.19
 
double tosp_dont_fool_with_small_kerns = -1
 
double tosp_near_lh_edge = 0
 
double tosp_silly_kn_sp_gap = 0.2
 
double tosp_pass_wide_fuzz_sp_to_context = 0.75
 
bool textord_no_rejects = false
 
bool textord_show_blobs = false
 
bool textord_show_boxes = false
 
int textord_max_noise_size = 7
 
int textord_baseline_debug = 0
 
double textord_blob_size_bigile = 95
 
double textord_noise_area_ratio = 0.7
 
double textord_blob_size_smallile = 20
 
double textord_initialx_ile = 0.75
 
double textord_initialasc_ile = 0.90
 
int textord_noise_sizefraction = 10
 
double textord_noise_sizelimit = 0.5
 
int textord_noise_translimit = 16
 
double textord_noise_normratio = 2.0
 
bool textord_noise_rejwords = true
 
bool textord_noise_rejrows = true
 
double textord_noise_syfract = 0.2
 
double textord_noise_sxfract = 0.4
 
double textord_noise_hfract = 1.0/64
 
int textord_noise_sncount = 1
 
double textord_noise_rowratio = 6.0
 
bool textord_noise_debug = FALSE
 
double textord_blshift_maxshift = 0.00
 
double textord_blshift_xfraction = 9.99
 

Detailed Description

Definition at line 68 of file textord.h.

Constructor & Destructor Documentation

◆ Textord()

tesseract::Textord::Textord ( CCStruct *  ccstruct)
explicit

Definition at line 35 of file textord.cpp.

36  : ccstruct_(ccstruct),
37  use_cjk_fp_model_(false),
38  // makerow.cpp ///////////////////////////////////////////
39  BOOL_MEMBER(textord_single_height_mode, false,
40  "Script has no xheight, so use a single mode",
41  ccstruct_->params()),
42  // tospace.cpp ///////////////////////////////////////////
43  BOOL_MEMBER(tosp_old_to_method, false, "Space stats use prechopping?",
44  ccstruct_->params()),
45  BOOL_MEMBER(tosp_old_to_constrain_sp_kn, false,
46  "Constrain relative values of inter and intra-word gaps for "
47  "old_to_method.",
48  ccstruct_->params()),
49  BOOL_MEMBER(tosp_only_use_prop_rows, true,
50  "Block stats to use fixed pitch rows?", ccstruct_->params()),
51  BOOL_MEMBER(tosp_force_wordbreak_on_punct, false,
52  "Force word breaks on punct to break long lines in non-space "
53  "delimited langs",
54  ccstruct_->params()),
55  BOOL_MEMBER(tosp_use_pre_chopping, false, "Space stats use prechopping?",
56  ccstruct_->params()),
57  BOOL_MEMBER(tosp_old_to_bug_fix, false, "Fix suspected bug in old code",
58  ccstruct_->params()),
59  BOOL_MEMBER(tosp_block_use_cert_spaces, true, "Only stat OBVIOUS spaces",
60  ccstruct_->params()),
61  BOOL_MEMBER(tosp_row_use_cert_spaces, true, "Only stat OBVIOUS spaces",
62  ccstruct_->params()),
63  BOOL_MEMBER(tosp_narrow_blobs_not_cert, true, "Only stat OBVIOUS spaces",
64  ccstruct_->params()),
65  BOOL_MEMBER(tosp_row_use_cert_spaces1, true, "Only stat OBVIOUS spaces",
66  ccstruct_->params()),
67  BOOL_MEMBER(tosp_recovery_isolated_row_stats, true,
68  "Use row alone when inadequate cert spaces",
69  ccstruct_->params()),
70  BOOL_MEMBER(tosp_only_small_gaps_for_kern, false, "Better guess",
71  ccstruct_->params()),
72  BOOL_MEMBER(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?",
73  ccstruct_->params()),
74  BOOL_MEMBER(tosp_fuzzy_limit_all, true,
75  "Don't restrict kn->sp fuzzy limit to tables",
76  ccstruct_->params()),
77  BOOL_MEMBER(tosp_stats_use_xht_gaps, true,
78  "Use within xht gap for wd breaks", ccstruct_->params()),
79  BOOL_MEMBER(tosp_use_xht_gaps, true, "Use within xht gap for wd breaks",
80  ccstruct_->params()),
81  BOOL_MEMBER(tosp_only_use_xht_gaps, false,
82  "Only use within xht gap for wd breaks", ccstruct_->params()),
83  BOOL_MEMBER(tosp_rule_9_test_punct, false,
84  "Don't chng kn to space next to punct", ccstruct_->params()),
85  BOOL_MEMBER(tosp_flip_fuzz_kn_to_sp, true, "Default flip",
86  ccstruct_->params()),
87  BOOL_MEMBER(tosp_flip_fuzz_sp_to_kn, true, "Default flip",
88  ccstruct_->params()),
89  BOOL_MEMBER(tosp_improve_thresh, false, "Enable improvement heuristic",
90  ccstruct_->params()),
91  INT_MEMBER(tosp_debug_level, 0, "Debug data", ccstruct_->params()),
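92  INT_MEMBER(tosp_enough_space_samples_for_median, 3,
93  "or should we use mean", ccstruct_->params()),
94  INT_MEMBER(tosp_redo_kern_limit, 10,
95  "No.samples reqd to reestimate for row", ccstruct_->params()),
96  INT_MEMBER(tosp_few_samples, 40,
97  "No.gaps reqd with 1 large gap to treat as a table",
98  ccstruct_->params()),
99  INT_MEMBER(tosp_short_row, 20,
100  "No.gaps reqd with few cert spaces to use certs",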
101  ccstruct_->params()),
102  INT_MEMBER(tosp_sanity_method, 1, "How to avoid being silly",
103  ccstruct_->params()),
104  double_MEMBER(tosp_old_sp_kn_th_factor, 2.0,
105  "Factor for defining space threshold in terms of space and "
106  "kern sizes",
107  ccstruct_->params()),
108  double_MEMBER(tosp_threshold_bias1, 0, "how far between kern and space?",
109  ccstruct_->params()),
110  double_MEMBER(tosp_threshold_bias2, 0, "how far between kern and space?",
111  ccstruct_->params()),
112  double_MEMBER(tosp_narrow_fraction, 0.3, "Fract of xheight for narrow",
113  ccstruct_->params()),
114  double_MEMBER(tosp_narrow_aspect_ratio, 0.48,
115  "narrow if w/h less than this", ccstruct_->params()),
116  double_MEMBER(tosp_wide_fraction, 0.52, "Fract of xheight for wide",
117  ccstruct_->params()),
118  double_MEMBER(tosp_wide_aspect_ratio, 0.0, "wide if w/h less than this",
119  ccstruct_->params()),
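120  double_MEMBER(tosp_fuzzy_space_factor, 0.6,
121  "Fract of xheight for fuzz sp", ccstruct_->params()),
122  double_MEMBER(tosp_fuzzy_space_factor1, 0.5,
123  "Fract of xheight for fuzz sp", ccstruct_->params()),
124  double_MEMBER(tosp_fuzzy_space_factor2, 0.72,
125  "Fract of xheight for fuzz sp", ccstruct_->params()),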
126  double_MEMBER(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern",
127  ccstruct_->params()),
128  double_MEMBER(tosp_kern_gap_factor1, 2.0, "gap ratio to flip kern->sp",
129  ccstruct_->params()),
130  double_MEMBER(tosp_kern_gap_factor2, 1.3, "gap ratio to flip kern->sp",
131  ccstruct_->params()),
132  double_MEMBER(tosp_kern_gap_factor3, 2.5, "gap ratio to flip kern->sp",
133  ccstruct_->params()),
134  double_MEMBER(tosp_ignore_big_gaps, -1, "xht multiplier",
135  ccstruct_->params()),
136  double_MEMBER(tosp_ignore_very_big_gaps, 3.5, "xht multiplier",
137  ccstruct_->params()),
138  double_MEMBER(tosp_rep_space, 1.6, "rep gap multiplier for space",
139  ccstruct_->params()),
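140  double_MEMBER(tosp_enough_small_gaps, 0.65,
141  "Fract of kerns reqd for isolated row stats",
142  ccstruct_->params()),
143  double_MEMBER(tosp_table_kn_sp_ratio, 2.25,
144  "Min difference of kn & sp in table", ccstruct_->params()),
145  double_MEMBER(tosp_table_xht_sp_ratio, 0.33,
146  "Expect spaces bigger than this", ccstruct_->params()),
147  double_MEMBER(tosp_table_fuzzy_kn_sp_ratio, 3.0,
148  "Fuzzy if less than this", ccstruct_->params()),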
149  double_MEMBER(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg",
150  ccstruct_->params()),
151  double_MEMBER(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg",
152  ccstruct_->params()),
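153  double_MEMBER(tosp_min_sane_kn_sp, 1.5,
154  "Don't trust spaces less than this time kn",
155  ccstruct_->params()),
156  double_MEMBER(tosp_init_guess_kn_mult, 2.2,
157  "Thresh guess - mult kn by this", ccstruct_->params()),
158  double_MEMBER(tosp_init_guess_xht_mult, 0.28,
159  "Thresh guess - mult xht by this", ccstruct_->params()),
160  double_MEMBER(tosp_max_sane_kn_thresh, 5.0,
161  "Multiplier on kn to limit thresh", ccstruct_->params()),
162  double_MEMBER(tosp_flip_caution, 0.0,
163  "Don't autoflip kn to sp when large separation",
164  ccstruct_->params()),
165  double_MEMBER(tosp_large_kerning, 0.19,
166  "Limit use of xht gap with large kns", ccstruct_->params()),
167  double_MEMBER(tosp_dont_fool_with_small_kerns, -1,
168  "Limit use of xht gap with odd small kns",
169  ccstruct_->params()),
170  double_MEMBER(tosp_near_lh_edge, 0,
171  "Don't reduce box if the top left is non blank",
172  ccstruct_->params()),
173  double_MEMBER(tosp_silly_kn_sp_gap, 0.2,
174  "Don't let sp minus kn get too small", ccstruct_->params()),
175  double_MEMBER(tosp_pass_wide_fuzz_sp_to_context, 0.75,
176  "How wide fuzzies need context", ccstruct_->params()),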
177  // tordmain.cpp ///////////////////////////////////////////
178  BOOL_MEMBER(textord_no_rejects, false, "Don't remove noise blobs",
179  ccstruct_->params()),
180  BOOL_MEMBER(textord_show_blobs, false, "Display unsorted blobs",
181  ccstruct_->params()),
182  BOOL_MEMBER(textord_show_boxes, false, "Display unsorted blobs",
183  ccstruct_->params()),
184  INT_MEMBER(textord_max_noise_size, 7, "Pixel size of noise",
185  ccstruct_->params()),
186  INT_MEMBER(textord_baseline_debug, 0, "Baseline debug level",
187  ccstruct_->params()),
188  double_MEMBER(textord_blob_size_bigile, 95, "Percentile for large blobs",
189  ccstruct_->params()),
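190  double_MEMBER(textord_noise_area_ratio, 0.7,
191  "Fraction of bounding box for noise", ccstruct_->params()),
192  double_MEMBER(textord_blob_size_smallile, 20,
193  "Percentile for small blobs", ccstruct_->params()),
194  double_MEMBER(textord_initialx_ile, 0.75,
195  "Ile of sizes for xheight guess", ccstruct_->params()),
196  double_MEMBER(textord_initialasc_ile, 0.90,
197  "Ile of sizes for xheight guess", ccstruct_->params()),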
198  INT_MEMBER(textord_noise_sizefraction, 10, "Fraction of size for maxima",
199  ccstruct_->params()),
200  double_MEMBER(textord_noise_sizelimit, 0.5,
201  "Fraction of x for big t count", ccstruct_->params()),
202  INT_MEMBER(textord_noise_translimit, 16, "Transitions for normal blob",
203  ccstruct_->params()),
204  double_MEMBER(textord_noise_normratio, 2.0,
205  "Dot to norm ratio for deletion", ccstruct_->params()),
206  BOOL_MEMBER(textord_noise_rejwords, true, "Reject noise-like words",
207  ccstruct_->params()),
208  BOOL_MEMBER(textord_noise_rejrows, true, "Reject noise-like rows",
209  ccstruct_->params()),
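210  double_MEMBER(textord_noise_syfract, 0.2,
211  "xh fract height error for norm blobs",
212  ccstruct_->params()),
213  double_MEMBER(textord_noise_sxfract, 0.4,
214  "xh fract width error for norm blobs", ccstruct_->params()),
215  double_MEMBER(textord_noise_hfract, 1.0/64,
216  "Height fraction to discard outlines as speckle noise",
217  ccstruct_->params()),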
218  INT_MEMBER(textord_noise_sncount, 1, "super norm blobs to save row",
219  ccstruct_->params()),
220  double_MEMBER(textord_noise_rowratio, 6.0,
221  "Dot to norm ratio for deletion", ccstruct_->params()),
222  BOOL_MEMBER(textord_noise_debug, false, "Debug row garbage detector",
223  ccstruct_->params()),
224  double_MEMBER(textord_blshift_maxshift, 0.00, "Max baseline shift",
225  ccstruct_->params()),
226  double_MEMBER(textord_blshift_xfraction, 9.99,
227  "Min size of baseline shift", ccstruct_->params()) {}
double textord_blob_size_bigile
Definition: textord.h:378
int tosp_enough_space_samples_for_median
Definition: textord.h:304
double tosp_threshold_bias2
Definition: textord.h:318
bool tosp_row_use_cert_spaces1
Definition: textord.h:283
bool tosp_block_use_cert_spaces
Definition: textord.h:277
double tosp_gap_factor
Definition: textord.h:332
double tosp_wide_fraction
Definition: textord.h:323
bool tosp_only_small_gaps_for_kern
Definition: textord.h:286
double tosp_fuzzy_sp_fraction
Definition: textord.h:351
double tosp_ignore_big_gaps
Definition: textord.h:339
bool tosp_stats_use_xht_gaps
Definition: textord.h:291
double tosp_pass_wide_fuzz_sp_to_context
Definition: textord.h:371
double tosp_old_sp_kn_th_factor
Definition: textord.h:314
bool textord_single_height_mode
Definition: textord.h:261
double tosp_dont_fool_with_small_kerns
Definition: textord.h:365
double tosp_rep_space
Definition: textord.h:341
double tosp_ignore_very_big_gaps
Definition: textord.h:340
double tosp_enough_small_gaps
Definition: textord.h:343
double textord_noise_sxfract
Definition: textord.h:392
int textord_max_noise_size
Definition: textord.h:376
double tosp_large_kerning
Definition: textord.h:363
double tosp_threshold_bias1
Definition: textord.h:316
#define INT_MEMBER(name, val, comment, vec)
Definition: params.h:288
bool tosp_flip_fuzz_kn_to_sp
Definition: textord.h:298
int textord_noise_sizefraction
Definition: textord.h:384
bool textord_show_boxes
Definition: textord.h:375
bool textord_no_rejects
Definition: textord.h:373
int textord_noise_sncount
Definition: textord.h:395
double textord_noise_normratio
Definition: textord.h:387
bool tosp_narrow_blobs_not_cert
Definition: textord.h:281
#define BOOL_MEMBER(name, val, comment, vec)
Definition: params.h:291
double tosp_near_lh_edge
Definition: textord.h:367
double tosp_kern_gap_factor1
Definition: textord.h:334
double tosp_min_sane_kn_sp
Definition: textord.h:353
double tosp_table_kn_sp_ratio
Definition: textord.h:345
#define double_MEMBER(name, val, comment, vec)
Definition: params.h:297
double textord_blob_size_smallile
Definition: textord.h:381
int textord_baseline_debug
Definition: textord.h:377
bool tosp_rule_9_test_punct
Definition: textord.h:297
bool tosp_old_to_bug_fix
Definition: textord.h:275
double tosp_fuzzy_kn_fraction
Definition: textord.h:350
double textord_noise_hfract
Definition: textord.h:394
double textord_blshift_maxshift
Definition: textord.h:398
bool tosp_only_use_prop_rows
Definition: textord.h:268
double tosp_table_fuzzy_kn_sp_ratio
Definition: textord.h:349
bool textord_show_blobs
Definition: textord.h:374
double tosp_kern_gap_factor3
Definition: textord.h:338
double tosp_fuzzy_space_factor
Definition: textord.h:327
bool tosp_only_use_xht_gaps
Definition: textord.h:295
int textord_noise_translimit
Definition: textord.h:386
ParamsVectors * params()
Definition: ccutil.h:62
double tosp_table_xht_sp_ratio
Definition: textord.h:347
double textord_initialasc_ile
Definition: textord.h:383
double tosp_init_guess_kn_mult
Definition: textord.h:355
double tosp_fuzzy_space_factor2
Definition: textord.h:331
double textord_noise_rowratio
Definition: textord.h:396
bool tosp_flip_fuzz_sp_to_kn
Definition: textord.h:299
bool tosp_row_use_cert_spaces
Definition: textord.h:279
bool tosp_fuzzy_limit_all
Definition: textord.h:289
int tosp_redo_kern_limit
Definition: textord.h:306
bool tosp_recovery_isolated_row_stats
Definition: textord.h:285
double tosp_narrow_aspect_ratio
Definition: textord.h:322
int tosp_sanity_method
Definition: textord.h:311
double textord_blshift_xfraction
Definition: textord.h:399
double tosp_max_sane_kn_thresh
Definition: textord.h:359
bool tosp_use_pre_chopping
Definition: textord.h:273
double textord_noise_area_ratio
Definition: textord.h:380
bool tosp_old_to_method
Definition: textord.h:263
bool textord_noise_rejwords
Definition: textord.h:388
double tosp_kern_gap_factor2
Definition: textord.h:336
bool textord_noise_rejrows
Definition: textord.h:389
double tosp_narrow_fraction
Definition: textord.h:320
double tosp_silly_kn_sp_gap
Definition: textord.h:369
double tosp_fuzzy_space_factor1
Definition: textord.h:329
bool tosp_force_wordbreak_on_punct
Definition: textord.h:271
double textord_initialx_ile
Definition: textord.h:382
double textord_noise_sizelimit
Definition: textord.h:385
double tosp_wide_aspect_ratio
Definition: textord.h:325
double textord_noise_syfract
Definition: textord.h:390
bool tosp_improve_thresh
Definition: textord.h:301
double tosp_init_guess_xht_mult
Definition: textord.h:357
bool tosp_use_xht_gaps
Definition: textord.h:293
double tosp_flip_caution
Definition: textord.h:361
bool tosp_all_flips_fuzzy
Definition: textord.h:287
bool textord_noise_debug
Definition: textord.h:397
bool tosp_old_to_constrain_sp_kn
Definition: textord.h:266

◆ ~Textord()

tesseract::Textord::~Textord ( )
default

Member Function Documentation

◆ CleanupSingleRowResult()

void tesseract::Textord::CleanupSingleRowResult ( PageSegMode  pageseg_mode,
PAGE_RES *  page_res 
)

Definition at line 322 of file textord.cpp.

323  {
324  if (PSM_LINE_FIND_ENABLED(pageseg_mode) || PSM_SPARSE(pageseg_mode))
325  return; // No cleanup required.
326  PAGE_RES_IT it(page_res);
327  // Find the best row, being the greatest mean word conf.
328  float row_total_conf = 0.0f;
329  int row_word_count = 0;
330  ROW_RES* best_row = nullptr;
331  float best_conf = 0.0f;
332  for (it.restart_page(); it.word() != nullptr; it.forward()) {
333  WERD_RES* word = it.word();
334  row_total_conf += word->best_choice->certainty();
335  ++row_word_count;
336  if (it.next_row() != it.row()) {
337  row_total_conf /= row_word_count;
338  if (best_row == nullptr || best_conf < row_total_conf) {
339  best_row = it.row();
340  best_conf = row_total_conf;
341  }
342  row_total_conf = 0.0f;
343  row_word_count = 0;
344  }
345  }
346  // Now eliminate any word not in the best row.
347  for (it.restart_page(); it.word() != nullptr; it.forward()) {
348  if (it.row() != best_row)
349  it.DeleteCurrentWord();
350  }
351 }
bool PSM_SPARSE(int pageseg_mode)
Definition: publictypes.h:200
float certainty() const
Definition: ratngs.h:330
bool PSM_LINE_FIND_ENABLED(int pageseg_mode)
Definition: publictypes.h:206
WERD_CHOICE * best_choice
Definition: pageres.h:235
ROW * row
Definition: pageres.h:143
WERD * word
Definition: pageres.h:189

◆ compute_block_xheight()

void tesseract::Textord::compute_block_xheight ( TO_BLOCK *  block,
float  gradient 
)

Definition at line 1256 of file makerow.cpp.

1256  {
1257  TO_ROW *row; // current row
1258  float asc_frac_xheight = CCStruct::kAscenderFraction /
1259  CCStruct::kXHeightFraction;
1260  float desc_frac_xheight = CCStruct::kDescenderFraction /
1261  CCStruct::kXHeightFraction;
1262  int32_t min_height, max_height; // limits on xheight
1263  TO_ROW_IT row_it = block->get_rows();
1264  if (row_it.empty()) return; // no rows
1265 
1266  // Compute the best guess of xheight of each row individually.
1267  // Use xheight and ascrise values of the rows where ascenders were found.
1268  get_min_max_xheight(block->line_size, &min_height, &max_height);
1269  STATS row_asc_xheights(min_height, max_height + 1);
1270  STATS row_asc_ascrise(static_cast<int>(min_height * asc_frac_xheight),
1271  static_cast<int>(max_height * asc_frac_xheight) + 1);
1272  int min_desc_height = static_cast<int>(min_height * desc_frac_xheight);
1273  int max_desc_height = static_cast<int>(max_height * desc_frac_xheight);
1274  STATS row_asc_descdrop(min_desc_height, max_desc_height + 1);
1275  STATS row_desc_xheights(min_height, max_height + 1);
1276  STATS row_desc_descdrop(min_desc_height, max_desc_height + 1);
1277  STATS row_cap_xheights(min_height, max_height + 1);
1278  STATS row_cap_floating_xheights(min_height, max_height + 1);
1279  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1280  row = row_it.data();
1281  // Compute the xheight of this row if it has not been computed before.
1282  if (row->xheight <= 0.0) {
1283  compute_row_xheight(row, block->block->classify_rotation(),
1284  gradient, block->line_size);
1285  }
1286  ROW_CATEGORY row_category = get_row_category(row);
1287  if (row_category == ROW_ASCENDERS_FOUND) {
1288  row_asc_xheights.add(static_cast<int32_t>(row->xheight),
1289  row->xheight_evidence);
1290  row_asc_ascrise.add(static_cast<int32_t>(row->ascrise),
1291  row->xheight_evidence);
1292  row_asc_descdrop.add(static_cast<int32_t>(-row->descdrop),
1293  row->xheight_evidence);
1294  } else if (row_category == ROW_DESCENDERS_FOUND) {
1295  row_desc_xheights.add(static_cast<int32_t>(row->xheight),
1296  row->xheight_evidence);
1297  row_desc_descdrop.add(static_cast<int32_t>(-row->descdrop),
1298  row->xheight_evidence);
1299  } else if (row_category == ROW_UNKNOWN) {
1300  fill_heights(row, gradient, min_height, max_height,
1301  &row_cap_xheights, &row_cap_floating_xheights);
1302  }
1303  }
1304 
1305  float xheight = 0.0;
1306  float ascrise = 0.0;
1307  float descdrop = 0.0;
1308  // Compute our best guess of xheight of this block.
1309  if (row_asc_xheights.get_total() > 0) {
1310  // Determine xheight from rows where ascenders were found.
1311  xheight = row_asc_xheights.median();
1312  ascrise = row_asc_ascrise.median();
1313  descdrop = -row_asc_descdrop.median();
1314  } else if (row_desc_xheights.get_total() > 0) {
1315  // Determine xheight from rows where descenders were found.
1316  xheight = row_desc_xheights.median();
1317  descdrop = -row_desc_descdrop.median();
1318  } else if (row_cap_xheights.get_total() > 0) {
1319  // All the rows in the block were (a/de)scenderless.
1320  // Try to search for two modes in row_cap_heights that could
1321  // be the xheight and the capheight (e.g. some of the rows
1322  // were lowercase, but did not have enough (a/de)scenders.
1323  // If such two modes can not be found, this block is most
1324  // likely all caps (or all small caps, in which case the code
1325  // still works as intended).
1326  compute_xheight_from_modes(&row_cap_xheights, &row_cap_floating_xheights,
1327  textord_single_height_mode &&
1328  block->block->classify_rotation().y() == 0.0,
1329  min_height, max_height, &(xheight), &(ascrise));
1330  if (ascrise == 0) { // assume only caps in the whole block
1331  xheight = row_cap_xheights.median() * CCStruct::kXHeightCapRatio;
1332  }
1333  } else { // default block sizes
1334  xheight = block->line_size * CCStruct::kXHeightFraction;
1335  }
1336  // Correct xheight, ascrise and descdrop if necessary.
1337  bool corrected_xheight = false;
1338  if (xheight < textord_min_xheight) {
1339  xheight = static_cast<float>(textord_min_xheight);
1340  corrected_xheight = true;
1341  }
1342  if (corrected_xheight || ascrise <= 0.0) {
1343  ascrise = xheight * asc_frac_xheight;
1344  }
1345  if (corrected_xheight || descdrop >= 0.0) {
1346  descdrop = -(xheight * desc_frac_xheight);
1347  }
1348  block->xheight = xheight;
1349 
1350  if (textord_debug_xheights) {
1351  tprintf("Block average xheight=%.4f, ascrise=%.4f, descdrop=%.4f\n",
1352  xheight, ascrise, descdrop);
1353  }
1354  // Correct xheight, ascrise, descdrop of rows based on block averages.
1355  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1356  correct_row_xheight(row_it.data(), xheight, ascrise, descdrop);
1357  }
1358 }
ROW_CATEGORY get_row_category(const TO_ROW *row)
Definition: makerow.h:122
float descdrop
Definition: blobbox.h:673
int xheight_evidence
Definition: blobbox.h:671
bool textord_single_height_mode
Definition: textord.h:261
void correct_row_xheight(TO_ROW *row, float xheight, float ascrise, float descdrop)
Definition: makerow.cpp:1687
int textord_min_xheight
Definition: makerow.cpp:68
void compute_row_xheight(TO_ROW *row, const FCOORD &rotation, float gradient, int block_line_size)
Definition: makerow.cpp:1368
Definition: statistc.h:33
static const double kDescenderFraction
Definition: ccstruct.h:33
TO_ROW_LIST * get_rows()
Definition: blobbox.h:717
static const double kXHeightFraction
Definition: ccstruct.h:34
float xheight
Definition: blobbox.h:670
int compute_xheight_from_modes(STATS *heights, STATS *floating_heights, bool cap_only, int min_height, int max_height, float *xheight, float *ascrise)
Definition: makerow.cpp:1469
void get_min_max_xheight(int block_linesize, int *min_height, int *max_height)
Definition: makerow.h:115
FCOORD classify_rotation() const
Definition: ocrblock.h:142
float xheight
Definition: blobbox.h:801
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
float ascrise
Definition: blobbox.h:672
BLOCK * block
Definition: blobbox.h:790
void fill_heights(TO_ROW *row, float gradient, int min_height, int max_height, STATS *heights, STATS *floating_heights)
Definition: makerow.cpp:1408
static const double kAscenderFraction
Definition: ccstruct.h:35
static const double kXHeightCapRatio
Definition: ccstruct.h:37
ROW_CATEGORY
Definition: makerow.h:35
float y() const
Definition: points.h:211
bool textord_debug_xheights
Definition: makerow.cpp:56
float line_size
Definition: blobbox.h:798

◆ compute_row_xheight()

void tesseract::Textord::compute_row_xheight ( TO_ROW *  row,
const FCOORD &  rotation,
float  gradient,
int  block_line_size 
)

Definition at line 1368 of file makerow.cpp.

1371  {
1372  // Find blobs representing repeated characters in rows and mark them.
1373  // This information is used for computing row xheight and at a later
1374  // stage when words are formed by make_words.
1375  if (!row->rep_chars_marked()) {
1376  mark_repeated_chars(row);
1377  }
1378 
1379  int min_height, max_height;
1380  get_min_max_xheight(block_line_size, &min_height, &max_height);
1381  STATS heights(min_height, max_height + 1);
1382  STATS floating_heights(min_height, max_height + 1);
1383  fill_heights(row, gradient, min_height, max_height,
1384  &heights, &floating_heights);
1385  row->ascrise = 0.0f;
1386  row->xheight = 0.0f;
1387  row->xheight_evidence =
1388  compute_xheight_from_modes(&heights, &floating_heights,
1389  textord_single_height_mode &&
1390  rotation.y() == 0.0,
1391  min_height, max_height,
1392  &(row->xheight), &(row->ascrise));
1393  row->descdrop = 0.0f;
1394  if (row->xheight > 0.0) {
1395  row->descdrop = static_cast<float>(
1396  compute_row_descdrop(row, gradient, row->xheight_evidence, &heights));
1397  }
1398 }
float descdrop
Definition: blobbox.h:673
int xheight_evidence
Definition: blobbox.h:671
bool textord_single_height_mode
Definition: textord.h:261
void mark_repeated_chars(TO_ROW *row)
Definition: makerow.cpp:2641
Definition: statistc.h:33
float xheight
Definition: blobbox.h:670
int compute_xheight_from_modes(STATS *heights, STATS *floating_heights, bool cap_only, int min_height, int max_height, float *xheight, float *ascrise)
Definition: makerow.cpp:1469
bool rep_chars_marked() const
Definition: blobbox.h:644
void get_min_max_xheight(int block_linesize, int *min_height, int *max_height)
Definition: makerow.h:115
float ascrise
Definition: blobbox.h:672
void fill_heights(TO_ROW *row, float gradient, int min_height, int max_height, STATS *heights, STATS *floating_heights)
Definition: makerow.cpp:1408
int32_t compute_row_descdrop(TO_ROW *row, float gradient, int xheight_blob_count, STATS *asc_heights)
Definition: makerow.cpp:1565
float y() const
Definition: points.h:211

◆ filter_blobs()

void tesseract::Textord::filter_blobs ( ICOORD  page_tr,
TO_BLOCK_LIST *  blocks,
bool  testing_on 
)

Definition at line 250 of file tordmain.cpp.

252  { // for plotting
253  TO_BLOCK_IT block_it = blocks; // destination iterator
254  TO_BLOCK *block; // created block
255 
256  #ifndef GRAPHICS_DISABLED
257  if (to_win != nullptr)
258  to_win->Clear();
259  #endif // GRAPHICS_DISABLED
260 
261  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
262  block_it.forward()) {
263  block = block_it.data();
264  block->line_size = filter_noise_blobs(&block->blobs,
265  &block->noise_blobs,
266  &block->small_blobs,
267  &block->large_blobs);
268  if (block->line_size == 0) block->line_size = 1;
269  block->line_spacing = block->line_size *
276 
277  #ifndef GRAPHICS_DISABLED
278  if (textord_show_blobs && testing_on) {
279  if (to_win == nullptr)
280  create_to_win(page_tr);
281  block->plot_graded_blobs(to_win);
282  }
283  if (textord_show_boxes && testing_on) {
284  if (to_win == nullptr)
285  create_to_win(page_tr);
290  }
291  #endif // GRAPHICS_DISABLED
292  }
293 }
double textord_min_linesize
Definition: makerow.cpp:82
double textord_excess_blobsize
Definition: makerow.cpp:84
void plot_box_list(ScrollView *win, BLOBNBOX_LIST *list, ScrollView::Color body_colour)
Definition: drawtord.cpp:69
bool textord_show_boxes
Definition: textord.h:375
static const double kDescenderFraction
Definition: ccstruct.h:33
float line_spacing
Definition: blobbox.h:792
static const double kXHeightFraction
Definition: ccstruct.h:34
void plot_graded_blobs(ScrollView *to_win)
Definition: blobbox.cpp:1072
float max_blob_size
Definition: blobbox.h:799
bool textord_show_blobs
Definition: textord.h:374
EXTERN ScrollView * to_win
Definition: drawtord.cpp:37
static const double kAscenderFraction
Definition: ccstruct.h:35
void Clear()
Definition: scrollview.cpp:591
BLOBNBOX_LIST blobs
Definition: blobbox.h:785
ScrollView * create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:46
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:789
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:788
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:787
float line_size
Definition: blobbox.h:798

◆ find_components()

void tesseract::Textord::find_components ( Pix *  pix,
BLOCK_LIST *  blocks,
TO_BLOCK_LIST *  to_blocks 
)

Definition at line 219 of file tordmain.cpp.

220  {
221  int width = pixGetWidth(pix);
222  int height = pixGetHeight(pix);
223  if (width > INT16_MAX || height > INT16_MAX) {
224  tprintf("Input image too large! (%d, %d)\n", width, height);
225  return; // Can't handle it.
226  }
227 
228  set_global_loc_code(LOC_EDGE_PROG);
229 
230  BLOCK_IT block_it(blocks); // iterator
231  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
232  block_it.forward()) {
233  BLOCK* block = block_it.data();
234  if (block->pdblk.poly_block() == nullptr || block->pdblk.poly_block()->IsText()) {
235  extract_edges(pix, block);
236  }
237  }
238 
239  assign_blobs_to_blocks2(pix, blocks, to_blocks);
240  ICOORD page_tr(width, height);
241  filter_blobs(page_tr, to_blocks, !textord_test_landscape);
242 }
#define LOC_EDGE_PROG
Definition: errcode.h:44
bool textord_test_landscape
Definition: makerow.cpp:49
integer coordinate
Definition: points.h:32
POLY_BLOCK * poly_block() const
Definition: pdblock.h:56
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
bool IsText() const
Definition: polyblk.h:49
Definition: ocrblock.h:30
void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, bool testing_on)
Definition: tordmain.cpp:250
void set_global_loc_code(int loc_code)
Definition: globaloc.cpp:80
void assign_blobs_to_blocks2(Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
Definition: tordmain.cpp:168
void extract_edges(Pix *pix, BLOCK *block)
Definition: edgblob.cpp:334
PDBLK pdblk
Definition: ocrblock.h:192

◆ make_blob_words()

ROW * tesseract::Textord::make_blob_words ( TO_ROW *  row,
FCOORD  rotation 
)

Definition at line 1185 of file tospace.cpp.

1188  {
1189  bool bol; // start of line
1190  ROW *real_row; // output row
1191  C_OUTLINE_IT cout_it;
1192  C_BLOB_LIST cblobs;
1193  C_BLOB_IT cblob_it = &cblobs;
1194  WERD_LIST words;
1195  WERD *word; // new word
1196  BLOBNBOX *bblob; // current blob
1197  TBOX blob_box; // bounding box
1198  BLOBNBOX_IT box_it; // iterator
1199  int16_t word_count = 0;
1200 
1201  cblob_it.set_to_list(&cblobs);
1202  box_it.set_to_list(row->blob_list());
1203  // new words
1204  WERD_IT word_it(&words);
1205  bol = TRUE;
1206  if (!box_it.empty()) {
1207 
1208  do {
1209  bblob = box_it.data();
1210  blob_box = bblob->bounding_box();
1211  if (bblob->joined_to_prev()) {
1212  if (bblob->cblob() != nullptr) {
1213  cout_it.set_to_list(cblob_it.data()->out_list());
1214  cout_it.move_to_last();
1215  cout_it.add_list_after(bblob->cblob()->out_list());
1216  delete bblob->cblob();
1217  }
1218  } else {
1219  if (bblob->cblob() != nullptr)
1220  cblob_it.add_after_then_move(bblob->cblob());
1221  }
1222  box_it.forward(); // next one
1223  bblob = box_it.data();
1224  blob_box = bblob->bounding_box();
1225 
1226  if (!bblob->joined_to_prev() && !cblobs.empty()) {
1227  word = new WERD(&cblobs, 1, nullptr);
1228  word_count++;
1229  word_it.add_after_then_move(word);
1230  if (bol) {
1231  word->set_flag(W_BOL, TRUE);
1232  bol = FALSE;
1233  }
1234  if (box_it.at_first()) { // at end of line
1235  word->set_flag(W_EOL, TRUE);
1236  }
1237  }
1238  }
1239  while (!box_it.at_first()); // until back at start
1240  /* Setup the row with created words. */
1241  real_row = new ROW(row, (int16_t) row->kern_size, (int16_t) row->space_size);
1242  word_it.set_to_list(real_row->word_list());
1243  //put words in row
1244  word_it.add_list_after(&words);
1245  real_row->recalc_bounding_box();
1246  if (tosp_debug_level > 4) {
1247  tprintf ("Row:Made %d words in row ((%d,%d)(%d,%d))\n",
1248  word_count,
1249  real_row->bounding_box().left(),
1250  real_row->bounding_box().bottom(),
1251  real_row->bounding_box().right(),
1252  real_row->bounding_box().top());
1253  }
1254  return real_row;
1255  }
1256  return nullptr;
1257 }
#define TRUE
Definition: capi.h:51
Definition: rect.h:34
WERD_LIST * word_list()
Definition: ocrrow.h:55
Definition: werd.h:35
float space_size
Definition: blobbox.h:680
void set_flag(WERD_FLAGS mask, bool value)
Definition: werd.h:127
int16_t left() const
Definition: rect.h:72
int16_t top() const
Definition: rect.h:58
#define FALSE
Definition: capi.h:52
float kern_size
Definition: blobbox.h:679
bool joined_to_prev() const
Definition: blobbox.h:257
Definition: werd.h:59
TBOX bounding_box() const
Definition: ocrrow.h:88
Definition: ocrrow.h:36
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
Definition: werd.h:34
void recalc_bounding_box()
Definition: ocrrow.cpp:101
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:70
const TBOX & bounding_box() const
Definition: blobbox.h:231
int16_t right() const
Definition: rect.h:79
int16_t bottom() const
Definition: rect.h:65
C_BLOB * cblob() const
Definition: blobbox.h:269
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:612

◆ make_prop_words()

ROW * tesseract::Textord::make_prop_words ( TO_ROW *  row,
FCOORD  rotation 
)

Definition at line 892 of file tospace.cpp.

895  {
896  bool bol; // start of line
897  /* prev_ values are for start of word being built. non prev_ values are for
898  the gap between the word being built and the next one. */
899  bool prev_fuzzy_sp; // probably space
900  bool prev_fuzzy_non; // probably not
901  uint8_t prev_blanks; // in front of word
902  bool fuzzy_sp = false; // probably space
903  bool fuzzy_non = false; // probably not
904  uint8_t blanks = 0; // in front of word
905  bool prev_gap_was_a_space = false;
906  bool break_at_next_gap = false;
907  ROW *real_row; // output row
908  C_OUTLINE_IT cout_it;
909  C_BLOB_LIST cblobs;
910  C_BLOB_IT cblob_it = &cblobs;
911  WERD_LIST words;
912  WERD *word; // new word
913  int32_t next_rep_char_word_right = INT32_MAX;
914  float repetition_spacing; // gap between repetitions
915  int32_t xstarts[2]; // row ends
916  int32_t prev_x; // end of prev blob
917  BLOBNBOX *bblob; // current blob
918  TBOX blob_box; // bounding box
919  BLOBNBOX_IT box_it; // iterator
920  TBOX prev_blob_box;
921  TBOX next_blob_box;
922  int16_t prev_gap = INT16_MAX;
923  int16_t current_gap = INT16_MAX;
924  int16_t next_gap = INT16_MAX;
925  int16_t prev_within_xht_gap = INT16_MAX;
926  int16_t current_within_xht_gap = INT16_MAX;
927  int16_t next_within_xht_gap = INT16_MAX;
928  int16_t word_count = 0;
929 
930  // repeated char words
931  WERD_IT rep_char_it(&(row->rep_words));
932  if (!rep_char_it.empty ()) {
933  next_rep_char_word_right =
934  rep_char_it.data ()->bounding_box ().right ();
935  }
936 
937  prev_x = -INT16_MAX;
938  cblob_it.set_to_list (&cblobs);
939  box_it.set_to_list (row->blob_list ());
940  // new words
941  WERD_IT word_it(&words);
942  bol = true;
943  prev_blanks = 0;
944  prev_fuzzy_sp = false;
945  prev_fuzzy_non = false;
946  if (!box_it.empty ()) {
947  xstarts[0] = box_it.data ()->bounding_box ().left ();
948  if (xstarts[0] > next_rep_char_word_right) {
949  /* We need to insert a repeated char word at the start of the row */
950  word = rep_char_it.extract ();
951  word_it.add_after_then_move (word);
952  /* Set spaces before repeated char word */
953  word->set_flag (W_BOL, true);
954  bol = false;
955  word->set_blanks (0);
956  //NO uncertainty
957  word->set_flag (W_FUZZY_SP, false);
958  word->set_flag (W_FUZZY_NON, false);
959  xstarts[0] = word->bounding_box ().left ();
960  /* Set spaces after repeated char word (and leave current word set) */
961  repetition_spacing = find_mean_blob_spacing (word);
962  current_gap = box_it.data ()->bounding_box ().left () -
963  next_rep_char_word_right;
964  current_within_xht_gap = current_gap;
965  if (current_gap > tosp_rep_space * repetition_spacing) {
966  prev_blanks = (uint8_t) floor (current_gap / row->space_size);
967  if (prev_blanks < 1)
968  prev_blanks = 1;
969  }
970  else
971  prev_blanks = 0;
972  if (tosp_debug_level > 5)
973  tprintf ("Repch wd at BOL(%d, %d). rep spacing %5.2f; Rgap:%d ",
974  box_it.data ()->bounding_box ().left (),
975  box_it.data ()->bounding_box ().bottom (),
976  repetition_spacing, current_gap);
977  prev_fuzzy_sp = false;
978  prev_fuzzy_non = false;
979  if (rep_char_it.empty ()) {
980  next_rep_char_word_right = INT32_MAX;
981  }
982  else {
983  rep_char_it.forward ();
984  next_rep_char_word_right =
985  rep_char_it.data ()->bounding_box ().right ();
986  }
987  }
988 
989  peek_at_next_gap(row,
990  box_it,
991  next_blob_box,
992  next_gap,
993  next_within_xht_gap);
994  do {
995  bblob = box_it.data ();
996  blob_box = bblob->bounding_box ();
997  if (bblob->joined_to_prev ()) {
998  if (bblob->cblob () != nullptr) {
999  cout_it.set_to_list (cblob_it.data ()->out_list ());
1000  cout_it.move_to_last ();
1001  cout_it.add_list_after (bblob->cblob ()->out_list ());
1002  delete bblob->cblob ();
1003  }
1004  } else {
1005  if (bblob->cblob() != nullptr)
1006  cblob_it.add_after_then_move (bblob->cblob ());
1007  prev_x = blob_box.right ();
1008  }
1009  box_it.forward (); //next one
1010  bblob = box_it.data ();
1011  blob_box = bblob->bounding_box ();
1012 
1013  if (!bblob->joined_to_prev() && bblob->cblob() != nullptr) {
1014  /* Real Blob - not multiple outlines or pre-chopped */
1015  prev_gap = current_gap;
1016  prev_within_xht_gap = current_within_xht_gap;
1017  prev_blob_box = next_blob_box;
1018  current_gap = next_gap;
1019  current_within_xht_gap = next_within_xht_gap;
1020  peek_at_next_gap(row,
1021  box_it,
1022  next_blob_box,
1023  next_gap,
1024  next_within_xht_gap);
1025 
1026  int16_t prev_gap_arg = prev_gap;
1027  int16_t next_gap_arg = next_gap;
1028  if (tosp_only_use_xht_gaps) {
1029  prev_gap_arg = prev_within_xht_gap;
1030  next_gap_arg = next_within_xht_gap;
1031  }
1032  // Decide if a word-break should be inserted
1033  if (blob_box.left () > next_rep_char_word_right ||
1034  make_a_word_break(row, blob_box, prev_gap_arg, prev_blob_box,
1035  current_gap, current_within_xht_gap,
1036  next_blob_box, next_gap_arg,
1037  blanks, fuzzy_sp, fuzzy_non,
1038  prev_gap_was_a_space,
1039  break_at_next_gap) ||
1040  box_it.at_first()) {
1041  /* Form a new word out of the blobs collected */
1042  word = new WERD (&cblobs, prev_blanks, nullptr);
1043  word_count++;
1044  word_it.add_after_then_move (word);
1045  if (bol) {
1046  word->set_flag (W_BOL, true);
1047  bol = false;
1048  }
1049  if (prev_fuzzy_sp)
1050  //probably space
1051  word->set_flag (W_FUZZY_SP, true);
1052  else if (prev_fuzzy_non)
1053  word->set_flag (W_FUZZY_NON, true);
1054  //probably not
1055 
1056  if (blob_box.left () > next_rep_char_word_right) {
1057  /* We need to insert a repeated char word */
1058  word = rep_char_it.extract ();
1059  word_it.add_after_then_move (word);
1060 
1061  /* Set spaces before repeated char word */
1062  repetition_spacing = find_mean_blob_spacing (word);
1063  current_gap = word->bounding_box ().left () - prev_x;
1064  current_within_xht_gap = current_gap;
1065  if (current_gap > tosp_rep_space * repetition_spacing) {
1066  blanks =
1067  (uint8_t) floor (current_gap / row->space_size);
1068  if (blanks < 1)
1069  blanks = 1;
1070  }
1071  else
1072  blanks = 0;
1073  if (tosp_debug_level > 5)
1074  tprintf
1075  ("Repch wd (%d,%d) rep gap %5.2f; Lgap:%d (%d blanks);",
1076  word->bounding_box ().left (),
1077  word->bounding_box ().bottom (),
1078  repetition_spacing, current_gap, blanks);
1079  word->set_blanks (blanks);
1080  //NO uncertainty
1081  word->set_flag (W_FUZZY_SP, false);
1082  word->set_flag (W_FUZZY_NON, false);
1083 
1084  /* Set spaces after repeated char word (and leave current word set) */
1085  current_gap =
1086  blob_box.left () - next_rep_char_word_right;
1087  if (current_gap > tosp_rep_space * repetition_spacing) {
1088  blanks = (uint8_t) (current_gap / row->space_size);
1089  if (blanks < 1)
1090  blanks = 1;
1091  }
1092  else
1093  blanks = 0;
1094  if (tosp_debug_level > 5)
1095  tprintf (" Rgap:%d (%d blanks)\n",
1096  current_gap, blanks);
1097  fuzzy_sp = FALSE;
1098  fuzzy_non = FALSE;
1099 
1100  if (rep_char_it.empty ()) {
1101  next_rep_char_word_right = INT32_MAX;
1102  }
1103  else {
1104  rep_char_it.forward ();
1105  next_rep_char_word_right =
1106  rep_char_it.data ()->bounding_box ().right ();
1107  }
1108  }
1109 
1110  if (box_it.at_first () && rep_char_it.empty ()) {
1111  //at end of line
1112  word->set_flag (W_EOL, true);
1113  xstarts[1] = prev_x;
1114  }
1115  else {
1116  prev_blanks = blanks;
1117  prev_fuzzy_sp = fuzzy_sp;
1118  prev_fuzzy_non = fuzzy_non;
1119  }
1120  }
1121  }
1122  }
1123  while (!box_it.at_first ()); //until back at start
1124 
1125  /* Insert any further repeated char words */
1126  while (!rep_char_it.empty ()) {
1127  word = rep_char_it.extract ();
1128  word_it.add_after_then_move (word);
1129 
1130  /* Set spaces before repeated char word */
1131  repetition_spacing = find_mean_blob_spacing (word);
1132  current_gap = word->bounding_box ().left () - prev_x;
1133  if (current_gap > tosp_rep_space * repetition_spacing) {
1134  blanks = (uint8_t) floor (current_gap / row->space_size);
1135  if (blanks < 1)
1136  blanks = 1;
1137  }
1138  else
1139  blanks = 0;
1140  if (tosp_debug_level > 5)
1141  tprintf(
1142  "Repch wd at EOL (%d,%d). rep spacing %5.2f; Lgap:%d (%d blanks)\n",
1143  word->bounding_box().left(), word->bounding_box().bottom(),
1144  repetition_spacing, current_gap, blanks);
1145  word->set_blanks (blanks);
1146  //NO uncertainty
1147  word->set_flag (W_FUZZY_SP, false);
1148  word->set_flag (W_FUZZY_NON, false);
1149  prev_x = word->bounding_box ().right ();
1150  if (rep_char_it.empty ()) {
1151  //at end of line
1152  word->set_flag (W_EOL, true);
1153  xstarts[1] = prev_x;
1154  }
1155  else {
1156  rep_char_it.forward ();
1157  }
1158  }
1159  real_row = new ROW (row,
1160  (int16_t) row->kern_size, (int16_t) row->space_size);
1161  word_it.set_to_list (real_row->word_list ());
1162  //put words in row
1163  word_it.add_list_after (&words);
1164  real_row->recalc_bounding_box ();
1165 
1166  if (tosp_debug_level > 4) {
1167  tprintf ("Row: Made %d words in row ((%d,%d)(%d,%d))\n",
1168  word_count,
1169  real_row->bounding_box ().left (),
1170  real_row->bounding_box ().bottom (),
1171  real_row->bounding_box ().right (),
1172  real_row->bounding_box ().top ());
1173  }
1174  return real_row;
1175  }
1176  return nullptr;
1177 }
double tosp_rep_space
Definition: textord.h:341
TBOX bounding_box() const
Definition: werd.cpp:159
WERD_LIST rep_words
Definition: blobbox.h:681
Definition: rect.h:34
WERD_LIST * word_list()
Definition: ocrrow.h:55
Definition: werd.h:35
float space_size
Definition: blobbox.h:680
void set_flag(WERD_FLAGS mask, bool value)
Definition: werd.h:127
int16_t left() const
Definition: rect.h:72
int16_t top() const
Definition: rect.h:58
#define FALSE
Definition: capi.h:52
float kern_size
Definition: blobbox.h:679
bool joined_to_prev() const
Definition: blobbox.h:257
void set_blanks(uint8_t new_blanks)
Definition: werd.h:105
bool tosp_only_use_xht_gaps
Definition: textord.h:295
Definition: werd.h:59
TBOX bounding_box() const
Definition: ocrrow.h:88
Definition: ocrrow.h:36
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
Definition: werd.h:34
void recalc_bounding_box()
Definition: ocrrow.cpp:101
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:70
const TBOX & bounding_box() const
Definition: blobbox.h:231
int16_t right() const
Definition: rect.h:79
int16_t bottom() const
Definition: rect.h:65
C_BLOB * cblob() const
Definition: blobbox.h:269
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:612

◆ make_spline_rows()

void tesseract::Textord::make_spline_rows ( TO_BLOCK *  block,
float  gradient,
bool  testing_on 
)

Definition at line 2005 of file makerow.cpp.

2007  {
2008 #ifndef GRAPHICS_DISABLED
2009  ScrollView::Color colour; //of row
2010 #endif
2011  TO_ROW_IT row_it = block->get_rows ();
2012 
2013  row_it.move_to_first ();
2014  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
2015  if (row_it.data ()->blob_list ()->empty ())
2016  delete row_it.extract (); //nothing in it
2017  else
2018  make_baseline_spline (row_it.data (), block);
2019  }
2020  if (textord_old_baselines) {
2021 #ifndef GRAPHICS_DISABLED
2022  if (testing_on) {
2023  colour = ScrollView::RED;
2024  for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
2025  row_it.forward ()) {
2026  row_it.data ()->baseline.plot (to_win, colour);
2027  colour = (ScrollView::Color) (colour + 1);
2028  if (colour > ScrollView::MAGENTA)
2029  colour = ScrollView::RED;
2030  }
2031  }
2032 #endif
2033  make_old_baselines(block, testing_on, gradient);
2034  }
2035 #ifndef GRAPHICS_DISABLED
2036  if (testing_on) {
2037  colour = ScrollView::RED;
2038  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
2039  row_it.data ()->baseline.plot (to_win, colour);
2040  colour = (ScrollView::Color) (colour + 1);
2041  if (colour > ScrollView::MAGENTA)
2042  colour = ScrollView::RED;
2043  }
2044  }
2045 #endif
2046 }
bool textord_old_baselines
Definition: makerow.cpp:52
TO_ROW_LIST * get_rows()
Definition: blobbox.h:717
void make_baseline_spline(TO_ROW *row, TO_BLOCK *block)
Definition: makerow.cpp:2058
EXTERN ScrollView * to_win
Definition: drawtord.cpp:37

◆ set_use_cjk_fp_model()

void tesseract::Textord::set_use_cjk_fp_model ( bool  flag)
inline

Definition at line 95 of file textord.h.

95  {
96  use_cjk_fp_model_ = flag;
97  }

◆ TextordPage()

void tesseract::Textord::TextordPage ( PageSegMode  pageseg_mode,
const FCOORD &  reskew,
int  width,
int  height,
Pix *  binary_pix,
Pix *  thresholds_pix,
Pix *  grey_pix,
bool  use_box_bottoms,
BLOBNBOX_LIST *  diacritic_blobs,
BLOCK_LIST *  blocks,
TO_BLOCK_LIST *  to_blocks 
)

Definition at line 230 of file textord.cpp.

234  {
235  page_tr_.set_x(width);
236  page_tr_.set_y(height);
237  if (to_blocks->empty()) {
238  // AutoPageSeg was not used, so we need to find_components first.
239  find_components(binary_pix, blocks, to_blocks);
240  TO_BLOCK_IT it(to_blocks);
241  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
242  TO_BLOCK* to_block = it.data();
243  // Compute the edge offsets whether or not there is a grey_pix.
244  // We have by-passed auto page seg, so we have to run it here.
245  // By page segmentation mode there is no non-text to avoid running on.
246  to_block->ComputeEdgeOffsets(thresholds_pix, grey_pix);
247  }
248  } else if (!PSM_SPARSE(pageseg_mode)) {
249  // AutoPageSeg does not need to find_components as it did that already.
250  // Filter_blobs sets up the TO_BLOCKs the same as find_components does.
251  filter_blobs(page_tr_, to_blocks, true);
252  }
253 
254  ASSERT_HOST(!to_blocks->empty());
255  if (pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT) {
256  const FCOORD anticlockwise90(0.0f, 1.0f);
257  const FCOORD clockwise90(0.0f, -1.0f);
258  TO_BLOCK_IT it(to_blocks);
259  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
260  TO_BLOCK* to_block = it.data();
261  BLOCK* block = to_block->block;
262  // Create a fake poly_block in block from its bounding box.
263  block->pdblk.set_poly_block(new POLY_BLOCK(block->pdblk.bounding_box(),
264  PT_VERTICAL_TEXT));
265  // Rotate the to_block along with its contained block and blobnbox lists.
266  to_block->rotate(anticlockwise90);
267  // Set the block's rotation values to obey the convention followed in
268  // layout analysis for vertical text.
269  block->set_re_rotation(clockwise90);
270  block->set_classify_rotation(clockwise90);
271  }
272  }
273 
274  TO_BLOCK_IT to_block_it(to_blocks);
275  TO_BLOCK* to_block = to_block_it.data();
276  // Make the rows in the block.
277  float gradient;
278  // Do it the old fashioned way.
279  if (PSM_LINE_FIND_ENABLED(pageseg_mode)) {
280  gradient = make_rows(page_tr_, to_blocks);
281  } else if (!PSM_SPARSE(pageseg_mode)) {
282  // RAW_LINE, SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row.
283  gradient = make_single_row(page_tr_, pageseg_mode != PSM_RAW_LINE,
284  to_block, to_blocks);
285  } else {
286  gradient = 0.0f;
287  }
288  BaselineDetect baseline_detector(textord_baseline_debug,
289  reskew, to_blocks);
290  baseline_detector.ComputeStraightBaselines(use_box_bottoms);
291  baseline_detector.ComputeBaselineSplinesAndXheights(
292  page_tr_, pageseg_mode != PSM_RAW_LINE, textord_heavy_nr,
293  textord_show_final_rows, this);
294  // Now make the words in the lines.
295  if (PSM_WORD_FIND_ENABLED(pageseg_mode)) {
296  // SINGLE_LINE uses the old word maker on the single line.
297  make_words(this, page_tr_, gradient, blocks, to_blocks);
298  } else {
299  // SINGLE_WORD and SINGLE_CHAR cram all the blobs into a
300  // single word, and in SINGLE_CHAR mode, all the outlines
301  // go in a single blob.
302  TO_BLOCK* to_block = to_block_it.data();
303  make_single_word(pageseg_mode == PSM_SINGLE_CHAR,
304  to_block->get_rows(), to_block->block->row_list());
305  }
306  // Remove empties.
307  cleanup_blocks(PSM_WORD_FIND_ENABLED(pageseg_mode), blocks);
308  TransferDiacriticsToBlockGroups(diacritic_blobs, blocks);
309  // Compute the margins for each row in the block, to be used later for
310  // paragraph detection.
311  BLOCK_IT b_it(blocks);
312  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
313  b_it.data()->compute_row_margins();
314  }
315 #ifndef GRAPHICS_DISABLED
316  close_to_win();
317 #endif
318 }
void set_poly_block(POLY_BLOCK *blk)
set the poly block
Definition: pdblock.h:58
bool PSM_SPARSE(int pageseg_mode)
Definition: publictypes.h:200
void set_x(int16_t xin)
rewrite function
Definition: points.h:62
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:118
void find_components(Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
Definition: tordmain.cpp:219
void rotate(const FCOORD &rotation)
Definition: blobbox.h:723
void set_re_rotation(const FCOORD &rotation)
Definition: ocrblock.h:139
TO_ROW_LIST * get_rows()
Definition: blobbox.h:717
float make_rows(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
Definition: makerow.cpp:201
float make_single_row(ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks)
Definition: makerow.cpp:164
int textord_baseline_debug
Definition: textord.h:377
void make_words(tesseract::Textord *textord, ICOORD page_tr, float gradient, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
Definition: wordseg.cpp:100
void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST *real_rows)
Definition: wordseg.cpp:56
void set_classify_rotation(const FCOORD &rotation)
Definition: ocrblock.h:145
bool textord_show_final_rows
Definition: makerow.cpp:47
bool PSM_WORD_FIND_ENABLED(int pageseg_mode)
Definition: publictypes.h:209
bool textord_heavy_nr
Definition: makerow.cpp:43
void ComputeEdgeOffsets(Pix *thresholds, Pix *grey)
Definition: blobbox.cpp:1056
Definition: ocrblock.h:30
BLOCK * block
Definition: blobbox.h:790
void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, bool testing_on)
Definition: tordmain.cpp:250
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:60
Definition: points.h:189
void close_to_win()
Definition: drawtord.cpp:55
void set_y(int16_t yin)
rewrite function
Definition: points.h:66
bool PSM_LINE_FIND_ENABLED(int pageseg_mode)
Definition: publictypes.h:206
PDBLK pdblk
Definition: ocrblock.h:192
Treat the image as a single character.
Definition: publictypes.h:176
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ to_spacing()

void tesseract::Textord::to_spacing ( ICOORD  page_tr,
TO_BLOCK_LIST *  blocks 
)

Definition at line 44 of file tospace.cpp.

47  {
48  TO_BLOCK_IT block_it; //iterator
49  TO_BLOCK *block; //current block;
50  TO_ROW *row; //current row
51  int block_index; //block number
52  int row_index; //row number
53  //estimated width of real spaces for whole block
54  int16_t block_space_gap_width;
55  //estimated width of non space gaps for whole block
56  int16_t block_non_space_gap_width;
57  bool old_text_ord_proportional;//old fixed/prop result
58 
59  block_it.set_to_list (blocks);
60  block_index = 1;
61  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
62  block_it.forward ()) {
63  block = block_it.data ();
64  std::unique_ptr<GAPMAP> gapmap(new GAPMAP (block)); //map of big vert gaps in blk
65  block_spacing_stats(block,
66  gapmap.get(),
67  old_text_ord_proportional,
68  block_space_gap_width,
69  block_non_space_gap_width);
70  // Make sure relative values of block-level space and non-space gap
71  // widths are reasonable. The ratio of 1:3 is also used in
72  // block_spacing_stats, to correct the block_space_gap_width
73  // Useful for arabic and hindi, when the non-space gap width is
74  // often over-estimated and should not be trusted. A similar ratio
75  // is found in block_spacing_stats.
76  if (tosp_old_to_method && tosp_old_to_constrain_sp_kn &&
77  (float) block_space_gap_width / block_non_space_gap_width < 3.0) {
78  block_non_space_gap_width = (int16_t) floor (block_space_gap_width / 3.0);
79  }
80  // row iterator
81  TO_ROW_IT row_it(block->get_rows());
82  row_index = 1;
83  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
84  row = row_it.data ();
85  if ((row->pitch_decision == PITCH_DEF_PROP) ||
86  (row->pitch_decision == PITCH_CORR_PROP)) {
87  if ((tosp_debug_level > 0) && !old_text_ord_proportional)
88  tprintf ("Block %d Row %d: Now Proportional\n",
89  block_index, row_index);
90  row_spacing_stats(row,
91  gapmap.get(),
92  block_index,
93  row_index,
94  block_space_gap_width,
95  block_non_space_gap_width);
96  }
97  else {
98  if ((tosp_debug_level > 0) && old_text_ord_proportional)
99  tprintf
100  ("Block %d Row %d: Now Fixed Pitch Decision:%d fp flag:%f\n",
101  block_index, row_index, row->pitch_decision,
102  row->fixed_pitch);
103  }
104 #ifndef GRAPHICS_DISABLED
105  if (textord_show_initial_words)
106  plot_word_decisions (to_win, (int16_t) row->fixed_pitch, row);
107 #endif
108  row_index++;
109  }
110  block_index++;
111  }
112 }
Definition: gap_map.h:16
float fixed_pitch
Definition: blobbox.h:664
void plot_word_decisions(ScrollView *win, int16_t pitch, TO_ROW *row)
Definition: drawtord.cpp:249
TO_ROW_LIST * get_rows()
Definition: blobbox.h:717
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
EXTERN ScrollView * to_win
Definition: drawtord.cpp:37
EXTERN bool textord_show_initial_words
Definition: tovars.cpp:25
bool tosp_old_to_method
Definition: textord.h:263
PITCH_TYPE pitch_decision
Definition: blobbox.h:663
bool tosp_old_to_constrain_sp_kn
Definition: textord.h:266

◆ use_cjk_fp_model()

bool tesseract::Textord::use_cjk_fp_model ( ) const
inline

Definition at line 92 of file textord.h.

92  {
93  return use_cjk_fp_model_;
94  }

Member Data Documentation

◆ textord_baseline_debug

int tesseract::Textord::textord_baseline_debug = 0

"Baseline debug level"

Definition at line 377 of file textord.h.

◆ textord_blob_size_bigile

double tesseract::Textord::textord_blob_size_bigile = 95

"Percentile for large blobs"

Definition at line 378 of file textord.h.

◆ textord_blob_size_smallile

double tesseract::Textord::textord_blob_size_smallile = 20

"Percentile for small blobs"

Definition at line 381 of file textord.h.

◆ textord_blshift_maxshift

double tesseract::Textord::textord_blshift_maxshift = 0.00

"Max baseline shift"

Definition at line 398 of file textord.h.

◆ textord_blshift_xfraction

double tesseract::Textord::textord_blshift_xfraction = 9.99

"Min size of baseline shift"

Definition at line 399 of file textord.h.

◆ textord_initialasc_ile

double tesseract::Textord::textord_initialasc_ile = 0.90

"Ile of sizes for xheight guess"

Definition at line 383 of file textord.h.

◆ textord_initialx_ile

double tesseract::Textord::textord_initialx_ile = 0.75

"Ile of sizes for xheight guess"

Definition at line 382 of file textord.h.

◆ textord_max_noise_size

int tesseract::Textord::textord_max_noise_size = 7

"Pixel size of noise"

Definition at line 376 of file textord.h.

◆ textord_no_rejects

bool tesseract::Textord::textord_no_rejects = false

"Don't remove noise blobs"

Definition at line 373 of file textord.h.

◆ textord_noise_area_ratio

double tesseract::Textord::textord_noise_area_ratio = 0.7

"Fraction of bounding box for noise"

Definition at line 380 of file textord.h.

◆ textord_noise_debug

bool tesseract::Textord::textord_noise_debug = FALSE

"Debug row garbage detector"

Definition at line 397 of file textord.h.

◆ textord_noise_hfract

double tesseract::Textord::textord_noise_hfract = 1.0/64

"Height fraction to discard outlines as speckle noise"

Definition at line 394 of file textord.h.

◆ textord_noise_normratio

double tesseract::Textord::textord_noise_normratio = 2.0

"Dot to norm ratio for deletion"

Definition at line 387 of file textord.h.

◆ textord_noise_rejrows

bool tesseract::Textord::textord_noise_rejrows = true

"Reject noise-like rows"

Definition at line 389 of file textord.h.

◆ textord_noise_rejwords

bool tesseract::Textord::textord_noise_rejwords = true

"Reject noise-like words"

Definition at line 388 of file textord.h.

◆ textord_noise_rowratio

double tesseract::Textord::textord_noise_rowratio = 6.0

"Dot to norm ratio for deletion"

Definition at line 396 of file textord.h.

◆ textord_noise_sizefraction

int tesseract::Textord::textord_noise_sizefraction = 10

"Fraction of size for maxima"

Definition at line 384 of file textord.h.

◆ textord_noise_sizelimit

double tesseract::Textord::textord_noise_sizelimit = 0.5

"Fraction of x for big t count"

Definition at line 385 of file textord.h.

◆ textord_noise_sncount

int tesseract::Textord::textord_noise_sncount = 1

"super norm blobs to save row"

Definition at line 395 of file textord.h.

◆ textord_noise_sxfract

double tesseract::Textord::textord_noise_sxfract = 0.4

"xh fract width error for norm blobs"

Definition at line 392 of file textord.h.

◆ textord_noise_syfract

double tesseract::Textord::textord_noise_syfract = 0.2

"xh fract error for norm blobs"

Definition at line 390 of file textord.h.

◆ textord_noise_translimit

int tesseract::Textord::textord_noise_translimit = 16

"Transitions for normal blob"

Definition at line 386 of file textord.h.

◆ textord_show_blobs

bool tesseract::Textord::textord_show_blobs = false

"Display unsorted blobs"

Definition at line 374 of file textord.h.

◆ textord_show_boxes

bool tesseract::Textord::textord_show_boxes = false

"Display boxes"

Definition at line 375 of file textord.h.

◆ textord_single_height_mode

bool tesseract::Textord::textord_single_height_mode = false

"Script has no xheight, so use a single mode for horizontal text"

Definition at line 261 of file textord.h.

◆ tosp_all_flips_fuzzy

bool tesseract::Textord::tosp_all_flips_fuzzy = false

"Pass ANY flip to context?"

Definition at line 287 of file textord.h.

◆ tosp_block_use_cert_spaces

bool tesseract::Textord::tosp_block_use_cert_spaces = true

"Only stat OBVIOUS spaces"

Definition at line 277 of file textord.h.

◆ tosp_debug_level

int tesseract::Textord::tosp_debug_level = 0

"Debug data"

Definition at line 302 of file textord.h.

◆ tosp_dont_fool_with_small_kerns

double tesseract::Textord::tosp_dont_fool_with_small_kerns = -1

"Limit use of xht gap with odd small kns"

Definition at line 365 of file textord.h.

◆ tosp_enough_small_gaps

double tesseract::Textord::tosp_enough_small_gaps = 0.65

"Fract of kerns reqd for isolated row stats"

Definition at line 343 of file textord.h.

◆ tosp_enough_space_samples_for_median

int tesseract::Textord::tosp_enough_space_samples_for_median = 3

"or should we use mean"

Definition at line 304 of file textord.h.

◆ tosp_few_samples

int tesseract::Textord::tosp_few_samples = 40

"No.gaps reqd with 1 large gap to treat as a table"

Definition at line 308 of file textord.h.

◆ tosp_flip_caution

double tesseract::Textord::tosp_flip_caution = 0.0

"Don't autoflip kn to sp when large separation"

Definition at line 361 of file textord.h.

◆ tosp_flip_fuzz_kn_to_sp

bool tesseract::Textord::tosp_flip_fuzz_kn_to_sp = true

"Default flip"

Definition at line 298 of file textord.h.

◆ tosp_flip_fuzz_sp_to_kn

bool tesseract::Textord::tosp_flip_fuzz_sp_to_kn = true

"Default flip"

Definition at line 299 of file textord.h.

◆ tosp_force_wordbreak_on_punct

bool tesseract::Textord::tosp_force_wordbreak_on_punct = false

"Force word breaks on punct to break long lines in non-space delimited langs"

Definition at line 271 of file textord.h.

◆ tosp_fuzzy_kn_fraction

double tesseract::Textord::tosp_fuzzy_kn_fraction = 0.5

"New fuzzy kn alg"

Definition at line 350 of file textord.h.

◆ tosp_fuzzy_limit_all

bool tesseract::Textord::tosp_fuzzy_limit_all = true

"Don't restrict kn->sp fuzzy limit to tables"

Definition at line 289 of file textord.h.

◆ tosp_fuzzy_sp_fraction

double tesseract::Textord::tosp_fuzzy_sp_fraction = 0.5

"New fuzzy sp alg"

Definition at line 351 of file textord.h.

◆ tosp_fuzzy_space_factor

double tesseract::Textord::tosp_fuzzy_space_factor = 0.6

"Fract of xheight for fuzz sp"

Definition at line 327 of file textord.h.

◆ tosp_fuzzy_space_factor1

double tesseract::Textord::tosp_fuzzy_space_factor1 = 0.5

"Fract of xheight for fuzz sp"

Definition at line 329 of file textord.h.

◆ tosp_fuzzy_space_factor2

double tesseract::Textord::tosp_fuzzy_space_factor2 = 0.72

"Fract of xheight for fuzz sp"

Definition at line 331 of file textord.h.

◆ tosp_gap_factor

double tesseract::Textord::tosp_gap_factor = 0.83

"gap ratio to flip sp->kern"

Definition at line 332 of file textord.h.

◆ tosp_ignore_big_gaps

double tesseract::Textord::tosp_ignore_big_gaps = -1

"xht multiplier"

Definition at line 339 of file textord.h.

◆ tosp_ignore_very_big_gaps

double tesseract::Textord::tosp_ignore_very_big_gaps = 3.5

"xht multiplier"

Definition at line 340 of file textord.h.

◆ tosp_improve_thresh

bool tesseract::Textord::tosp_improve_thresh = false

"Enable improvement heuristic"

Definition at line 301 of file textord.h.

◆ tosp_init_guess_kn_mult

double tesseract::Textord::tosp_init_guess_kn_mult = 2.2

"Thresh guess - mult kn by this"

Definition at line 355 of file textord.h.

◆ tosp_init_guess_xht_mult

double tesseract::Textord::tosp_init_guess_xht_mult = 0.28

"Thresh guess - mult xht by this"

Definition at line 357 of file textord.h.

◆ tosp_kern_gap_factor1

double tesseract::Textord::tosp_kern_gap_factor1 = 2.0

"gap ratio to flip kern->sp"

Definition at line 334 of file textord.h.

◆ tosp_kern_gap_factor2

double tesseract::Textord::tosp_kern_gap_factor2 = 1.3

"gap ratio to flip kern->sp"

Definition at line 336 of file textord.h.

◆ tosp_kern_gap_factor3

double tesseract::Textord::tosp_kern_gap_factor3 = 2.5

"gap ratio to flip kern->sp"

Definition at line 338 of file textord.h.

◆ tosp_large_kerning

double tesseract::Textord::tosp_large_kerning = 0.19

"Limit use of xht gap with large kns"

Definition at line 363 of file textord.h.

◆ tosp_max_sane_kn_thresh

double tesseract::Textord::tosp_max_sane_kn_thresh = 5.0

"Multiplier on kn to limit thresh"

Definition at line 359 of file textord.h.

◆ tosp_min_sane_kn_sp

double tesseract::Textord::tosp_min_sane_kn_sp = 1.5

"Don't trust spaces less than this times kn"

Definition at line 353 of file textord.h.

◆ tosp_narrow_aspect_ratio

double tesseract::Textord::tosp_narrow_aspect_ratio = 0.48

"narrow if w/h less than this"

Definition at line 322 of file textord.h.

◆ tosp_narrow_blobs_not_cert

bool tesseract::Textord::tosp_narrow_blobs_not_cert = true

"Only stat OBVIOUS spaces"

Definition at line 281 of file textord.h.

◆ tosp_narrow_fraction

double tesseract::Textord::tosp_narrow_fraction = 0.3

"Fract of xheight for narrow"

Definition at line 320 of file textord.h.

◆ tosp_near_lh_edge

double tesseract::Textord::tosp_near_lh_edge = 0

"Don't reduce box if the top left is non blank"

Definition at line 367 of file textord.h.

◆ tosp_old_sp_kn_th_factor

double tesseract::Textord::tosp_old_sp_kn_th_factor = 2.0

"Factor for defining space threshold in terms of space and kern sizes"

Definition at line 314 of file textord.h.

◆ tosp_old_to_bug_fix

bool tesseract::Textord::tosp_old_to_bug_fix = false

"Fix suspected bug in old code"

Definition at line 275 of file textord.h.

◆ tosp_old_to_constrain_sp_kn

bool tesseract::Textord::tosp_old_to_constrain_sp_kn = false

"Constrain relative values of inter and intra-word gaps for old_to_method."

Definition at line 266 of file textord.h.

◆ tosp_old_to_method

bool tesseract::Textord::tosp_old_to_method = false

"Space stats use prechopping?"

Definition at line 263 of file textord.h.

◆ tosp_only_small_gaps_for_kern

bool tesseract::Textord::tosp_only_small_gaps_for_kern = false

"Better guess"

Definition at line 286 of file textord.h.

◆ tosp_only_use_prop_rows

bool tesseract::Textord::tosp_only_use_prop_rows = true

"Block stats to use fixed pitch rows?"

Definition at line 268 of file textord.h.

◆ tosp_only_use_xht_gaps

bool tesseract::Textord::tosp_only_use_xht_gaps = false

"Only use within xht gap for wd breaks"

Definition at line 295 of file textord.h.

◆ tosp_pass_wide_fuzz_sp_to_context

double tesseract::Textord::tosp_pass_wide_fuzz_sp_to_context = 0.75

"How wide fuzzies need context"

Definition at line 371 of file textord.h.

◆ tosp_recovery_isolated_row_stats

bool tesseract::Textord::tosp_recovery_isolated_row_stats = true

"Use row alone when inadequate cert spaces"

Definition at line 285 of file textord.h.

◆ tosp_redo_kern_limit

int tesseract::Textord::tosp_redo_kern_limit = 10

"No.samples reqd to reestimate for row"

Definition at line 306 of file textord.h.

◆ tosp_rep_space

double tesseract::Textord::tosp_rep_space = 1.6

"rep gap multiplier for space"

Definition at line 341 of file textord.h.

◆ tosp_row_use_cert_spaces

bool tesseract::Textord::tosp_row_use_cert_spaces = true

"Only stat OBVIOUS spaces"

Definition at line 279 of file textord.h.

◆ tosp_row_use_cert_spaces1

bool tesseract::Textord::tosp_row_use_cert_spaces1 = true

"Only stat OBVIOUS spaces"

Definition at line 283 of file textord.h.

◆ tosp_rule_9_test_punct

bool tesseract::Textord::tosp_rule_9_test_punct = false

"Don't chng kn to space next to punct"

Definition at line 297 of file textord.h.

◆ tosp_sanity_method

int tesseract::Textord::tosp_sanity_method = 1

"How to avoid being silly"

Definition at line 311 of file textord.h.

◆ tosp_short_row

int tesseract::Textord::tosp_short_row = 20

"No.gaps reqd with few cert spaces to use certs"

Definition at line 310 of file textord.h.

◆ tosp_silly_kn_sp_gap

double tesseract::Textord::tosp_silly_kn_sp_gap = 0.2

"Don't let sp minus kn get too small"

Definition at line 369 of file textord.h.

◆ tosp_stats_use_xht_gaps

bool tesseract::Textord::tosp_stats_use_xht_gaps = true

"Use within xht gap for wd breaks"

Definition at line 291 of file textord.h.

◆ tosp_table_fuzzy_kn_sp_ratio

double tesseract::Textord::tosp_table_fuzzy_kn_sp_ratio = 3.0

"Fuzzy if less than this"

Definition at line 349 of file textord.h.

◆ tosp_table_kn_sp_ratio

double tesseract::Textord::tosp_table_kn_sp_ratio = 2.25

"Min difference of kn & sp in table"

Definition at line 345 of file textord.h.

◆ tosp_table_xht_sp_ratio

double tesseract::Textord::tosp_table_xht_sp_ratio = 0.33

"Expect spaces bigger than this"

Definition at line 347 of file textord.h.

◆ tosp_threshold_bias1

double tesseract::Textord::tosp_threshold_bias1 = 0

"how far between kern and space?"

Definition at line 316 of file textord.h.

◆ tosp_threshold_bias2

double tesseract::Textord::tosp_threshold_bias2 = 0

"how far between kern and space?"

Definition at line 318 of file textord.h.

◆ tosp_use_pre_chopping

bool tesseract::Textord::tosp_use_pre_chopping = false

"Space stats use prechopping?"

Definition at line 273 of file textord.h.

◆ tosp_use_xht_gaps

bool tesseract::Textord::tosp_use_xht_gaps = true

"Use within xht gap for wd breaks"

Definition at line 293 of file textord.h.

◆ tosp_wide_aspect_ratio

double tesseract::Textord::tosp_wide_aspect_ratio = 0.0

"wide if w/h greater than this"

Definition at line 325 of file textord.h.

◆ tosp_wide_fraction

double tesseract::Textord::tosp_wide_fraction = 0.52

"Fract of xheight for wide"

Definition at line 323 of file textord.h.


The documentation for this class was generated from the following files: