tesseract  5.0.0-alpha-619-ge9db
tesseract::Textord Class Reference

#include <textord.h>

Public Member Functions

 Textord (CCStruct *ccstruct)
 
 ~Textord ()=default
 
void TextordPage (PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height, Pix *binary_pix, Pix *thresholds_pix, Pix *grey_pix, bool use_box_bottoms, BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
 
void CleanupSingleRowResult (PageSegMode pageseg_mode, PAGE_RES *page_res)
 
bool use_cjk_fp_model () const
 
void set_use_cjk_fp_model (bool flag)
 
void to_spacing (ICOORD page_tr, TO_BLOCK_LIST *blocks)
 
ROW * make_prop_words (TO_ROW *row, FCOORD rotation)
 
ROW * make_blob_words (TO_ROW *row, FCOORD rotation)
 
void find_components (Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
 
void filter_blobs (ICOORD page_tr, TO_BLOCK_LIST *blocks, bool testing_on)
 
void compute_block_xheight (TO_BLOCK *block, float gradient)
 
void make_spline_rows (TO_BLOCK *block, float gradient, bool testing_on)
 
compute_row_xheight

Estimate the xheight of this row. Compute the ascender rise and descender drop at the same time. Set xheight_evidence to the number of blobs with the chosen xheight that appear in this row.

void compute_row_xheight (TO_ROW *row, const FCOORD &rotation, float gradient, int block_line_size)
 

Public Attributes

bool textord_single_height_mode = false
 
bool tosp_old_to_method = false
 
bool tosp_old_to_constrain_sp_kn = false
 
bool tosp_only_use_prop_rows = true
 
bool tosp_force_wordbreak_on_punct = false
 
bool tosp_use_pre_chopping = false
 
bool tosp_old_to_bug_fix = false
 
bool tosp_block_use_cert_spaces = true
 
bool tosp_row_use_cert_spaces = true
 
bool tosp_narrow_blobs_not_cert = true
 
bool tosp_row_use_cert_spaces1 = true
 
bool tosp_recovery_isolated_row_stats = true
 
bool tosp_only_small_gaps_for_kern = false
 
bool tosp_all_flips_fuzzy = false
 
bool tosp_fuzzy_limit_all = true
 
bool tosp_stats_use_xht_gaps = true
 
bool tosp_use_xht_gaps = true
 
bool tosp_only_use_xht_gaps = false
 
bool tosp_rule_9_test_punct = false
 
bool tosp_flip_fuzz_kn_to_sp = true
 
bool tosp_flip_fuzz_sp_to_kn = true
 
bool tosp_improve_thresh = false
 
int tosp_debug_level = 0
 
int tosp_enough_space_samples_for_median = 3
 
int tosp_redo_kern_limit = 10
 
int tosp_few_samples = 40
 
int tosp_short_row = 20
 
int tosp_sanity_method = 1
 
double tosp_old_sp_kn_th_factor = 2.0
 
double tosp_threshold_bias1 = 0
 
double tosp_threshold_bias2 = 0
 
double tosp_narrow_fraction = 0.3
 
double tosp_narrow_aspect_ratio = 0.48
 
double tosp_wide_fraction = 0.52
 
double tosp_wide_aspect_ratio = 0.0
 
double tosp_fuzzy_space_factor = 0.6
 
double tosp_fuzzy_space_factor1 = 0.5
 
double tosp_fuzzy_space_factor2 = 0.72
 
double tosp_gap_factor = 0.83
 
double tosp_kern_gap_factor1 = 2.0
 
double tosp_kern_gap_factor2 = 1.3
 
double tosp_kern_gap_factor3 = 2.5
 
double tosp_ignore_big_gaps = -1
 
double tosp_ignore_very_big_gaps = 3.5
 
double tosp_rep_space = 1.6
 
double tosp_enough_small_gaps = 0.65
 
double tosp_table_kn_sp_ratio = 2.25
 
double tosp_table_xht_sp_ratio = 0.33
 
double tosp_table_fuzzy_kn_sp_ratio = 3.0
 
double tosp_fuzzy_kn_fraction = 0.5
 
double tosp_fuzzy_sp_fraction = 0.5
 
double tosp_min_sane_kn_sp = 1.5
 
double tosp_init_guess_kn_mult = 2.2
 
double tosp_init_guess_xht_mult = 0.28
 
double tosp_max_sane_kn_thresh = 5.0
 
double tosp_flip_caution = 0.0
 
double tosp_large_kerning = 0.19
 
double tosp_dont_fool_with_small_kerns = -1
 
double tosp_near_lh_edge = 0
 
double tosp_silly_kn_sp_gap = 0.2
 
double tosp_pass_wide_fuzz_sp_to_context = 0.75
 
bool textord_no_rejects = false
 
bool textord_show_blobs = false
 
bool textord_show_boxes = false
 
int textord_max_noise_size = 7
 
int textord_baseline_debug = 0
 
double textord_noise_area_ratio = 0.7
 
double textord_initialx_ile = 0.75
 
double textord_initialasc_ile = 0.90
 
int textord_noise_sizefraction = 10
 
double textord_noise_sizelimit = 0.5
 
int textord_noise_translimit = 16
 
double textord_noise_normratio = 2.0
 
bool textord_noise_rejwords = true
 
bool textord_noise_rejrows = true
 
double textord_noise_syfract = 0.2
 
double textord_noise_sxfract = 0.4
 
double textord_noise_hfract = 1.0/64
 
int textord_noise_sncount = 1
 
double textord_noise_rowratio = 6.0
 
bool textord_noise_debug = false
 
double textord_blshift_maxshift = 0.00
 
double textord_blshift_xfraction = 9.99
 

Detailed Description

Definition at line 68 of file textord.h.

Constructor & Destructor Documentation

◆ Textord()

tesseract::Textord::Textord ( CCStruct *  ccstruct)
explicit

Definition at line 35 of file textord.cpp.

36  : ccstruct_(ccstruct),
37  use_cjk_fp_model_(false),
38  // makerow.cpp ///////////////////////////////////////////
40  "Script has no xheight, so use a single mode",
41  ccstruct_->params()),
42  // tospace.cpp ///////////////////////////////////////////
43  BOOL_MEMBER(tosp_old_to_method, false, "Space stats use prechopping?",
44  ccstruct_->params()),
46  "Constrain relative values of inter and intra-word gaps for "
47  "old_to_method.",
48  ccstruct_->params()),
50  "Block stats to use fixed pitch rows?", ccstruct_->params()),
52  "Force word breaks on punct to break long lines in non-space "
53  "delimited langs",
54  ccstruct_->params()),
55  BOOL_MEMBER(tosp_use_pre_chopping, false, "Space stats use prechopping?",
56  ccstruct_->params()),
57  BOOL_MEMBER(tosp_old_to_bug_fix, false, "Fix suspected bug in old code",
58  ccstruct_->params()),
59  BOOL_MEMBER(tosp_block_use_cert_spaces, true, "Only stat OBVIOUS spaces",
60  ccstruct_->params()),
61  BOOL_MEMBER(tosp_row_use_cert_spaces, true, "Only stat OBVIOUS spaces",
62  ccstruct_->params()),
63  BOOL_MEMBER(tosp_narrow_blobs_not_cert, true, "Only stat OBVIOUS spaces",
64  ccstruct_->params()),
65  BOOL_MEMBER(tosp_row_use_cert_spaces1, true, "Only stat OBVIOUS spaces",
66  ccstruct_->params()),
68  "Use row alone when inadequate cert spaces",
69  ccstruct_->params()),
70  BOOL_MEMBER(tosp_only_small_gaps_for_kern, false, "Better guess",
71  ccstruct_->params()),
72  BOOL_MEMBER(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?",
73  ccstruct_->params()),
75  "Don't restrict kn->sp fuzzy limit to tables",
76  ccstruct_->params()),
78  "Use within xht gap for wd breaks", ccstruct_->params()),
79  BOOL_MEMBER(tosp_use_xht_gaps, true, "Use within xht gap for wd breaks",
80  ccstruct_->params()),
82  "Only use within xht gap for wd breaks", ccstruct_->params()),
84  "Don't chng kn to space next to punct", ccstruct_->params()),
85  BOOL_MEMBER(tosp_flip_fuzz_kn_to_sp, true, "Default flip",
86  ccstruct_->params()),
87  BOOL_MEMBER(tosp_flip_fuzz_sp_to_kn, true, "Default flip",
88  ccstruct_->params()),
89  BOOL_MEMBER(tosp_improve_thresh, false, "Enable improvement heuristic",
90  ccstruct_->params()),
91  INT_MEMBER(tosp_debug_level, 0, "Debug data", ccstruct_->params()),
93  "or should we use mean", ccstruct_->params()),
95  "No.samples reqd to reestimate for row", ccstruct_->params()),
97  "No.gaps reqd with 1 large gap to treat as a table",
98  ccstruct_->params()),
100  "No.gaps reqd with few cert spaces to use certs",
101  ccstruct_->params()),
102  INT_MEMBER(tosp_sanity_method, 1, "How to avoid being silly",
103  ccstruct_->params()),
105  "Factor for defining space threshold in terms of space and "
106  "kern sizes",
107  ccstruct_->params()),
108  double_MEMBER(tosp_threshold_bias1, 0, "how far between kern and space?",
109  ccstruct_->params()),
110  double_MEMBER(tosp_threshold_bias2, 0, "how far between kern and space?",
111  ccstruct_->params()),
112  double_MEMBER(tosp_narrow_fraction, 0.3, "Fract of xheight for narrow",
113  ccstruct_->params()),
115  "narrow if w/h less than this", ccstruct_->params()),
116  double_MEMBER(tosp_wide_fraction, 0.52, "Fract of xheight for wide",
117  ccstruct_->params()),
118  double_MEMBER(tosp_wide_aspect_ratio, 0.0, "wide if w/h less than this",
119  ccstruct_->params()),
121  "Fract of xheight for fuzz sp", ccstruct_->params()),
123  "Fract of xheight for fuzz sp", ccstruct_->params()),
125  "Fract of xheight for fuzz sp", ccstruct_->params()),
126  double_MEMBER(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern",
127  ccstruct_->params()),
128  double_MEMBER(tosp_kern_gap_factor1, 2.0, "gap ratio to flip kern->sp",
129  ccstruct_->params()),
130  double_MEMBER(tosp_kern_gap_factor2, 1.3, "gap ratio to flip kern->sp",
131  ccstruct_->params()),
132  double_MEMBER(tosp_kern_gap_factor3, 2.5, "gap ratio to flip kern->sp",
133  ccstruct_->params()),
134  double_MEMBER(tosp_ignore_big_gaps, -1, "xht multiplier",
135  ccstruct_->params()),
136  double_MEMBER(tosp_ignore_very_big_gaps, 3.5, "xht multiplier",
137  ccstruct_->params()),
138  double_MEMBER(tosp_rep_space, 1.6, "rep gap multiplier for space",
139  ccstruct_->params()),
141  "Fract of kerns reqd for isolated row stats",
142  ccstruct_->params()),
144  "Min difference of kn & sp in table", ccstruct_->params()),
146  "Expect spaces bigger than this", ccstruct_->params()),
148  "Fuzzy if less than this", ccstruct_->params()),
149  double_MEMBER(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg",
150  ccstruct_->params()),
151  double_MEMBER(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg",
152  ccstruct_->params()),
154  "Don't trust spaces less than this time kn",
155  ccstruct_->params()),
157  "Thresh guess - mult kn by this", ccstruct_->params()),
159  "Thresh guess - mult xht by this", ccstruct_->params()),
161  "Multiplier on kn to limit thresh", ccstruct_->params()),
163  "Don't autoflip kn to sp when large separation",
164  ccstruct_->params()),
166  "Limit use of xht gap with large kns", ccstruct_->params()),
168  "Limit use of xht gap with odd small kns",
169  ccstruct_->params()),
171  "Don't reduce box if the top left is non blank",
172  ccstruct_->params()),
174  "Don't let sp minus kn get too small", ccstruct_->params()),
176  "How wide fuzzies need context", ccstruct_->params()),
177  // tordmain.cpp ///////////////////////////////////////////
178  BOOL_MEMBER(textord_no_rejects, false, "Don't remove noise blobs",
179  ccstruct_->params()),
180  BOOL_MEMBER(textord_show_blobs, false, "Display unsorted blobs",
181  ccstruct_->params()),
182  BOOL_MEMBER(textord_show_boxes, false, "Display unsorted blobs",
183  ccstruct_->params()),
184  INT_MEMBER(textord_max_noise_size, 7, "Pixel size of noise",
185  ccstruct_->params()),
186  INT_MEMBER(textord_baseline_debug, 0, "Baseline debug level",
187  ccstruct_->params()),
189  "Fraction of bounding box for noise", ccstruct_->params()),
191  "Ile of sizes for xheight guess", ccstruct_->params()),
193  "Ile of sizes for xheight guess", ccstruct_->params()),
194  INT_MEMBER(textord_noise_sizefraction, 10, "Fraction of size for maxima",
195  ccstruct_->params()),
197  "Fraction of x for big t count", ccstruct_->params()),
198  INT_MEMBER(textord_noise_translimit, 16, "Transitions for normal blob",
199  ccstruct_->params()),
201  "Dot to norm ratio for deletion", ccstruct_->params()),
202  BOOL_MEMBER(textord_noise_rejwords, true, "Reject noise-like words",
203  ccstruct_->params()),
204  BOOL_MEMBER(textord_noise_rejrows, true, "Reject noise-like rows",
205  ccstruct_->params()),
207  "xh fract height error for norm blobs",
208  ccstruct_->params()),
210  "xh fract width error for norm blobs", ccstruct_->params()),
212  "Height fraction to discard outlines as speckle noise",
213  ccstruct_->params()),
214  INT_MEMBER(textord_noise_sncount, 1, "super norm blobs to save row",
215  ccstruct_->params()),
217  "Dot to norm ratio for deletion", ccstruct_->params()),
218  BOOL_MEMBER(textord_noise_debug, false, "Debug row garbage detector",
219  ccstruct_->params()),
220  double_MEMBER(textord_blshift_maxshift, 0.00, "Max baseline shift",
221  ccstruct_->params()),
223  "Min size of baseline shift", ccstruct_->params()) {}

◆ ~Textord()

tesseract::Textord::~Textord ( )
default

Member Function Documentation

◆ CleanupSingleRowResult()

void tesseract::Textord::CleanupSingleRowResult ( PageSegMode  pageseg_mode,
PAGE_RES *  page_res 
)

Definition at line 318 of file textord.cpp.

319  {
320  if (PSM_LINE_FIND_ENABLED(pageseg_mode) || PSM_SPARSE(pageseg_mode))
321  return; // No cleanup required.
322  PAGE_RES_IT it(page_res);
323  // Find the best row, being the greatest mean word conf.
324  float row_total_conf = 0.0f;
325  int row_word_count = 0;
326  ROW_RES* best_row = nullptr;
327  float best_conf = 0.0f;
328  for (it.restart_page(); it.word() != nullptr; it.forward()) {
329  WERD_RES* word = it.word();
330  row_total_conf += word->best_choice->certainty();
331  ++row_word_count;
332  if (it.next_row() != it.row()) {
333  row_total_conf /= row_word_count;
334  if (best_row == nullptr || best_conf < row_total_conf) {
335  best_row = it.row();
336  best_conf = row_total_conf;
337  }
338  row_total_conf = 0.0f;
339  row_word_count = 0;
340  }
341  }
342  // Now eliminate any word not in the best row.
343  for (it.restart_page(); it.word() != nullptr; it.forward()) {
344  if (it.row() != best_row)
345  it.DeleteCurrentWord();
346  }
347 }

◆ compute_block_xheight()

void tesseract::Textord::compute_block_xheight ( TO_BLOCK *  block,
float  gradient 
)

Definition at line 1254 of file makerow.cpp.

1254  {
1255  TO_ROW *row; // current row
1256  float asc_frac_xheight = CCStruct::kAscenderFraction /
1258  float desc_frac_xheight = CCStruct::kDescenderFraction /
1260  int32_t min_height, max_height; // limits on xheight
1261  TO_ROW_IT row_it = block->get_rows();
1262  if (row_it.empty()) return; // no rows
1263 
1264  // Compute the best guess of xheight of each row individually.
1265  // Use xheight and ascrise values of the rows where ascenders were found.
1266  get_min_max_xheight(block->line_size, &min_height, &max_height);
1267  STATS row_asc_xheights(min_height, max_height + 1);
1268  STATS row_asc_ascrise(static_cast<int>(min_height * asc_frac_xheight),
1269  static_cast<int>(max_height * asc_frac_xheight) + 1);
1270  int min_desc_height = static_cast<int>(min_height * desc_frac_xheight);
1271  int max_desc_height = static_cast<int>(max_height * desc_frac_xheight);
1272  STATS row_asc_descdrop(min_desc_height, max_desc_height + 1);
1273  STATS row_desc_xheights(min_height, max_height + 1);
1274  STATS row_desc_descdrop(min_desc_height, max_desc_height + 1);
1275  STATS row_cap_xheights(min_height, max_height + 1);
1276  STATS row_cap_floating_xheights(min_height, max_height + 1);
1277  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1278  row = row_it.data();
1279  // Compute the xheight of this row if it has not been computed before.
1280  if (row->xheight <= 0.0) {
1282  gradient, block->line_size);
1283  }
1284  ROW_CATEGORY row_category = get_row_category(row);
1285  if (row_category == ROW_ASCENDERS_FOUND) {
1286  row_asc_xheights.add(static_cast<int32_t>(row->xheight),
1287  row->xheight_evidence);
1288  row_asc_ascrise.add(static_cast<int32_t>(row->ascrise),
1289  row->xheight_evidence);
1290  row_asc_descdrop.add(static_cast<int32_t>(-row->descdrop),
1291  row->xheight_evidence);
1292  } else if (row_category == ROW_DESCENDERS_FOUND) {
1293  row_desc_xheights.add(static_cast<int32_t>(row->xheight),
1294  row->xheight_evidence);
1295  row_desc_descdrop.add(static_cast<int32_t>(-row->descdrop),
1296  row->xheight_evidence);
1297  } else if (row_category == ROW_UNKNOWN) {
1298  fill_heights(row, gradient, min_height, max_height,
1299  &row_cap_xheights, &row_cap_floating_xheights);
1300  }
1301  }
1302 
1303  float xheight = 0.0;
1304  float ascrise = 0.0;
1305  float descdrop = 0.0;
1306  // Compute our best guess of xheight of this block.
1307  if (row_asc_xheights.get_total() > 0) {
1308  // Determine xheight from rows where ascenders were found.
1309  xheight = row_asc_xheights.median();
1310  ascrise = row_asc_ascrise.median();
1311  descdrop = -row_asc_descdrop.median();
1312  } else if (row_desc_xheights.get_total() > 0) {
1313  // Determine xheight from rows where descenders were found.
1314  xheight = row_desc_xheights.median();
1315  descdrop = -row_desc_descdrop.median();
1316  } else if (row_cap_xheights.get_total() > 0) {
1317  // All the rows in the block were (a/de)scenderless.
1318  // Try to search for two modes in row_cap_heights that could
1319  // be the xheight and the capheight (e.g. some of the rows
1320  // were lowercase, but did not have enough (a/de)scenders.
1321  // If such two modes can not be found, this block is most
1322  // likely all caps (or all small caps, in which case the code
1323  // still works as intended).
1324  compute_xheight_from_modes(&row_cap_xheights, &row_cap_floating_xheights,
1326  block->block->classify_rotation().y() == 0.0,
1327  min_height, max_height, &(xheight), &(ascrise));
1328  if (ascrise == 0) { // assume only caps in the whole block
1329  xheight = row_cap_xheights.median() * CCStruct::kXHeightCapRatio;
1330  }
1331  } else { // default block sizes
1332  xheight = block->line_size * CCStruct::kXHeightFraction;
1333  }
1334  // Correct xheight, ascrise and descdrop if necessary.
1335  bool corrected_xheight = false;
1336  if (xheight < textord_min_xheight) {
1337  xheight = static_cast<float>(textord_min_xheight);
1338  corrected_xheight = true;
1339  }
1340  if (corrected_xheight || ascrise <= 0.0) {
1341  ascrise = xheight * asc_frac_xheight;
1342  }
1343  if (corrected_xheight || descdrop >= 0.0) {
1344  descdrop = -(xheight * desc_frac_xheight);
1345  }
1346  block->xheight = xheight;
1347 
1348  if (textord_debug_xheights) {
1349  tprintf("Block average xheight=%.4f, ascrise=%.4f, descdrop=%.4f\n",
1350  xheight, ascrise, descdrop);
1351  }
1352  // Correct xheight, ascrise, descdrop of rows based on block averages.
1353  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1354  correct_row_xheight(row_it.data(), xheight, ascrise, descdrop);
1355  }
1356 }

◆ compute_row_xheight()

void tesseract::Textord::compute_row_xheight ( TO_ROW *  row,
const FCOORD &  rotation,
float  gradient,
int  block_line_size 
)

Definition at line 1366 of file makerow.cpp.

1369  {
1370  // Find blobs representing repeated characters in rows and mark them.
1371  // This information is used for computing row xheight and at a later
1372  // stage when words are formed by make_words.
1373  if (!row->rep_chars_marked()) {
1374  mark_repeated_chars(row);
1375  }
1376 
1377  int min_height, max_height;
1378  get_min_max_xheight(block_line_size, &min_height, &max_height);
1379  STATS heights(min_height, max_height + 1);
1380  STATS floating_heights(min_height, max_height + 1);
1381  fill_heights(row, gradient, min_height, max_height,
1382  &heights, &floating_heights);
1383  row->ascrise = 0.0f;
1384  row->xheight = 0.0f;
1385  row->xheight_evidence =
1386  compute_xheight_from_modes(&heights, &floating_heights,
1388  rotation.y() == 0.0,
1389  min_height, max_height,
1390  &(row->xheight), &(row->ascrise));
1391  row->descdrop = 0.0f;
1392  if (row->xheight > 0.0) {
1393  row->descdrop = static_cast<float>(
1394  compute_row_descdrop(row, gradient, row->xheight_evidence, &heights));
1395  }
1396 }

◆ filter_blobs()

void tesseract::Textord::filter_blobs ( ICOORD  page_tr,
TO_BLOCK_LIST *  blocks,
bool  testing_on 
)

Definition at line 245 of file tordmain.cpp.

252  { // for plotting
253  TO_BLOCK_IT block_it = blocks; // destination iterator
254  TO_BLOCK *block; // created block
255 
256  #ifndef GRAPHICS_DISABLED
257  if (to_win != nullptr)
258  to_win->Clear();
259  #endif // GRAPHICS_DISABLED
260 
261  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
262  block_it.forward()) {
263  block = block_it.data();
264  block->line_size = filter_noise_blobs(&block->blobs,
265  &block->noise_blobs,
266  &block->small_blobs,
267  &block->large_blobs);
268  if (block->line_size == 0) block->line_size = 1;
269  block->line_spacing = block->line_size *
276 
277  #ifndef GRAPHICS_DISABLED
278  if (textord_show_blobs && testing_on) {
279  if (to_win == nullptr)
280  create_to_win(page_tr);
281  block->plot_graded_blobs(to_win);
282  }
283  if (textord_show_boxes && testing_on) {
284  if (to_win == nullptr)
285  create_to_win(page_tr);

◆ find_components()

void tesseract::Textord::find_components ( Pix *  pix,
BLOCK_LIST *  blocks,
TO_BLOCK_LIST *  to_blocks 
)

Definition at line 215 of file tordmain.cpp.

220  {
221  int width = pixGetWidth(pix);
222  int height = pixGetHeight(pix);
223  if (width > INT16_MAX || height > INT16_MAX) {
224  tprintf("Input image too large! (%d, %d)\n", width, height);
225  return; // Can't handle it.
226  }
227 
229 
230  BLOCK_IT block_it(blocks); // iterator
231  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
232  block_it.forward()) {
233  BLOCK* block = block_it.data();
234  if (block->pdblk.poly_block() == nullptr || block->pdblk.poly_block()->IsText()) {
235  extract_edges(pix, block);
236  }
237  }
238 

◆ make_blob_words()

ROW * tesseract::Textord::make_blob_words ( TO_ROW *  row,
FCOORD  rotation 
)

Definition at line 1177 of file tospace.cpp.

1187  {
1188  bool bol; // start of line
1189  ROW *real_row; // output row
1190  C_OUTLINE_IT cout_it;
1191  C_BLOB_LIST cblobs;
1192  C_BLOB_IT cblob_it = &cblobs;
1193  WERD_LIST words;
1194  WERD *word; // new word
1195  BLOBNBOX *bblob; // current blob
1196  TBOX blob_box; // bounding box
1197  BLOBNBOX_IT box_it; // iterator
1198  int16_t word_count = 0;
1199 
1200  cblob_it.set_to_list(&cblobs);
1201  box_it.set_to_list(row->blob_list());
1202  // new words
1203  WERD_IT word_it(&words);
1204  bol = true;
1205  if (!box_it.empty()) {
1206 
1207  do {
1208  bblob = box_it.data();
1209  blob_box = bblob->bounding_box();
1210  if (bblob->joined_to_prev()) {
1211  if (bblob->cblob() != nullptr) {
1212  cout_it.set_to_list(cblob_it.data()->out_list());
1213  cout_it.move_to_last();
1214  cout_it.add_list_after(bblob->cblob()->out_list());
1215  delete bblob->cblob();
1216  }
1217  } else {
1218  if (bblob->cblob() != nullptr)
1219  cblob_it.add_after_then_move(bblob->cblob());
1220  }
1221  box_it.forward(); // next one
1222  bblob = box_it.data();
1223  blob_box = bblob->bounding_box();
1224 
1225  if (!bblob->joined_to_prev() && !cblobs.empty()) {
1226  word = new WERD(&cblobs, 1, nullptr);
1227  word_count++;
1228  word_it.add_after_then_move(word);
1229  if (bol) {
1230  word->set_flag(W_BOL, true);
1231  bol = false;
1232  }
1233  if (box_it.at_first()) { // at end of line
1234  word->set_flag(W_EOL, true);
1235  }
1236  }
1237  }
1238  while (!box_it.at_first()); // until back at start
1239  /* Setup the row with created words. */
1240  real_row = new ROW(row, static_cast<int16_t>(row->kern_size), static_cast<int16_t>(row->space_size));
1241  word_it.set_to_list(real_row->word_list());
1242  //put words in row
1243  word_it.add_list_after(&words);
1244  real_row->recalc_bounding_box();
1245  if (tosp_debug_level > 4) {
1246  tprintf ("Row:Made %d words in row ((%d,%d)(%d,%d))\n",
1247  word_count,
1248  real_row->bounding_box().left(),
1249  real_row->bounding_box().bottom(),

◆ make_prop_words()

ROW * tesseract::Textord::make_prop_words ( TO_ROW *  row,
FCOORD  rotation 
)

Definition at line 885 of file tospace.cpp.

894  {
895  bool bol; // start of line
896  /* prev_ values are for start of word being built. non prev_ values are for
897  the gap between the word being built and the next one. */
898  bool prev_fuzzy_sp; // probably space
899  bool prev_fuzzy_non; // probably not
900  uint8_t prev_blanks; // in front of word
901  bool fuzzy_sp = false; // probably space
902  bool fuzzy_non = false; // probably not
903  uint8_t blanks = 0; // in front of word
904  bool prev_gap_was_a_space = false;
905  bool break_at_next_gap = false;
906  ROW *real_row; // output row
907  C_OUTLINE_IT cout_it;
908  C_BLOB_LIST cblobs;
909  C_BLOB_IT cblob_it = &cblobs;
910  WERD_LIST words;
911  WERD *word; // new word
912  int32_t next_rep_char_word_right = INT32_MAX;
913  float repetition_spacing; // gap between repetitions
914  int32_t xstarts[2]; // row ends
915  int32_t prev_x; // end of prev blob
916  BLOBNBOX *bblob; // current blob
917  TBOX blob_box; // bounding box
918  BLOBNBOX_IT box_it; // iterator
919  TBOX prev_blob_box;
920  TBOX next_blob_box;
921  int16_t prev_gap = INT16_MAX;
922  int16_t current_gap = INT16_MAX;
923  int16_t next_gap = INT16_MAX;
924  int16_t prev_within_xht_gap = INT16_MAX;
925  int16_t current_within_xht_gap = INT16_MAX;
926  int16_t next_within_xht_gap = INT16_MAX;
927  int16_t word_count = 0;
928 
929  // repeated char words
930  WERD_IT rep_char_it(&(row->rep_words));
931  if (!rep_char_it.empty ()) {
932  next_rep_char_word_right =
933  rep_char_it.data ()->bounding_box ().right ();
934  }
935 
936  prev_x = -INT16_MAX;
937  cblob_it.set_to_list (&cblobs);
938  box_it.set_to_list (row->blob_list ());
939  // new words
940  WERD_IT word_it(&words);
941  bol = true;
942  prev_blanks = 0;
943  prev_fuzzy_sp = false;
944  prev_fuzzy_non = false;
945  if (!box_it.empty ()) {
946  xstarts[0] = box_it.data ()->bounding_box ().left ();
947  if (xstarts[0] > next_rep_char_word_right) {
948  /* We need to insert a repeated char word at the start of the row */
949  word = rep_char_it.extract ();
950  word_it.add_after_then_move (word);
951  /* Set spaces before repeated char word */
952  word->set_flag (W_BOL, true);
953  bol = false;
954  word->set_blanks (0);
955  //NO uncertainty
956  word->set_flag (W_FUZZY_SP, false);
957  word->set_flag (W_FUZZY_NON, false);
958  xstarts[0] = word->bounding_box ().left ();
959  /* Set spaces after repeated char word (and leave current word set) */
960  repetition_spacing = find_mean_blob_spacing (word);
961  current_gap = box_it.data ()->bounding_box ().left () -
962  next_rep_char_word_right;
963  current_within_xht_gap = current_gap;
964  if (current_gap > tosp_rep_space * repetition_spacing) {
965  prev_blanks = static_cast<uint8_t>(floor (current_gap / row->space_size));
966  if (prev_blanks < 1)
967  prev_blanks = 1;
968  }
969  else
970  prev_blanks = 0;
971  if (tosp_debug_level > 5)
972  tprintf ("Repch wd at BOL(%d, %d). rep spacing %5.2f; Rgap:%d ",
973  box_it.data ()->bounding_box ().left (),
974  box_it.data ()->bounding_box ().bottom (),
975  repetition_spacing, current_gap);
976  prev_fuzzy_sp = false;
977  prev_fuzzy_non = false;
978  if (rep_char_it.empty ()) {
979  next_rep_char_word_right = INT32_MAX;
980  }
981  else {
982  rep_char_it.forward ();
983  next_rep_char_word_right =
984  rep_char_it.data ()->bounding_box ().right ();
985  }
986  }
987 
988  peek_at_next_gap(row,
989  box_it,
990  next_blob_box,
991  next_gap,
992  next_within_xht_gap);
993  do {
994  bblob = box_it.data ();
995  blob_box = bblob->bounding_box ();
996  if (bblob->joined_to_prev ()) {
997  if (bblob->cblob () != nullptr) {
998  cout_it.set_to_list (cblob_it.data ()->out_list ());
999  cout_it.move_to_last ();
1000  cout_it.add_list_after (bblob->cblob ()->out_list ());
1001  delete bblob->cblob ();
1002  }
1003  } else {
1004  if (bblob->cblob() != nullptr)
1005  cblob_it.add_after_then_move (bblob->cblob ());
1006  prev_x = blob_box.right ();
1007  }
1008  box_it.forward (); //next one
1009  bblob = box_it.data ();
1010  blob_box = bblob->bounding_box ();
1011 
1012  if (!bblob->joined_to_prev() && bblob->cblob() != nullptr) {
1013  /* Real Blob - not multiple outlines or pre-chopped */
1014  prev_gap = current_gap;
1015  prev_within_xht_gap = current_within_xht_gap;
1016  prev_blob_box = next_blob_box;
1017  current_gap = next_gap;
1018  current_within_xht_gap = next_within_xht_gap;
1019  peek_at_next_gap(row,
1020  box_it,
1021  next_blob_box,
1022  next_gap,
1023  next_within_xht_gap);
1024 
1025  int16_t prev_gap_arg = prev_gap;
1026  int16_t next_gap_arg = next_gap;
1027  if (tosp_only_use_xht_gaps) {
1028  prev_gap_arg = prev_within_xht_gap;
1029  next_gap_arg = next_within_xht_gap;
1030  }
1031  // Decide if a word-break should be inserted
1032  if (blob_box.left () > next_rep_char_word_right ||
1033  make_a_word_break(row, blob_box, prev_gap_arg, prev_blob_box,
1034  current_gap, current_within_xht_gap,
1035  next_blob_box, next_gap_arg,
1036  blanks, fuzzy_sp, fuzzy_non,
1037  prev_gap_was_a_space,
1038  break_at_next_gap) ||
1039  box_it.at_first()) {
1040  /* Form a new word out of the blobs collected */
1041  word = new WERD (&cblobs, prev_blanks, nullptr);
1042  word_count++;
1043  word_it.add_after_then_move (word);
1044  if (bol) {
1045  word->set_flag (W_BOL, true);
1046  bol = false;
1047  }
1048  if (prev_fuzzy_sp)
1049  //probably space
1050  word->set_flag (W_FUZZY_SP, true);
1051  else if (prev_fuzzy_non)
1052  word->set_flag (W_FUZZY_NON, true);
1053  //probably not
1054 
1055  if (blob_box.left () > next_rep_char_word_right) {
1056  /* We need to insert a repeated char word */
1057  word = rep_char_it.extract ();
1058  word_it.add_after_then_move (word);
1059 
1060  /* Set spaces before repeated char word */
1061  repetition_spacing = find_mean_blob_spacing (word);
1062  current_gap = word->bounding_box ().left () - prev_x;
1063  current_within_xht_gap = current_gap;
1064  if (current_gap > tosp_rep_space * repetition_spacing) {
1065  blanks =
1066  static_cast<uint8_t>(floor (current_gap / row->space_size));
1067  if (blanks < 1)
1068  blanks = 1;
1069  }
1070  else
1071  blanks = 0;
1072  if (tosp_debug_level > 5)
1073  tprintf
1074  ("Repch wd (%d,%d) rep gap %5.2f; Lgap:%d (%d blanks);",
1075  word->bounding_box ().left (),
1076  word->bounding_box ().bottom (),
1077  repetition_spacing, current_gap, blanks);
1078  word->set_blanks (blanks);
1079  //NO uncertainty
1080  word->set_flag (W_FUZZY_SP, false);
1081  word->set_flag (W_FUZZY_NON, false);
1082 
1083  /* Set spaces after repeated char word (and leave current word set) */
1084  current_gap =
1085  blob_box.left () - next_rep_char_word_right;
1086  if (current_gap > tosp_rep_space * repetition_spacing) {
1087  blanks = static_cast<uint8_t>(current_gap / row->space_size);
1088  if (blanks < 1)
1089  blanks = 1;
1090  }
1091  else
1092  blanks = 0;
1093  if (tosp_debug_level > 5)
1094  tprintf (" Rgap:%d (%d blanks)\n",
1095  current_gap, blanks);
1096  fuzzy_sp = false;
1097  fuzzy_non = false;
1098 
1099  if (rep_char_it.empty ()) {
1100  next_rep_char_word_right = INT32_MAX;
1101  }
1102  else {
1103  rep_char_it.forward ();
1104  next_rep_char_word_right =
1105  rep_char_it.data ()->bounding_box ().right ();
1106  }
1107  }
1108 
1109  if (box_it.at_first () && rep_char_it.empty ()) {
1110  //at end of line
1111  word->set_flag (W_EOL, true);
1112  xstarts[1] = prev_x;
1113  }
1114  else {
1115  prev_blanks = blanks;
1116  prev_fuzzy_sp = fuzzy_sp;
1117  prev_fuzzy_non = fuzzy_non;
1118  }
1119  }
1120  }
1121  }
1122  while (!box_it.at_first ()); //until back at start
1123 
1124  /* Insert any further repeated char words */
1125  while (!rep_char_it.empty ()) {
1126  word = rep_char_it.extract ();
1127  word_it.add_after_then_move (word);
1128 
1129  /* Set spaces before repeated char word */
1130  repetition_spacing = find_mean_blob_spacing (word);
1131  current_gap = word->bounding_box ().left () - prev_x;
1132  if (current_gap > tosp_rep_space * repetition_spacing) {
1133  blanks = static_cast<uint8_t>(floor (current_gap / row->space_size));
1134  if (blanks < 1)
1135  blanks = 1;
1136  }
1137  else
1138  blanks = 0;
1139  if (tosp_debug_level > 5)
1140  tprintf(
1141  "Repch wd at EOL (%d,%d). rep spacing %5.2f; Lgap:%d (%d blanks)\n",
1142  word->bounding_box().left(), word->bounding_box().bottom(),
1143  repetition_spacing, current_gap, blanks);
1144  word->set_blanks (blanks);
1145  //NO uncertainty
1146  word->set_flag (W_FUZZY_SP, false);
1147  word->set_flag (W_FUZZY_NON, false);
1148  prev_x = word->bounding_box ().right ();
1149  if (rep_char_it.empty ()) {
1150  //at end of line
1151  word->set_flag (W_EOL, true);
1152  xstarts[1] = prev_x;
1153  }
1154  else {
1155  rep_char_it.forward ();
1156  }
1157  }
1158  real_row = new ROW (row,
1159  static_cast<int16_t>(row->kern_size), static_cast<int16_t>(row->space_size));
1160  word_it.set_to_list (real_row->word_list ());
1161  //put words in row
1162  word_it.add_list_after (&words);
1163  real_row->recalc_bounding_box ();
1164 
1165  if (tosp_debug_level > 4) {
1166  tprintf ("Row: Made %d words in row ((%d,%d)(%d,%d))\n",
1167  word_count,
1168  real_row->bounding_box ().left (),
1169  real_row->bounding_box ().bottom (),
1170  real_row->bounding_box ().right (),

◆ make_spline_rows()

void tesseract::Textord::make_spline_rows ( TO_BLOCK *  block,
float  gradient,
bool  testing_on 
)

Definition at line 2003 of file makerow.cpp.

2005  {
2006 #ifndef GRAPHICS_DISABLED
2007  ScrollView::Color colour; //of row
2008 #endif
2009  TO_ROW_IT row_it = block->get_rows ();
2010 
2011  row_it.move_to_first ();
2012  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
2013  if (row_it.data ()->blob_list ()->empty ())
2014  delete row_it.extract (); //nothing in it
2015  else
2016  make_baseline_spline (row_it.data (), block);
2017  }
2018  if (textord_old_baselines) {
2019 #ifndef GRAPHICS_DISABLED
2020  if (testing_on) {
2021  colour = ScrollView::RED;
2022  for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
2023  row_it.forward ()) {
2024  row_it.data ()->baseline.plot (to_win, colour);
2025  colour = static_cast<ScrollView::Color>(colour + 1);
2026  if (colour > ScrollView::MAGENTA)
2027  colour = ScrollView::RED;
2028  }
2029  }
2030 #endif
2031  make_old_baselines(block, testing_on, gradient);
2032  }
2033 #ifndef GRAPHICS_DISABLED
2034  if (testing_on) {
2035  colour = ScrollView::RED;
2036  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
2037  row_it.data ()->baseline.plot (to_win, colour);
2038  colour = static_cast<ScrollView::Color>(colour + 1);
2039  if (colour > ScrollView::MAGENTA)
2040  colour = ScrollView::RED;
2041  }
2042  }
2043 #endif
2044 }

◆ set_use_cjk_fp_model()

void tesseract::Textord::set_use_cjk_fp_model ( bool  flag)
inline

Definition at line 95 of file textord.h.

95  {
96  use_cjk_fp_model_ = flag;
97  }

◆ TextordPage()

void tesseract::Textord::TextordPage ( PageSegMode  pageseg_mode,
const FCOORD &  reskew,
int  width,
int  height,
Pix *  binary_pix,
Pix *  thresholds_pix,
Pix *  grey_pix,
bool  use_box_bottoms,
BLOBNBOX_LIST *  diacritic_blobs,
BLOCK_LIST *  blocks,
TO_BLOCK_LIST *  to_blocks 
)

Definition at line 226 of file textord.cpp.

230  {
231  page_tr_.set_x(width);
232  page_tr_.set_y(height);
233  if (to_blocks->empty()) {
234  // AutoPageSeg was not used, so we need to find_components first.
235  find_components(binary_pix, blocks, to_blocks);
236  TO_BLOCK_IT it(to_blocks);
237  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
238  TO_BLOCK* to_block = it.data();
239  // Compute the edge offsets whether or not there is a grey_pix.
240  // We have by-passed auto page seg, so we have to run it here.
241  // By page segmentation mode there is no non-text to avoid running on.
242  to_block->ComputeEdgeOffsets(thresholds_pix, grey_pix);
243  }
244  } else if (!PSM_SPARSE(pageseg_mode)) {
245  // AutoPageSeg does not need to find_components as it did that already.
246  // Filter_blobs sets up the TO_BLOCKs the same as find_components does.
247  filter_blobs(page_tr_, to_blocks, true);
248  }
249 
250  ASSERT_HOST(!to_blocks->empty());
251  if (pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT) {
252  const FCOORD anticlockwise90(0.0f, 1.0f);
253  const FCOORD clockwise90(0.0f, -1.0f);
254  TO_BLOCK_IT it(to_blocks);
255  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
256  TO_BLOCK* to_block = it.data();
257  BLOCK* block = to_block->block;
258  // Create a fake poly_block in block from its bounding box.
259  block->pdblk.set_poly_block(new POLY_BLOCK(block->pdblk.bounding_box(),
260  PT_VERTICAL_TEXT));
261  // Rotate the to_block along with its contained block and blobnbox lists.
262  to_block->rotate(anticlockwise90);
263  // Set the block's rotation values to obey the convention followed in
264  // layout analysis for vertical text.
265  block->set_re_rotation(clockwise90);
266  block->set_classify_rotation(clockwise90);
267  }
268  }
269 
270  TO_BLOCK_IT to_block_it(to_blocks);
271  TO_BLOCK* to_block = to_block_it.data();
272  // Make the rows in the block.
273  float gradient;
274  // Do it the old fashioned way.
275  if (PSM_LINE_FIND_ENABLED(pageseg_mode)) {
276  gradient = make_rows(page_tr_, to_blocks);
277  } else if (!PSM_SPARSE(pageseg_mode)) {
278  // RAW_LINE, SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row.
279  gradient = make_single_row(page_tr_, pageseg_mode != PSM_RAW_LINE,
280  to_block, to_blocks);
281  } else {
282  gradient = 0.0f;
283  }
284  BaselineDetect baseline_detector(textord_baseline_debug,
285  reskew, to_blocks);
286  baseline_detector.ComputeStraightBaselines(use_box_bottoms);
287  baseline_detector.ComputeBaselineSplinesAndXheights(
288  page_tr_, pageseg_mode != PSM_RAW_LINE, textord_heavy_nr,
289  textord_show_final_rows, this);
290  // Now make the words in the lines.
291  if (PSM_WORD_FIND_ENABLED(pageseg_mode)) {
292  // SINGLE_LINE uses the old word maker on the single line.
293  make_words(this, page_tr_, gradient, blocks, to_blocks);
294  } else {
295  // SINGLE_WORD and SINGLE_CHAR cram all the blobs into a
296  // single word, and in SINGLE_CHAR mode, all the outlines
297  // go in a single blob.
298  TO_BLOCK* to_block = to_block_it.data();
299  make_single_word(pageseg_mode == PSM_SINGLE_CHAR,
300  to_block->get_rows(), to_block->block->row_list());
301  }
302  // Remove empties.
303  cleanup_blocks(PSM_WORD_FIND_ENABLED(pageseg_mode), blocks);
304  TransferDiacriticsToBlockGroups(diacritic_blobs, blocks);
305  // Compute the margins for each row in the block, to be used later for
306  // paragraph detection.
307  BLOCK_IT b_it(blocks);
308  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
309  b_it.data()->compute_row_margins();
310  }
311 #ifndef GRAPHICS_DISABLED
312  close_to_win();
313 #endif
314 }

◆ to_spacing()

void tesseract::Textord::to_spacing ( ICOORD  page_tr,
TO_BLOCK_LIST *  blocks 
)

Definition at line 43 of file tospace.cpp.

43  {
45  ICOORD page_tr, //topright of page
46  TO_BLOCK_LIST *blocks //blocks on page
47  ) {
48  TO_BLOCK_IT block_it; //iterator
49  TO_BLOCK *block; //current block;
50  TO_ROW *row; //current row
51  int block_index; //block number
52  int row_index; //row number
53  //estimated width of real spaces for whole block
54  int16_t block_space_gap_width;
55  //estimated width of non space gaps for whole block
56  int16_t block_non_space_gap_width;
57  bool old_text_ord_proportional;//old fixed/prop result
58 
59  block_it.set_to_list (blocks);
60  block_index = 1;
61  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
62  block_it.forward ()) {
63  block = block_it.data ();
64  std::unique_ptr<GAPMAP> gapmap(new GAPMAP (block)); //map of big vert gaps in blk
65  block_spacing_stats(block,
66  gapmap.get(),
67  old_text_ord_proportional,
68  block_space_gap_width,
69  block_non_space_gap_width);
70  // Make sure relative values of block-level space and non-space gap
71  // widths are reasonable. The ratio of 1:3 is also used in
72  // block_spacing_stats, to correct the block_space_gap_width
73  // Useful for arabic and hindi, when the non-space gap width is
74  // often over-estimated and should not be trusted. A similar ratio
75  // is found in block_spacing_stats.
76  if (tosp_old_to_method && tosp_old_to_constrain_sp_kn &&
77  static_cast<float>(block_space_gap_width) / block_non_space_gap_width < 3.0) {
78  block_non_space_gap_width = static_cast<int16_t>(floor (block_space_gap_width / 3.0));
79  }
80  // row iterator
81  TO_ROW_IT row_it(block->get_rows());
82  row_index = 1;
83  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
84  row = row_it.data ();
85  if ((row->pitch_decision == PITCH_DEF_PROP) ||
86  (row->pitch_decision == PITCH_CORR_PROP)) {
87  if ((tosp_debug_level > 0) && !old_text_ord_proportional)
88  tprintf ("Block %d Row %d: Now Proportional\n",
89  block_index, row_index);
90  row_spacing_stats(row,
91  gapmap.get(),
92  block_index,
93  row_index,
94  block_space_gap_width,
95  block_non_space_gap_width);
96  }
97  else {
98  if ((tosp_debug_level > 0) && old_text_ord_proportional)
99  tprintf
100  ("Block %d Row %d: Now Fixed Pitch Decision:%d fp flag:%f\n",
101  block_index, row_index, row->pitch_decision,
102  row->fixed_pitch);
103  }
104 #ifndef GRAPHICS_DISABLED
105  if (textord_show_initial_words)
106  plot_word_decisions (to_win, static_cast<int16_t>(row->fixed_pitch), row);
107 #endif
108  row_index++;
109  }
110  block_index++;
111  }

◆ use_cjk_fp_model()

bool tesseract::Textord::use_cjk_fp_model ( ) const
inline

Definition at line 92 of file textord.h.

92  {
93  return use_cjk_fp_model_;
94  }

Member Data Documentation

◆ textord_baseline_debug

int tesseract::Textord::textord_baseline_debug = 0

"Baseline debug level"

Definition at line 377 of file textord.h.

◆ textord_blshift_maxshift

double tesseract::Textord::textord_blshift_maxshift = 0.00

"Max baseline shift"

Definition at line 396 of file textord.h.

◆ textord_blshift_xfraction

double tesseract::Textord::textord_blshift_xfraction = 9.99

"Min size of baseline shift"

Definition at line 397 of file textord.h.

◆ textord_initialasc_ile

double tesseract::Textord::textord_initialasc_ile = 0.90

"Ile of sizes for xheight guess"

Definition at line 381 of file textord.h.

◆ textord_initialx_ile

double tesseract::Textord::textord_initialx_ile = 0.75

"Ile of sizes for xheight guess"

Definition at line 380 of file textord.h.

◆ textord_max_noise_size

int tesseract::Textord::textord_max_noise_size = 7

"Pixel size of noise"

Definition at line 376 of file textord.h.

◆ textord_no_rejects

bool tesseract::Textord::textord_no_rejects = false

"Don't remove noise blobs"

Definition at line 373 of file textord.h.

◆ textord_noise_area_ratio

double tesseract::Textord::textord_noise_area_ratio = 0.7

"Fraction of bounding box for noise"

Definition at line 379 of file textord.h.

◆ textord_noise_debug

bool tesseract::Textord::textord_noise_debug = false

"Debug row garbage detector"

Definition at line 395 of file textord.h.

◆ textord_noise_hfract

double tesseract::Textord::textord_noise_hfract = 1.0/64

"Height fraction to discard outlines as speckle noise"

Definition at line 392 of file textord.h.

◆ textord_noise_normratio

double tesseract::Textord::textord_noise_normratio = 2.0

"Dot to norm ratio for deletion"

Definition at line 385 of file textord.h.

◆ textord_noise_rejrows

bool tesseract::Textord::textord_noise_rejrows = true

"Reject noise-like rows"

Definition at line 387 of file textord.h.

◆ textord_noise_rejwords

bool tesseract::Textord::textord_noise_rejwords = true

"Reject noise-like words"

Definition at line 386 of file textord.h.

◆ textord_noise_rowratio

double tesseract::Textord::textord_noise_rowratio = 6.0

"Dot to norm ratio for deletion"

Definition at line 394 of file textord.h.

◆ textord_noise_sizefraction

int tesseract::Textord::textord_noise_sizefraction = 10

"Fraction of size for maxima"

Definition at line 382 of file textord.h.

◆ textord_noise_sizelimit

double tesseract::Textord::textord_noise_sizelimit = 0.5

"Fraction of x for big t count"

Definition at line 383 of file textord.h.

◆ textord_noise_sncount

int tesseract::Textord::textord_noise_sncount = 1

"super norm blobs to save row"

Definition at line 393 of file textord.h.

◆ textord_noise_sxfract

double tesseract::Textord::textord_noise_sxfract = 0.4

"xh fract width error for norm blobs"

Definition at line 390 of file textord.h.

◆ textord_noise_syfract

double tesseract::Textord::textord_noise_syfract = 0.2

"xh fract error for norm blobs"

Definition at line 388 of file textord.h.

◆ textord_noise_translimit

int tesseract::Textord::textord_noise_translimit = 16

"Transitions for normal blob"

Definition at line 384 of file textord.h.

◆ textord_show_blobs

bool tesseract::Textord::textord_show_blobs = false

"Display unsorted blobs"

Definition at line 374 of file textord.h.

◆ textord_show_boxes

bool tesseract::Textord::textord_show_boxes = false

"Display boxes"

Definition at line 375 of file textord.h.

◆ textord_single_height_mode

bool tesseract::Textord::textord_single_height_mode = false

"Script has no xheight, so use a single mode for horizontal text"

Definition at line 261 of file textord.h.

◆ tosp_all_flips_fuzzy

bool tesseract::Textord::tosp_all_flips_fuzzy = false

"Pass ANY flip to context?"

Definition at line 287 of file textord.h.

◆ tosp_block_use_cert_spaces

bool tesseract::Textord::tosp_block_use_cert_spaces = true

"Only stat OBVIOUS spaces"

Definition at line 277 of file textord.h.

◆ tosp_debug_level

int tesseract::Textord::tosp_debug_level = 0

"Debug data"

Definition at line 302 of file textord.h.

◆ tosp_dont_fool_with_small_kerns

double tesseract::Textord::tosp_dont_fool_with_small_kerns = -1

"Limit use of xht gap with odd small kns"

Definition at line 365 of file textord.h.

◆ tosp_enough_small_gaps

double tesseract::Textord::tosp_enough_small_gaps = 0.65

"Fract of kerns reqd for isolated row stats"

Definition at line 343 of file textord.h.

◆ tosp_enough_space_samples_for_median

int tesseract::Textord::tosp_enough_space_samples_for_median = 3

"or should we use mean"

Definition at line 304 of file textord.h.

◆ tosp_few_samples

int tesseract::Textord::tosp_few_samples = 40

"No.gaps reqd with 1 large gap to treat as a table"

Definition at line 308 of file textord.h.

◆ tosp_flip_caution

double tesseract::Textord::tosp_flip_caution = 0.0

"Don't autoflip kn to sp when large separation"

Definition at line 361 of file textord.h.

◆ tosp_flip_fuzz_kn_to_sp

bool tesseract::Textord::tosp_flip_fuzz_kn_to_sp = true

"Default flip"

Definition at line 298 of file textord.h.

◆ tosp_flip_fuzz_sp_to_kn

bool tesseract::Textord::tosp_flip_fuzz_sp_to_kn = true

"Default flip"

Definition at line 299 of file textord.h.

◆ tosp_force_wordbreak_on_punct

bool tesseract::Textord::tosp_force_wordbreak_on_punct = false

"Force word breaks on punct to break long lines in non-space " "delimited langs"

Definition at line 271 of file textord.h.

◆ tosp_fuzzy_kn_fraction

double tesseract::Textord::tosp_fuzzy_kn_fraction = 0.5

"New fuzzy kn alg"

Definition at line 350 of file textord.h.

◆ tosp_fuzzy_limit_all

bool tesseract::Textord::tosp_fuzzy_limit_all = true

"Don't restrict kn->sp fuzzy limit to tables"

Definition at line 289 of file textord.h.

◆ tosp_fuzzy_sp_fraction

double tesseract::Textord::tosp_fuzzy_sp_fraction = 0.5

"New fuzzy sp alg"

Definition at line 351 of file textord.h.

◆ tosp_fuzzy_space_factor

double tesseract::Textord::tosp_fuzzy_space_factor = 0.6

"Fract of xheight for fuzz sp"

Definition at line 327 of file textord.h.

◆ tosp_fuzzy_space_factor1

double tesseract::Textord::tosp_fuzzy_space_factor1 = 0.5

"Fract of xheight for fuzz sp"

Definition at line 329 of file textord.h.

◆ tosp_fuzzy_space_factor2

double tesseract::Textord::tosp_fuzzy_space_factor2 = 0.72

"Fract of xheight for fuzz sp"

Definition at line 331 of file textord.h.

◆ tosp_gap_factor

double tesseract::Textord::tosp_gap_factor = 0.83

"gap ratio to flip sp->kern"

Definition at line 332 of file textord.h.

◆ tosp_ignore_big_gaps

double tesseract::Textord::tosp_ignore_big_gaps = -1

"xht multiplier"

Definition at line 339 of file textord.h.

◆ tosp_ignore_very_big_gaps

double tesseract::Textord::tosp_ignore_very_big_gaps = 3.5

"xht multiplier"

Definition at line 340 of file textord.h.

◆ tosp_improve_thresh

bool tesseract::Textord::tosp_improve_thresh = false

"Enable improvement heuristic"

Definition at line 301 of file textord.h.

◆ tosp_init_guess_kn_mult

double tesseract::Textord::tosp_init_guess_kn_mult = 2.2

"Thresh guess - mult kn by this"

Definition at line 355 of file textord.h.

◆ tosp_init_guess_xht_mult

double tesseract::Textord::tosp_init_guess_xht_mult = 0.28

"Thresh guess - mult xht by this"

Definition at line 357 of file textord.h.

◆ tosp_kern_gap_factor1

double tesseract::Textord::tosp_kern_gap_factor1 = 2.0

"gap ratio to flip kern->sp"

Definition at line 334 of file textord.h.

◆ tosp_kern_gap_factor2

double tesseract::Textord::tosp_kern_gap_factor2 = 1.3

"gap ratio to flip kern->sp"

Definition at line 336 of file textord.h.

◆ tosp_kern_gap_factor3

double tesseract::Textord::tosp_kern_gap_factor3 = 2.5

"gap ratio to flip kern->sp"

Definition at line 338 of file textord.h.

◆ tosp_large_kerning

double tesseract::Textord::tosp_large_kerning = 0.19

"Limit use of xht gap with large kns"

Definition at line 363 of file textord.h.

◆ tosp_max_sane_kn_thresh

double tesseract::Textord::tosp_max_sane_kn_thresh = 5.0

"Multiplier on kn to limit thresh"

Definition at line 359 of file textord.h.

◆ tosp_min_sane_kn_sp

double tesseract::Textord::tosp_min_sane_kn_sp = 1.5

"Don't trust spaces less than this time kn"

Definition at line 353 of file textord.h.

◆ tosp_narrow_aspect_ratio

double tesseract::Textord::tosp_narrow_aspect_ratio = 0.48

"narrow if w/h less than this"

Definition at line 322 of file textord.h.

◆ tosp_narrow_blobs_not_cert

bool tesseract::Textord::tosp_narrow_blobs_not_cert = true

"Only stat OBVIOUS spaces"

Definition at line 281 of file textord.h.

◆ tosp_narrow_fraction

double tesseract::Textord::tosp_narrow_fraction = 0.3

"Fract of xheight for narrow"

Definition at line 320 of file textord.h.

◆ tosp_near_lh_edge

double tesseract::Textord::tosp_near_lh_edge = 0

"Don't reduce box if the top left is non blank"

Definition at line 367 of file textord.h.

◆ tosp_old_sp_kn_th_factor

double tesseract::Textord::tosp_old_sp_kn_th_factor = 2.0

"Factor for defining space threshold in terms of space and " "kern sizes"

Definition at line 314 of file textord.h.

◆ tosp_old_to_bug_fix

bool tesseract::Textord::tosp_old_to_bug_fix = false

"Fix suspected bug in old code"

Definition at line 275 of file textord.h.

◆ tosp_old_to_constrain_sp_kn

bool tesseract::Textord::tosp_old_to_constrain_sp_kn = false

"Constrain relative values of inter and intra-word gaps for " "old_to_method."

Definition at line 266 of file textord.h.

◆ tosp_old_to_method

bool tesseract::Textord::tosp_old_to_method = false

"Space stats use prechopping?"

Definition at line 263 of file textord.h.

◆ tosp_only_small_gaps_for_kern

bool tesseract::Textord::tosp_only_small_gaps_for_kern = false

"Better guess"

Definition at line 286 of file textord.h.

◆ tosp_only_use_prop_rows

bool tesseract::Textord::tosp_only_use_prop_rows = true

"Block stats to use fixed pitch rows?"

Definition at line 268 of file textord.h.

◆ tosp_only_use_xht_gaps

bool tesseract::Textord::tosp_only_use_xht_gaps = false

"Only use within xht gap for wd breaks"

Definition at line 295 of file textord.h.

◆ tosp_pass_wide_fuzz_sp_to_context

double tesseract::Textord::tosp_pass_wide_fuzz_sp_to_context = 0.75

"How wide fuzzies need context"

Definition at line 371 of file textord.h.

◆ tosp_recovery_isolated_row_stats

bool tesseract::Textord::tosp_recovery_isolated_row_stats = true

"Use row alone when inadequate cert spaces"

Definition at line 285 of file textord.h.

◆ tosp_redo_kern_limit

int tesseract::Textord::tosp_redo_kern_limit = 10

"No.samples reqd to reestimate for row"

Definition at line 306 of file textord.h.

◆ tosp_rep_space

double tesseract::Textord::tosp_rep_space = 1.6

"rep gap multiplier for space"

Definition at line 341 of file textord.h.

◆ tosp_row_use_cert_spaces

bool tesseract::Textord::tosp_row_use_cert_spaces = true

"Only stat OBVIOUS spaces"

Definition at line 279 of file textord.h.

◆ tosp_row_use_cert_spaces1

bool tesseract::Textord::tosp_row_use_cert_spaces1 = true

"Only stat OBVIOUS spaces"

Definition at line 283 of file textord.h.

◆ tosp_rule_9_test_punct

bool tesseract::Textord::tosp_rule_9_test_punct = false

"Don't chng kn to space next to punct"

Definition at line 297 of file textord.h.

◆ tosp_sanity_method

int tesseract::Textord::tosp_sanity_method = 1

"How to avoid being silly"

Definition at line 311 of file textord.h.

◆ tosp_short_row

int tesseract::Textord::tosp_short_row = 20

"No.gaps reqd with few cert spaces to use certs"

Definition at line 310 of file textord.h.

◆ tosp_silly_kn_sp_gap

double tesseract::Textord::tosp_silly_kn_sp_gap = 0.2

"Don't let sp minus kn get too small"

Definition at line 369 of file textord.h.

◆ tosp_stats_use_xht_gaps

bool tesseract::Textord::tosp_stats_use_xht_gaps = true

"Use within xht gap for wd breaks"

Definition at line 291 of file textord.h.

◆ tosp_table_fuzzy_kn_sp_ratio

double tesseract::Textord::tosp_table_fuzzy_kn_sp_ratio = 3.0

"Fuzzy if less than this"

Definition at line 349 of file textord.h.

◆ tosp_table_kn_sp_ratio

double tesseract::Textord::tosp_table_kn_sp_ratio = 2.25

"Min difference of kn & sp in table"

Definition at line 345 of file textord.h.

◆ tosp_table_xht_sp_ratio

double tesseract::Textord::tosp_table_xht_sp_ratio = 0.33

"Expect spaces bigger than this"

Definition at line 347 of file textord.h.

◆ tosp_threshold_bias1

double tesseract::Textord::tosp_threshold_bias1 = 0

"how far between kern and space?"

Definition at line 316 of file textord.h.

◆ tosp_threshold_bias2

double tesseract::Textord::tosp_threshold_bias2 = 0

"how far between kern and space?"

Definition at line 318 of file textord.h.

◆ tosp_use_pre_chopping

bool tesseract::Textord::tosp_use_pre_chopping = false

"Space stats use prechopping?"

Definition at line 273 of file textord.h.

◆ tosp_use_xht_gaps

bool tesseract::Textord::tosp_use_xht_gaps = true

"Use within xht gap for wd breaks"

Definition at line 293 of file textord.h.

◆ tosp_wide_aspect_ratio

double tesseract::Textord::tosp_wide_aspect_ratio = 0.0

"wide if w/h less than this"

Definition at line 325 of file textord.h.

◆ tosp_wide_fraction

double tesseract::Textord::tosp_wide_fraction = 0.52

"Fract of xheight for wide"

Definition at line 323 of file textord.h.


The documentation for this class was generated from the following files:
TO_BLOCK::ComputeEdgeOffsets
void ComputeEdgeOffsets(Pix *thresholds, Pix *grey)
Definition: blobbox.cpp:1038
tesseract::Textord::textord_show_blobs
bool textord_show_blobs
Definition: textord.h:374
textord_old_baselines
bool textord_old_baselines
Definition: makerow.cpp:51
tesseract::Textord::tosp_stats_use_xht_gaps
bool tosp_stats_use_xht_gaps
Definition: textord.h:291
TO_BLOCK::small_blobs
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:774
TO_BLOCK::plot_graded_blobs
void plot_graded_blobs(ScrollView *to_win)
Definition: blobbox.cpp:1054
ICOORD::set_x
void set_x(int16_t xin)
rewrite function
Definition: points.h:60
tesseract::Textord::textord_noise_rejwords
bool textord_noise_rejwords
Definition: textord.h:386
make_baseline_spline
void make_baseline_spline(TO_ROW *row, TO_BLOCK *block)
Definition: makerow.cpp:2056
make_single_word
void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST *real_rows)
Definition: wordseg.cpp:51
tesseract::Textord::tosp_wide_fraction
double tosp_wide_fraction
Definition: textord.h:323
TO_ROW::rep_words
WERD_LIST rep_words
Definition: blobbox.h:667
tesseract::Textord::tosp_debug_level
int tosp_debug_level
Definition: textord.h:302
tesseract::Textord::tosp_large_kerning
double tosp_large_kerning
Definition: textord.h:363
TO_ROW::space_size
float space_size
Definition: blobbox.h:666
C_BLOB::out_list
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:69
tesseract::CCStruct::kXHeightFraction
static const double kXHeightFraction
Definition: ccstruct.h:34
tesseract::Textord::tosp_fuzzy_space_factor1
double tosp_fuzzy_space_factor1
Definition: textord.h:329
PDBLK::bounding_box
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:58
tesseract::Textord::textord_blshift_xfraction
double textord_blshift_xfraction
Definition: textord.h:397
tesseract::Textord::tosp_all_flips_fuzzy
bool tosp_all_flips_fuzzy
Definition: textord.h:287
tesseract::Textord::tosp_min_sane_kn_sp
double tosp_min_sane_kn_sp
Definition: textord.h:353
create_to_win
ScrollView * create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:42
make_single_row
float make_single_row(ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks)
Definition: makerow.cpp:163
TO_BLOCK::noise_blobs
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:773
POLY_BLOCK::IsText
bool IsText() const
Definition: polyblk.h:62
tesseract::Textord::to_spacing
void to_spacing(ICOORD page_tr, TO_BLOCK_LIST *blocks)
Definition: tospace.cpp:43
tesseract::Textord::tosp_improve_thresh
bool tosp_improve_thresh
Definition: textord.h:301
tesseract::CCStruct::kXHeightCapRatio
static const double kXHeightCapRatio
Definition: ccstruct.h:37
tesseract::Textord::tosp_table_kn_sp_ratio
double tosp_table_kn_sp_ratio
Definition: textord.h:345
tesseract::Textord::textord_noise_sxfract
double textord_noise_sxfract
Definition: textord.h:390
ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:87
plot_word_decisions
void plot_word_decisions(ScrollView *win, int16_t pitch, TO_ROW *row)
Definition: drawtord.cpp:239
WERD::bounding_box
TBOX bounding_box() const
Definition: werd.cpp:147
tesseract::Textord::tosp_ignore_big_gaps
double tosp_ignore_big_gaps
Definition: textord.h:339
INT_MEMBER
#define INT_MEMBER(name, val, comment, vec)
Definition: params.h:312
tesseract::PSM_RAW_LINE
Definition: publictypes.h:176
textord_heavy_nr
bool textord_heavy_nr
Definition: makerow.cpp:42
BLOCK::row_list
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:115
ROW_DESCENDERS_FOUND
Definition: makerow.h:36
PITCH_DEF_PROP
Definition: blobbox.h:48
FCOORD::y
float y() const
Definition: points.h:209
tesseract::Textord::textord_initialasc_ile
double textord_initialasc_ile
Definition: textord.h:381
ICOORD
integer coordinate
Definition: points.h:30
tesseract::Textord::textord_max_noise_size
int textord_max_noise_size
Definition: textord.h:376
tesseract::Textord::tosp_fuzzy_space_factor
double tosp_fuzzy_space_factor
Definition: textord.h:327
fill_heights
void fill_heights(TO_ROW *row, float gradient, int min_height, int max_height, STATS *heights, STATS *floating_heights)
Definition: makerow.cpp:1406
ROW::recalc_bounding_box
void recalc_bounding_box()
Definition: ocrrow.cpp:96
tesseract::Textord::tosp_near_lh_edge
double tosp_near_lh_edge
Definition: textord.h:367
WERD_CHOICE::certainty
float certainty() const
Definition: ratngs.h:318
tesseract::Textord::textord_noise_sncount
int textord_noise_sncount
Definition: textord.h:393
ScrollView::Clear
void Clear()
Definition: scrollview.cpp:588
TO_BLOCK::blobs
BLOBNBOX_LIST blobs
Definition: blobbox.h:771
TO_BLOCK
Definition: blobbox.h:691
tesseract::Textord::textord_blshift_maxshift
double textord_blshift_maxshift
Definition: textord.h:396
tesseract::Textord::tosp_recovery_isolated_row_stats
bool tosp_recovery_isolated_row_stats
Definition: textord.h:285
WERD_RES
Definition: pageres.h:160
tesseract::Textord::tosp_rule_9_test_punct
bool tosp_rule_9_test_punct
Definition: textord.h:297
PITCH_CORR_PROP
Definition: blobbox.h:51
textord_debug_xheights
bool textord_debug_xheights
Definition: makerow.cpp:55
tesseract::Textord::tosp_kern_gap_factor2
double tosp_kern_gap_factor2
Definition: textord.h:336
tesseract::Textord::tosp_table_fuzzy_kn_sp_ratio
double tosp_table_fuzzy_kn_sp_ratio
Definition: textord.h:349
ROW_ASCENDERS_FOUND
Definition: makerow.h:35
TO_ROW::pitch_decision
PITCH_TYPE pitch_decision
Definition: blobbox.h:649
tesseract::Textord::tosp_short_row
int tosp_short_row
Definition: textord.h:310
tesseract::Textord::textord_initialx_ile
double textord_initialx_ile
Definition: textord.h:380
tesseract::Textord::tosp_row_use_cert_spaces
bool tosp_row_use_cert_spaces
Definition: textord.h:279
FCOORD
Definition: points.h:187
compute_xheight_from_modes
int compute_xheight_from_modes(STATS *heights, STATS *floating_heights, bool cap_only, int min_height, int max_height, float *xheight, float *ascrise)
Definition: makerow.cpp:1467
BLOBNBOX
Definition: blobbox.h:142
tesseract::PSM_WORD_FIND_ENABLED
bool PSM_WORD_FIND_ENABLED(int pageseg_mode)
Definition: publictypes.h:206
tesseract::Textord::textord_single_height_mode
bool textord_single_height_mode
Definition: textord.h:261
ICOORD::set_y
void set_y(int16_t yin)
rewrite function
Definition: points.h:64
PT_VERTICAL_TEXT
Definition: capi.h:115
tesseract::Textord::tosp_narrow_blobs_not_cert
bool tosp_narrow_blobs_not_cert
Definition: textord.h:281
tesseract::Textord::compute_row_xheight
void compute_row_xheight(TO_ROW *row, const FCOORD &rotation, float gradient, int block_line_size)
Definition: makerow.cpp:1366
tesseract::Textord::tosp_flip_fuzz_kn_to_sp
bool tosp_flip_fuzz_kn_to_sp
Definition: textord.h:298
textord_show_initial_words
bool textord_show_initial_words
Definition: tovars.cpp:22
textord_min_xheight
int textord_min_xheight
Definition: makerow.cpp:67
TO_BLOCK::rotate
void rotate(const FCOORD &rotation)
Definition: blobbox.h:709
textord_excess_blobsize
double textord_excess_blobsize
Definition: makerow.cpp:83
tesseract::Textord::textord_no_rejects
bool textord_no_rejects
Definition: textord.h:373
BLOCK::set_re_rotation
void set_re_rotation(const FCOORD &rotation)
Definition: ocrblock.h:136
get_row_category
ROW_CATEGORY get_row_category(const TO_ROW *row)
Definition: makerow.h:121
tesseract::Textord::tosp_redo_kern_limit
int tosp_redo_kern_limit
Definition: textord.h:306
tesseract::Textord::tosp_only_use_xht_gaps
bool tosp_only_use_xht_gaps
Definition: textord.h:295
tesseract::Textord::tosp_sanity_method
int tosp_sanity_method
Definition: textord.h:311
tesseract::Textord::tosp_only_small_gaps_for_kern
bool tosp_only_small_gaps_for_kern
Definition: textord.h:286
ROW_CATEGORY
ROW_CATEGORY
Definition: makerow.h:34
tesseract::Textord::find_components
void find_components(Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
Definition: tordmain.cpp:215
tesseract::PSM_SPARSE
bool PSM_SPARSE(int pageseg_mode)
Definition: publictypes.h:197
tesseract::Textord::tosp_table_xht_sp_ratio
double tosp_table_xht_sp_ratio
Definition: textord.h:347
LOC_EDGE_PROG
#define LOC_EDGE_PROG
Definition: errcode.h:42
TO_ROW::rep_chars_marked
bool rep_chars_marked() const
Definition: blobbox.h:630
extract_edges
void extract_edges(Pix *pix, BLOCK *block)
Definition: edgblob.cpp:329
tesseract::Textord::tosp_threshold_bias2
double tosp_threshold_bias2
Definition: textord.h:318
BLOCK
Definition: ocrblock.h:28
tesseract::PSM_SINGLE_BLOCK_VERT_TEXT
Definition: publictypes.h:166
BLOCK::pdblk
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:189
W_FUZZY_NON
fuzzy nonspace
Definition: werd.h:54
tesseract::Textord::tosp_row_use_cert_spaces1
bool tosp_row_use_cert_spaces1
Definition: textord.h:283
tesseract::Textord::tosp_fuzzy_limit_all
bool tosp_fuzzy_limit_all
Definition: textord.h:289
tesseract::Textord::tosp_old_to_constrain_sp_kn
bool tosp_old_to_constrain_sp_kn
Definition: textord.h:266
mark_repeated_chars
void mark_repeated_chars(TO_ROW *row)
Definition: makerow.cpp:2639
TO_BLOCK::large_blobs
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:775
close_to_win
void close_to_win()
Definition: drawtord.cpp:51
WERD_RES::best_choice
WERD_CHOICE * best_choice
Definition: pageres.h:235
tesseract::Textord::tosp_enough_small_gaps
double tosp_enough_small_gaps
Definition: textord.h:343
PDBLK::set_poly_block
void set_poly_block(POLY_BLOCK *blk)
set the poly block
Definition: pdblock.h:56
make_words
void make_words(tesseract::Textord *textord, ICOORD page_tr, float gradient, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
Definition: wordseg.cpp:95
tesseract::Textord::textord_noise_syfract
double textord_noise_syfract
Definition: textord.h:388
tesseract::Textord::tosp_narrow_fraction
double tosp_narrow_fraction
Definition: textord.h:320
PDBLK::poly_block
POLY_BLOCK * poly_block() const
Definition: pdblock.h:54
ROW_RES::row
ROW * row
Definition: pageres.h:136
TO_BLOCK::block
BLOCK * block
Definition: blobbox.h:776
ScrollView::MAGENTA
Definition: scrollview.h:109
WERD::set_flag
void set_flag(WERD_FLAGS mask, bool value)
Definition: werd.h:117
TO_BLOCK::xheight
float xheight
Definition: blobbox.h:787
set_global_loc_code
void set_global_loc_code(int loc_code)
Definition: globaloc.cpp:25
W_EOL
end of line
Definition: werd.h:47
BLOBNBOX::joined_to_prev
bool joined_to_prev() const
Definition: blobbox.h:255
tesseract::Textord::tosp_old_sp_kn_th_factor
double tosp_old_sp_kn_th_factor
Definition: textord.h:314
tesseract::Textord::tosp_dont_fool_with_small_kerns
double tosp_dont_fool_with_small_kerns
Definition: textord.h:365
tesseract::Textord::tosp_pass_wide_fuzz_sp_to_context
double tosp_pass_wide_fuzz_sp_to_context
Definition: textord.h:371
double_MEMBER
#define double_MEMBER(name, val, comment, vec)
Definition: params.h:321
tesseract::Textord::tosp_only_use_prop_rows
bool tosp_only_use_prop_rows
Definition: textord.h:268
TBOX::bottom
int16_t bottom() const
Definition: rect.h:64
BLOCK::set_classify_rotation
void set_classify_rotation(const FCOORD &rotation)
Definition: ocrblock.h:142
tesseract::CCStruct::kAscenderFraction
static const double kAscenderFraction
Definition: ccstruct.h:35
tesseract::Textord::tosp_fuzzy_sp_fraction
double tosp_fuzzy_sp_fraction
Definition: textord.h:351
tesseract::Textord::tosp_old_to_bug_fix
bool tosp_old_to_bug_fix
Definition: textord.h:275
ScrollView::WHITE
Definition: scrollview.h:103
ROW::bounding_box
TBOX bounding_box() const
Definition: ocrrow.h:87
tesseract::CCStruct::kDescenderFraction
static const double kDescenderFraction
Definition: ccstruct.h:33
GAPMAP
Definition: gap_map.h:16
TO_ROW::fixed_pitch
float fixed_pitch
Definition: blobbox.h:650
tesseract::Textord::tosp_wide_aspect_ratio
double tosp_wide_aspect_ratio
Definition: textord.h:325
TO_ROW::xheight
float xheight
Definition: blobbox.h:656
tesseract::CCUtil::params
ParamsVectors * params()
Definition: ccutil.h:51
tesseract::Textord::tosp_force_wordbreak_on_punct
bool tosp_force_wordbreak_on_punct
Definition: textord.h:271
tesseract::Textord::tosp_use_pre_chopping
bool tosp_use_pre_chopping
Definition: textord.h:273
tesseract::Textord::tosp_init_guess_kn_mult
double tosp_init_guess_kn_mult
Definition: textord.h:355
TO_BLOCK::line_spacing
float line_spacing
Definition: blobbox.h:778
ScrollView::RED
Definition: scrollview.h:104
STATS
Definition: statistc.h:30
BLOBNBOX::bounding_box
const TBOX & bounding_box() const
Definition: blobbox.h:229
tesseract::PSM_LINE_FIND_ENABLED
bool PSM_LINE_FIND_ENABLED(int pageseg_mode)
Definition: publictypes.h:203
tesseract::Textord::tosp_few_samples
int tosp_few_samples
Definition: textord.h:308
tesseract::Textord::tosp_flip_fuzz_sp_to_kn
bool tosp_flip_fuzz_sp_to_kn
Definition: textord.h:299
tesseract::Textord::textord_noise_rejrows
bool textord_noise_rejrows
Definition: textord.h:387
get_min_max_xheight
void get_min_max_xheight(int block_linesize, int *min_height, int *max_height)
Definition: makerow.h:114
tesseract::Textord::textord_noise_debug
bool textord_noise_debug
Definition: textord.h:395
textord_show_final_rows
bool textord_show_final_rows
Definition: makerow.cpp:46
ROW_UNKNOWN
Definition: makerow.h:37
PAGE_RES_IT
Definition: pageres.h:668
tesseract::Textord::textord_noise_sizelimit
double textord_noise_sizelimit
Definition: textord.h:383
correct_row_xheight
void correct_row_xheight(TO_ROW *row, float xheight, float ascrise, float descdrop)
Definition: makerow.cpp:1685
TO_ROW::xheight_evidence
int xheight_evidence
Definition: blobbox.h:657
tesseract::Textord::tosp_max_sane_kn_thresh
double tosp_max_sane_kn_thresh
Definition: textord.h:359
tesseract::Textord::textord_show_boxes
bool textord_show_boxes
Definition: textord.h:375
tesseract::Textord::textord_noise_hfract
double textord_noise_hfract
Definition: textord.h:392
tesseract::Textord::textord_noise_sizefraction
int textord_noise_sizefraction
Definition: textord.h:382
TO_BLOCK::get_rows
TO_ROW_LIST * get_rows()
Definition: blobbox.h:703
tesseract::Textord::tosp_use_xht_gaps
bool tosp_use_xht_gaps
Definition: textord.h:293
tesseract::Textord::textord_baseline_debug
int textord_baseline_debug
Definition: textord.h:377
textord_min_linesize
double textord_min_linesize
Definition: makerow.cpp:81
tesseract::Textord::tosp_ignore_very_big_gaps
double tosp_ignore_very_big_gaps
Definition: textord.h:340
tesseract::Textord::tosp_fuzzy_space_factor2
double tosp_fuzzy_space_factor2
Definition: textord.h:331
W_FUZZY_SP
Fuzzy space.
Definition: werd.h:53
tesseract::Textord::tosp_rep_space
double tosp_rep_space
Definition: textord.h:341
ROW_RES
Definition: pageres.h:133
tesseract::Textord::textord_noise_rowratio
double textord_noise_rowratio
Definition: textord.h:394
tesseract::Textord::tosp_block_use_cert_spaces
bool tosp_block_use_cert_spaces
Definition: textord.h:277
WERD
Definition: werd.h:55
tesseract::Textord::tosp_kern_gap_factor1
double tosp_kern_gap_factor1
Definition: textord.h:334
TBOX::left
int16_t left() const
Definition: rect.h:71
ROW
Definition: ocrrow.h:35
tesseract::Textord::tosp_flip_caution
double tosp_flip_caution
Definition: textord.h:361
tesseract::Textord::tosp_enough_space_samples_for_median
int tosp_enough_space_samples_for_median
Definition: textord.h:304
TBOX::right
int16_t right() const
Definition: rect.h:78
tesseract::Textord::tosp_narrow_aspect_ratio
double tosp_narrow_aspect_ratio
Definition: textord.h:322
plot_box_list
void plot_box_list(ScrollView *win, BLOBNBOX_LIST *list, ScrollView::Color body_colour)
Definition: drawtord.cpp:64
tesseract::Textord::tosp_old_to_method
bool tosp_old_to_method
Definition: textord.h:263
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
BLOCK::classify_rotation
FCOORD classify_rotation() const
Definition: ocrblock.h:139
POLY_BLOCK
Definition: polyblk.h:26
WERD::set_blanks
void set_blanks(uint8_t new_blanks)
Definition: werd.h:101
tesseract::Textord::tosp_fuzzy_kn_fraction
double tosp_fuzzy_kn_fraction
Definition: textord.h:350
TO_ROW
Definition: blobbox.h:543
WERD_RES::word
WERD * word
Definition: pageres.h:180
tesseract::Textord::textord_noise_normratio
double textord_noise_normratio
Definition: textord.h:385
TO_ROW::ascrise
float ascrise
Definition: blobbox.h:658
TO_ROW::kern_size
float kern_size
Definition: blobbox.h:665
ScrollView::Color
Color
Definition: scrollview.h:100
BLOBNBOX::cblob
C_BLOB * cblob() const
Definition: blobbox.h:267
BOOL_MEMBER
#define BOOL_MEMBER(name, val, comment, vec)
Definition: params.h:315
tesseract::Textord::tosp_gap_factor
double tosp_gap_factor
Definition: textord.h:332
TO_ROW::descdrop
float descdrop
Definition: blobbox.h:659
TO_BLOCK::max_blob_size
float max_blob_size
Definition: blobbox.h:785
ROW::word_list
WERD_LIST * word_list()
Definition: ocrrow.h:54
tesseract::Textord::tosp_silly_kn_sp_gap
double tosp_silly_kn_sp_gap
Definition: textord.h:369
tesseract::Textord::tosp_init_guess_xht_mult
double tosp_init_guess_xht_mult
Definition: textord.h:357
tesseract::Textord::textord_noise_area_ratio
double textord_noise_area_ratio
Definition: textord.h:379
to_win
ScrollView * to_win
Definition: drawtord.cpp:34
compute_row_descdrop
int32_t compute_row_descdrop(TO_ROW *row, float gradient, int xheight_blob_count, STATS *asc_heights)
Definition: makerow.cpp:1563
tesseract::Textord::tosp_threshold_bias1
double tosp_threshold_bias1
Definition: textord.h:316
tesseract::Textord::filter_blobs
void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, bool testing_on)
Definition: tordmain.cpp:245
W_BOL
Start of line.
Definition: werd.h:46
tesseract::Textord::tosp_kern_gap_factor3
double tosp_kern_gap_factor3
Definition: textord.h:338
tesseract::Textord::textord_noise_translimit
int textord_noise_translimit
Definition: textord.h:384
TO_ROW::blob_list
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:599
make_rows
float make_rows(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
Definition: makerow.cpp:200
TBOX
Definition: rect.h:33
TO_BLOCK::line_size
float line_size
Definition: blobbox.h:784
tesseract::PSM_SINGLE_CHAR
Treat the image as a single character.
Definition: publictypes.h:172