tesseract
5.0.0-alpha-619-ge9db
|
#include <textord.h>
|
| Textord (CCStruct *ccstruct) |
|
| ~Textord ()=default |
|
void | TextordPage (PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height, Pix *binary_pix, Pix *thresholds_pix, Pix *grey_pix, bool use_box_bottoms, BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks) |
|
void | CleanupSingleRowResult (PageSegMode pageseg_mode, PAGE_RES *page_res) |
|
bool | use_cjk_fp_model () const |
|
void | set_use_cjk_fp_model (bool flag) |
|
void | to_spacing (ICOORD page_tr, TO_BLOCK_LIST *blocks) |
|
ROW * | make_prop_words (TO_ROW *row, FCOORD rotation) |
|
ROW * | make_blob_words (TO_ROW *row, FCOORD rotation) |
|
void | find_components (Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks) |
|
void | filter_blobs (ICOORD page_tr, TO_BLOCK_LIST *blocks, bool testing_on) |
|
void | compute_block_xheight (TO_BLOCK *block, float gradient) |
|
void | make_spline_rows (TO_BLOCK *block, float gradient, bool testing_on) |
|
|
Estimate the xheight of this row. Compute the ascender rise and descender drop at the same time. Set xheight_evidence to the number of blobs with the chosen xheight that appear in this row.
|
void | compute_row_xheight (TO_ROW *row, const FCOORD &rotation, float gradient, int block_line_size) |
|
Definition at line 68 of file textord.h.
◆ Textord()
tesseract::Textord::Textord |
( |
CCStruct * |
ccstruct | ) |
|
|
explicit |
Definition at line 35 of file textord.cpp.
36 : ccstruct_(ccstruct),
37 use_cjk_fp_model_(
false),
40 "Script has no xheight, so use a single mode",
46 "Constrain relative values of inter and intra-word gaps for "
50 "Block stats to use fixed pitch rows?", ccstruct_->
params()),
52 "Force word breaks on punct to break long lines in non-space "
68 "Use row alone when inadequate cert spaces",
75 "Don't restrict kn->sp fuzzy limit to tables",
78 "Use within xht gap for wd breaks", ccstruct_->
params()),
82 "Only use within xht gap for wd breaks", ccstruct_->
params()),
84 "Don't chng kn to space next to punct", ccstruct_->
params()),
93 "or should we use mean", ccstruct_->
params()),
95 "No.samples reqd to reestimate for row", ccstruct_->
params()),
97 "No.gaps reqd with 1 large gap to treat as a table",
100 "No.gaps reqd with few cert spaces to use certs",
105 "Factor for defining space threshold in terms of space and "
115 "narrow if w/h less than this", ccstruct_->
params()),
121 "Fract of xheight for fuzz sp", ccstruct_->
params()),
123 "Fract of xheight for fuzz sp", ccstruct_->
params()),
125 "Fract of xheight for fuzz sp", ccstruct_->
params()),
141 "Fract of kerns reqd for isolated row stats",
144 "Min difference of kn & sp in table", ccstruct_->
params()),
146 "Expect spaces bigger than this", ccstruct_->
params()),
148 "Fuzzy if less than this", ccstruct_->
params()),
154 "Don't trust spaces less than this time kn",
157 "Thresh guess - mult kn by this", ccstruct_->
params()),
159 "Thresh guess - mult xht by this", ccstruct_->
params()),
161 "Multiplier on kn to limit thresh", ccstruct_->
params()),
163 "Don't autoflip kn to sp when large separation",
166 "Limit use of xht gap with large kns", ccstruct_->
params()),
168 "Limit use of xht gap with odd small kns",
171 "Don't reduce box if the top left is non blank",
174 "Don't let sp minus kn get too small", ccstruct_->
params()),
176 "How wide fuzzies need context", ccstruct_->
params()),
189 "Fraction of bounding box for noise", ccstruct_->
params()),
191 "Ile of sizes for xheight guess", ccstruct_->
params()),
193 "Ile of sizes for xheight guess", ccstruct_->
params()),
197 "Fraction of x for big t count", ccstruct_->
params()),
201 "Dot to norm ratio for deletion", ccstruct_->
params()),
207 "xh fract height error for norm blobs",
210 "xh fract width error for norm blobs", ccstruct_->
params()),
212 "Height fraction to discard outlines as speckle noise",
217 "Dot to norm ratio for deletion", ccstruct_->
params()),
223 "Min size of baseline shift", ccstruct_->
params()) {}
◆ ~Textord()
tesseract::Textord::~Textord |
( |
| ) |
|
|
default |
◆ CleanupSingleRowResult()
void tesseract::Textord::CleanupSingleRowResult |
( |
PageSegMode |
pageseg_mode, |
|
|
PAGE_RES * |
page_res |
|
) |
| |
Definition at line 318 of file textord.cpp.
324 float row_total_conf = 0.0f;
325 int row_word_count = 0;
327 float best_conf = 0.0f;
328 for (it.restart_page(); it.word() !=
nullptr; it.forward()) {
332 if (it.next_row() != it.row()) {
333 row_total_conf /= row_word_count;
334 if (best_row ==
nullptr || best_conf < row_total_conf) {
336 best_conf = row_total_conf;
338 row_total_conf = 0.0f;
343 for (it.restart_page(); it.word() !=
nullptr; it.forward()) {
344 if (it.row() != best_row)
345 it.DeleteCurrentWord();
◆ compute_block_xheight()
void tesseract::Textord::compute_block_xheight |
( |
TO_BLOCK * |
block, |
|
|
float |
gradient |
|
) |
| |
Definition at line 1254 of file makerow.cpp.
1260 int32_t min_height, max_height;
1261 TO_ROW_IT row_it = block->
get_rows();
1262 if (row_it.empty())
return;
1267 STATS row_asc_xheights(min_height, max_height + 1);
1268 STATS row_asc_ascrise(static_cast<int>(min_height * asc_frac_xheight),
1269 static_cast<int>(max_height * asc_frac_xheight) + 1);
1270 int min_desc_height = static_cast<int>(min_height * desc_frac_xheight);
1271 int max_desc_height = static_cast<int>(max_height * desc_frac_xheight);
1272 STATS row_asc_descdrop(min_desc_height, max_desc_height + 1);
1273 STATS row_desc_xheights(min_height, max_height + 1);
1274 STATS row_desc_descdrop(min_desc_height, max_desc_height + 1);
1275 STATS row_cap_xheights(min_height, max_height + 1);
1276 STATS row_cap_floating_xheights(min_height, max_height + 1);
1277 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1278 row = row_it.data();
1286 row_asc_xheights.add(static_cast<int32_t>(row->
xheight),
1288 row_asc_ascrise.add(static_cast<int32_t>(row->
ascrise),
1290 row_asc_descdrop.add(static_cast<int32_t>(-row->
descdrop),
1293 row_desc_xheights.add(static_cast<int32_t>(row->
xheight),
1295 row_desc_descdrop.add(static_cast<int32_t>(-row->
descdrop),
1299 &row_cap_xheights, &row_cap_floating_xheights);
1303 float xheight = 0.0;
1304 float ascrise = 0.0;
1305 float descdrop = 0.0;
1307 if (row_asc_xheights.get_total() > 0) {
1309 xheight = row_asc_xheights.median();
1310 ascrise = row_asc_ascrise.median();
1311 descdrop = -row_asc_descdrop.median();
1312 }
else if (row_desc_xheights.get_total() > 0) {
1314 xheight = row_desc_xheights.median();
1315 descdrop = -row_desc_descdrop.median();
1316 }
else if (row_cap_xheights.get_total() > 0) {
1327 min_height, max_height, &(xheight), &(ascrise));
1335 bool corrected_xheight =
false;
1338 corrected_xheight =
true;
1340 if (corrected_xheight || ascrise <= 0.0) {
1341 ascrise = xheight * asc_frac_xheight;
1343 if (corrected_xheight || descdrop >= 0.0) {
1344 descdrop = -(xheight * desc_frac_xheight);
1349 tprintf(
"Block average xheight=%.4f, ascrise=%.4f, descdrop=%.4f\n",
1350 xheight, ascrise, descdrop);
1353 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
◆ compute_row_xheight()
void tesseract::Textord::compute_row_xheight |
( |
TO_ROW * |
row, |
|
|
const FCOORD & |
rotation, |
|
|
float |
gradient, |
|
|
int |
block_line_size |
|
) |
| |
Definition at line 1366 of file makerow.cpp.
1377 int min_height, max_height;
1379 STATS heights(min_height, max_height + 1);
1380 STATS floating_heights(min_height, max_height + 1);
1382 &heights, &floating_heights);
1388 rotation.
y() == 0.0,
1389 min_height, max_height,
1393 row->
descdrop = static_cast<float>(
◆ filter_blobs()
void tesseract::Textord::filter_blobs |
( |
ICOORD |
page_tr, |
|
|
TO_BLOCK_LIST * |
blocks, |
|
|
bool |
testing_on |
|
) |
| |
Definition at line 245 of file tordmain.cpp.
253 TO_BLOCK_IT block_it = blocks;
256 #ifndef GRAPHICS_DISABLED
259 #endif // GRAPHICS_DISABLED
261 for (block_it.mark_cycle_pt(); !block_it.cycled_list();
262 block_it.forward()) {
263 block = block_it.data();
277 #ifndef GRAPHICS_DISABLED
◆ find_components()
void tesseract::Textord::find_components |
( |
Pix * |
pix, |
|
|
BLOCK_LIST * |
blocks, |
|
|
TO_BLOCK_LIST * |
to_blocks |
|
) |
| |
Definition at line 215 of file tordmain.cpp.
221 int width = pixGetWidth(pix);
222 int height = pixGetHeight(pix);
223 if (width > INT16_MAX || height > INT16_MAX) {
224 tprintf(
"Input image too large! (%d, %d)\n", width, height);
230 BLOCK_IT block_it(blocks);
231 for (block_it.mark_cycle_pt(); !block_it.cycled_list();
232 block_it.forward()) {
233 BLOCK* block = block_it.data();
◆ make_blob_words()
ROW * tesseract::Textord::make_blob_words |
( |
TO_ROW * |
row, |
|
|
FCOORD |
rotation |
|
) |
| |
Definition at line 1177 of file tospace.cpp.
1190 C_OUTLINE_IT cout_it;
1192 C_BLOB_IT cblob_it = &cblobs;
1198 int16_t word_count = 0;
1200 cblob_it.set_to_list(&cblobs);
1203 WERD_IT word_it(&words);
1205 if (!box_it.empty()) {
1208 bblob = box_it.data();
1211 if (bblob->
cblob() !=
nullptr) {
1212 cout_it.set_to_list(cblob_it.data()->out_list());
1213 cout_it.move_to_last();
1215 delete bblob->
cblob();
1218 if (bblob->
cblob() !=
nullptr)
1219 cblob_it.add_after_then_move(bblob->
cblob());
1222 bblob = box_it.data();
1226 word =
new WERD(&cblobs, 1,
nullptr);
1228 word_it.add_after_then_move(word);
1233 if (box_it.at_first()) {
1238 while (!box_it.at_first());
1241 word_it.set_to_list(real_row->
word_list());
1243 word_it.add_list_after(&words);
1246 tprintf (
"Row:Made %d words in row ((%d,%d)(%d,%d))\n",
◆ make_prop_words()
ROW * tesseract::Textord::make_prop_words |
( |
TO_ROW * |
row, |
|
|
FCOORD |
rotation |
|
) |
| |
Definition at line 885 of file tospace.cpp.
901 bool fuzzy_sp =
false;
902 bool fuzzy_non =
false;
904 bool prev_gap_was_a_space =
false;
905 bool break_at_next_gap =
false;
907 C_OUTLINE_IT cout_it;
909 C_BLOB_IT cblob_it = &cblobs;
912 int32_t next_rep_char_word_right = INT32_MAX;
913 float repetition_spacing;
921 int16_t prev_gap = INT16_MAX;
922 int16_t current_gap = INT16_MAX;
923 int16_t next_gap = INT16_MAX;
924 int16_t prev_within_xht_gap = INT16_MAX;
925 int16_t current_within_xht_gap = INT16_MAX;
926 int16_t next_within_xht_gap = INT16_MAX;
927 int16_t word_count = 0;
931 if (!rep_char_it.empty ()) {
932 next_rep_char_word_right =
933 rep_char_it.data ()->bounding_box ().right ();
937 cblob_it.set_to_list (&cblobs);
940 WERD_IT word_it(&words);
943 prev_fuzzy_sp =
false;
944 prev_fuzzy_non =
false;
945 if (!box_it.empty ()) {
946 xstarts[0] = box_it.data ()->bounding_box ().left ();
947 if (xstarts[0] > next_rep_char_word_right) {
949 word = rep_char_it.extract ();
950 word_it.add_after_then_move (word);
960 repetition_spacing = find_mean_blob_spacing (word);
961 current_gap = box_it.data ()->bounding_box ().left () -
962 next_rep_char_word_right;
963 current_within_xht_gap = current_gap;
965 prev_blanks = static_cast<uint8_t>(floor (current_gap / row->
space_size));
972 tprintf (
"Repch wd at BOL(%d, %d). rep spacing %5.2f; Rgap:%d ",
973 box_it.data ()->bounding_box ().left (),
974 box_it.data ()->bounding_box ().bottom (),
975 repetition_spacing, current_gap);
976 prev_fuzzy_sp =
false;
977 prev_fuzzy_non =
false;
978 if (rep_char_it.empty ()) {
979 next_rep_char_word_right = INT32_MAX;
982 rep_char_it.forward ();
983 next_rep_char_word_right =
984 rep_char_it.data ()->bounding_box ().right ();
988 peek_at_next_gap(row,
992 next_within_xht_gap);
994 bblob = box_it.data ();
997 if (bblob->
cblob () !=
nullptr) {
998 cout_it.set_to_list (cblob_it.data ()->out_list ());
999 cout_it.move_to_last ();
1001 delete bblob->
cblob ();
1004 if (bblob->
cblob() !=
nullptr)
1005 cblob_it.add_after_then_move (bblob->
cblob ());
1006 prev_x = blob_box.
right ();
1009 bblob = box_it.data ();
1014 prev_gap = current_gap;
1015 prev_within_xht_gap = current_within_xht_gap;
1016 prev_blob_box = next_blob_box;
1017 current_gap = next_gap;
1018 current_within_xht_gap = next_within_xht_gap;
1019 peek_at_next_gap(row,
1023 next_within_xht_gap);
1025 int16_t prev_gap_arg = prev_gap;
1026 int16_t next_gap_arg = next_gap;
1028 prev_gap_arg = prev_within_xht_gap;
1029 next_gap_arg = next_within_xht_gap;
1032 if (blob_box.
left () > next_rep_char_word_right ||
1033 make_a_word_break(row, blob_box, prev_gap_arg, prev_blob_box,
1034 current_gap, current_within_xht_gap,
1035 next_blob_box, next_gap_arg,
1036 blanks, fuzzy_sp, fuzzy_non,
1037 prev_gap_was_a_space,
1038 break_at_next_gap) ||
1039 box_it.at_first()) {
1041 word =
new WERD (&cblobs, prev_blanks,
nullptr);
1043 word_it.add_after_then_move (word);
1051 else if (prev_fuzzy_non)
1055 if (blob_box.
left () > next_rep_char_word_right) {
1057 word = rep_char_it.extract ();
1058 word_it.add_after_then_move (word);
1061 repetition_spacing = find_mean_blob_spacing (word);
1063 current_within_xht_gap = current_gap;
1066 static_cast<uint8_t>(floor (current_gap / row->
space_size));
1074 (
"Repch wd (%d,%d) rep gap %5.2f; Lgap:%d (%d blanks);",
1077 repetition_spacing, current_gap, blanks);
1085 blob_box.
left () - next_rep_char_word_right;
1087 blanks = static_cast<uint8_t>(current_gap / row->
space_size);
1094 tprintf (
" Rgap:%d (%d blanks)\n",
1095 current_gap, blanks);
1099 if (rep_char_it.empty ()) {
1100 next_rep_char_word_right = INT32_MAX;
1103 rep_char_it.forward ();
1104 next_rep_char_word_right =
1105 rep_char_it.data ()->bounding_box ().right ();
1109 if (box_it.at_first () && rep_char_it.empty ()) {
1112 xstarts[1] = prev_x;
1115 prev_blanks = blanks;
1116 prev_fuzzy_sp = fuzzy_sp;
1117 prev_fuzzy_non = fuzzy_non;
1122 while (!box_it.at_first ());
1125 while (!rep_char_it.empty ()) {
1126 word = rep_char_it.extract ();
1127 word_it.add_after_then_move (word);
1130 repetition_spacing = find_mean_blob_spacing (word);
1133 blanks = static_cast<uint8_t>(floor (current_gap / row->
space_size));
1141 "Repch wd at EOL (%d,%d). rep spacing %5.2f; Lgap:%d (%d blanks)\n",
1143 repetition_spacing, current_gap, blanks);
1149 if (rep_char_it.empty ()) {
1152 xstarts[1] = prev_x;
1155 rep_char_it.forward ();
1158 real_row =
new ROW (row,
1160 word_it.set_to_list (real_row->
word_list ());
1162 word_it.add_list_after (&words);
1166 tprintf (
"Row: Made %d words in row ((%d,%d)(%d,%d))\n",
◆ make_spline_rows()
void tesseract::Textord::make_spline_rows |
( |
TO_BLOCK * |
block, |
|
|
float |
gradient, |
|
|
bool |
testing_on |
|
) |
| |
Definition at line 2003 of file makerow.cpp.
2006 #ifndef GRAPHICS_DISABLED
2009 TO_ROW_IT row_it = block->
get_rows ();
2011 row_it.move_to_first ();
2012 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
2013 if (row_it.data ()->blob_list ()->empty ())
2014 delete row_it.extract ();
2019 #ifndef GRAPHICS_DISABLED
2022 for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
2023 row_it.forward ()) {
2024 row_it.data ()->baseline.plot (
to_win, colour);
2025 colour = static_cast<ScrollView::Color>(colour + 1);
2031 make_old_baselines(block, testing_on, gradient);
2033 #ifndef GRAPHICS_DISABLED
2036 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
2037 row_it.data ()->baseline.plot (
to_win, colour);
2038 colour = static_cast<ScrollView::Color>(colour + 1);
◆ set_use_cjk_fp_model()
void tesseract::Textord::set_use_cjk_fp_model |
( |
bool |
flag | ) |
|
|
inline |
Definition at line 95 of file textord.h.
96 use_cjk_fp_model_ = flag;
◆ TextordPage()
void tesseract::Textord::TextordPage |
( |
PageSegMode |
pageseg_mode, |
|
|
const FCOORD & |
reskew, |
|
|
int |
width, |
|
|
int |
height, |
|
|
Pix * |
binary_pix, |
|
|
Pix * |
thresholds_pix, |
|
|
Pix * |
grey_pix, |
|
|
bool |
use_box_bottoms, |
|
|
BLOBNBOX_LIST * |
diacritic_blobs, |
|
|
BLOCK_LIST * |
blocks, |
|
|
TO_BLOCK_LIST * |
to_blocks |
|
) |
| |
Definition at line 226 of file textord.cpp.
231 page_tr_.
set_x(width);
232 page_tr_.
set_y(height);
233 if (to_blocks->empty()) {
236 TO_BLOCK_IT it(to_blocks);
237 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
252 const FCOORD anticlockwise90(0.0f, 1.0f);
253 const FCOORD clockwise90(0.0f, -1.0f);
254 TO_BLOCK_IT it(to_blocks);
255 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
262 to_block->
rotate(anticlockwise90);
270 TO_BLOCK_IT to_block_it(to_blocks);
271 TO_BLOCK* to_block = to_block_it.data();
276 gradient =
make_rows(page_tr_, to_blocks);
280 to_block, to_blocks);
286 baseline_detector.ComputeStraightBaselines(use_box_bottoms);
287 baseline_detector.ComputeBaselineSplinesAndXheights(
293 make_words(
this, page_tr_, gradient, blocks, to_blocks);
298 TO_BLOCK* to_block = to_block_it.data();
304 TransferDiacriticsToBlockGroups(diacritic_blobs, blocks);
307 BLOCK_IT b_it(blocks);
308 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
309 b_it.data()->compute_row_margins();
311 #ifndef GRAPHICS_DISABLED
◆ to_spacing()
void tesseract::Textord::to_spacing |
( |
ICOORD |
page_tr, |
|
|
TO_BLOCK_LIST * |
blocks |
|
) |
| |
Definition at line 43 of file tospace.cpp.
54 int16_t block_space_gap_width;
56 int16_t block_non_space_gap_width;
57 bool old_text_ord_proportional;
59 block_it.set_to_list (blocks);
61 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
62 block_it.forward ()) {
63 block = block_it.data ();
64 std::unique_ptr<GAPMAP> gapmap(
new GAPMAP (block));
65 block_spacing_stats(block,
67 old_text_ord_proportional,
68 block_space_gap_width,
69 block_non_space_gap_width);
77 static_cast<float>(block_space_gap_width) / block_non_space_gap_width < 3.0) {
78 block_non_space_gap_width = static_cast<int16_t>(floor (block_space_gap_width / 3.0));
83 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
88 tprintf (
"Block %d Row %d: Now Proportional\n",
89 block_index, row_index);
90 row_spacing_stats(row,
94 block_space_gap_width,
95 block_non_space_gap_width);
100 (
"Block %d Row %d: Now Fixed Pitch Decision:%d fp flag:%f\n",
104 #ifndef GRAPHICS_DISABLED
◆ use_cjk_fp_model()
bool tesseract::Textord::use_cjk_fp_model |
( |
| ) |
const |
|
inline |
Definition at line 92 of file textord.h.
93 return use_cjk_fp_model_;
◆ textord_baseline_debug
int tesseract::Textord::textord_baseline_debug = 0 |
"Baseline debug level"
Definition at line 377 of file textord.h.
◆ textord_blshift_maxshift
double tesseract::Textord::textord_blshift_maxshift = 0.00 |
"Max baseline shift"
Definition at line 396 of file textord.h.
◆ textord_blshift_xfraction
double tesseract::Textord::textord_blshift_xfraction = 9.99 |
"Min size of baseline shift"
Definition at line 397 of file textord.h.
◆ textord_initialasc_ile
double tesseract::Textord::textord_initialasc_ile = 0.90 |
"Ile of sizes for xheight guess"
Definition at line 381 of file textord.h.
◆ textord_initialx_ile
double tesseract::Textord::textord_initialx_ile = 0.75 |
"Ile of sizes for xheight guess"
Definition at line 380 of file textord.h.
◆ textord_max_noise_size
int tesseract::Textord::textord_max_noise_size = 7 |
"Pixel size of noise"
Definition at line 376 of file textord.h.
◆ textord_no_rejects
bool tesseract::Textord::textord_no_rejects = false |
"Don't remove noise blobs"
Definition at line 373 of file textord.h.
◆ textord_noise_area_ratio
double tesseract::Textord::textord_noise_area_ratio = 0.7 |
"Fraction of bounding box for noise"
Definition at line 379 of file textord.h.
◆ textord_noise_debug
bool tesseract::Textord::textord_noise_debug = false |
"Debug row garbage detector"
Definition at line 395 of file textord.h.
◆ textord_noise_hfract
double tesseract::Textord::textord_noise_hfract = 1.0/64 |
"Height fraction to discard outlines as speckle noise"
Definition at line 392 of file textord.h.
◆ textord_noise_normratio
double tesseract::Textord::textord_noise_normratio = 2.0 |
"Dot to norm ratio for deletion"
Definition at line 385 of file textord.h.
◆ textord_noise_rejrows
bool tesseract::Textord::textord_noise_rejrows = true |
"Reject noise-like rows"
Definition at line 387 of file textord.h.
◆ textord_noise_rejwords
bool tesseract::Textord::textord_noise_rejwords = true |
"Reject noise-like words"
Definition at line 386 of file textord.h.
◆ textord_noise_rowratio
double tesseract::Textord::textord_noise_rowratio = 6.0 |
"Dot to norm ratio for deletion"
Definition at line 394 of file textord.h.
◆ textord_noise_sizefraction
int tesseract::Textord::textord_noise_sizefraction = 10 |
"Fraction of size for maxima"
Definition at line 382 of file textord.h.
◆ textord_noise_sizelimit
double tesseract::Textord::textord_noise_sizelimit = 0.5 |
"Fraction of x for big t count"
Definition at line 383 of file textord.h.
◆ textord_noise_sncount
int tesseract::Textord::textord_noise_sncount = 1 |
"super norm blobs to save row"
Definition at line 393 of file textord.h.
◆ textord_noise_sxfract
double tesseract::Textord::textord_noise_sxfract = 0.4 |
"xh fract width error for norm blobs"
Definition at line 390 of file textord.h.
◆ textord_noise_syfract
double tesseract::Textord::textord_noise_syfract = 0.2 |
"xh fract error for norm blobs"
Definition at line 388 of file textord.h.
◆ textord_noise_translimit
int tesseract::Textord::textord_noise_translimit = 16 |
"Transitions for normal blob"
Definition at line 384 of file textord.h.
◆ textord_show_blobs
bool tesseract::Textord::textord_show_blobs = false |
"Display unsorted blobs"
Definition at line 374 of file textord.h.
◆ textord_show_boxes
bool tesseract::Textord::textord_show_boxes = false |
◆ textord_single_height_mode
bool tesseract::Textord::textord_single_height_mode = false |
"Script has no xheight, so use a single mode for horizontal text"
Definition at line 261 of file textord.h.
◆ tosp_all_flips_fuzzy
bool tesseract::Textord::tosp_all_flips_fuzzy = false |
"Pass ANY flip to context?"
Definition at line 287 of file textord.h.
◆ tosp_block_use_cert_spaces
bool tesseract::Textord::tosp_block_use_cert_spaces = true |
"Only stat OBVIOUS spaces"
Definition at line 277 of file textord.h.
◆ tosp_debug_level
int tesseract::Textord::tosp_debug_level = 0 |
◆ tosp_dont_fool_with_small_kerns
double tesseract::Textord::tosp_dont_fool_with_small_kerns = -1 |
"Limit use of xht gap with odd small kns"
Definition at line 365 of file textord.h.
◆ tosp_enough_small_gaps
double tesseract::Textord::tosp_enough_small_gaps = 0.65 |
"Fract of kerns reqd for isolated row stats"
Definition at line 343 of file textord.h.
◆ tosp_enough_space_samples_for_median
int tesseract::Textord::tosp_enough_space_samples_for_median = 3 |
"or should we use mean"
Definition at line 304 of file textord.h.
◆ tosp_few_samples
int tesseract::Textord::tosp_few_samples = 40 |
"No.gaps reqd with 1 large gap to treat as a table"
Definition at line 308 of file textord.h.
◆ tosp_flip_caution
double tesseract::Textord::tosp_flip_caution = 0.0 |
"Don't autoflip kn to sp when large separation"
Definition at line 361 of file textord.h.
◆ tosp_flip_fuzz_kn_to_sp
bool tesseract::Textord::tosp_flip_fuzz_kn_to_sp = true |
◆ tosp_flip_fuzz_sp_to_kn
bool tesseract::Textord::tosp_flip_fuzz_sp_to_kn = true |
◆ tosp_force_wordbreak_on_punct
bool tesseract::Textord::tosp_force_wordbreak_on_punct = false |
"Force word breaks on punct to break long lines in non-space delimited langs"
Definition at line 271 of file textord.h.
◆ tosp_fuzzy_kn_fraction
double tesseract::Textord::tosp_fuzzy_kn_fraction = 0.5 |
"New fuzzy kn alg"
Definition at line 350 of file textord.h.
◆ tosp_fuzzy_limit_all
bool tesseract::Textord::tosp_fuzzy_limit_all = true |
"Don't restrict kn->sp fuzzy limit to tables"
Definition at line 289 of file textord.h.
◆ tosp_fuzzy_sp_fraction
double tesseract::Textord::tosp_fuzzy_sp_fraction = 0.5 |
"New fuzzy sp alg"
Definition at line 351 of file textord.h.
◆ tosp_fuzzy_space_factor
double tesseract::Textord::tosp_fuzzy_space_factor = 0.6 |
"Fract of xheight for fuzz sp"
Definition at line 327 of file textord.h.
◆ tosp_fuzzy_space_factor1
double tesseract::Textord::tosp_fuzzy_space_factor1 = 0.5 |
"Fract of xheight for fuzz sp"
Definition at line 329 of file textord.h.
◆ tosp_fuzzy_space_factor2
double tesseract::Textord::tosp_fuzzy_space_factor2 = 0.72 |
"Fract of xheight for fuzz sp"
Definition at line 331 of file textord.h.
◆ tosp_gap_factor
double tesseract::Textord::tosp_gap_factor = 0.83 |
"gap ratio to flip sp->kern"
Definition at line 332 of file textord.h.
◆ tosp_ignore_big_gaps
double tesseract::Textord::tosp_ignore_big_gaps = -1 |
◆ tosp_ignore_very_big_gaps
double tesseract::Textord::tosp_ignore_very_big_gaps = 3.5 |
◆ tosp_improve_thresh
bool tesseract::Textord::tosp_improve_thresh = false |
"Enable improvement heuristic"
Definition at line 301 of file textord.h.
◆ tosp_init_guess_kn_mult
double tesseract::Textord::tosp_init_guess_kn_mult = 2.2 |
"Thresh guess - mult kn by this"
Definition at line 355 of file textord.h.
◆ tosp_init_guess_xht_mult
double tesseract::Textord::tosp_init_guess_xht_mult = 0.28 |
"Thresh guess - mult xht by this"
Definition at line 357 of file textord.h.
◆ tosp_kern_gap_factor1
double tesseract::Textord::tosp_kern_gap_factor1 = 2.0 |
"gap ratio to flip kern->sp"
Definition at line 334 of file textord.h.
◆ tosp_kern_gap_factor2
double tesseract::Textord::tosp_kern_gap_factor2 = 1.3 |
"gap ratio to flip kern->sp"
Definition at line 336 of file textord.h.
◆ tosp_kern_gap_factor3
double tesseract::Textord::tosp_kern_gap_factor3 = 2.5 |
"gap ratio to flip kern->sp"
Definition at line 338 of file textord.h.
◆ tosp_large_kerning
double tesseract::Textord::tosp_large_kerning = 0.19 |
"Limit use of xht gap with large kns"
Definition at line 363 of file textord.h.
◆ tosp_max_sane_kn_thresh
double tesseract::Textord::tosp_max_sane_kn_thresh = 5.0 |
"Multiplier on kn to limit thresh"
Definition at line 359 of file textord.h.
◆ tosp_min_sane_kn_sp
double tesseract::Textord::tosp_min_sane_kn_sp = 1.5 |
"Don't trust spaces less than this time kn"
Definition at line 353 of file textord.h.
◆ tosp_narrow_aspect_ratio
double tesseract::Textord::tosp_narrow_aspect_ratio = 0.48 |
"narrow if w/h less than this"
Definition at line 322 of file textord.h.
◆ tosp_narrow_blobs_not_cert
bool tesseract::Textord::tosp_narrow_blobs_not_cert = true |
"Only stat OBVIOUS spaces"
Definition at line 281 of file textord.h.
◆ tosp_narrow_fraction
double tesseract::Textord::tosp_narrow_fraction = 0.3 |
"Fract of xheight for narrow"
Definition at line 320 of file textord.h.
◆ tosp_near_lh_edge
double tesseract::Textord::tosp_near_lh_edge = 0 |
"Don't reduce box if the top left is non blank"
Definition at line 367 of file textord.h.
◆ tosp_old_sp_kn_th_factor
double tesseract::Textord::tosp_old_sp_kn_th_factor = 2.0 |
"Factor for defining space threshold in terms of space and kern sizes"
Definition at line 314 of file textord.h.
◆ tosp_old_to_bug_fix
bool tesseract::Textord::tosp_old_to_bug_fix = false |
"Fix suspected bug in old code"
Definition at line 275 of file textord.h.
◆ tosp_old_to_constrain_sp_kn
bool tesseract::Textord::tosp_old_to_constrain_sp_kn = false |
"Constrain relative values of inter and intra-word gaps for old_to_method."
Definition at line 266 of file textord.h.
◆ tosp_old_to_method
bool tesseract::Textord::tosp_old_to_method = false |
"Space stats use prechopping?"
Definition at line 263 of file textord.h.
◆ tosp_only_small_gaps_for_kern
bool tesseract::Textord::tosp_only_small_gaps_for_kern = false |
◆ tosp_only_use_prop_rows
bool tesseract::Textord::tosp_only_use_prop_rows = true |
"Block stats to use fixed pitch rows?"
Definition at line 268 of file textord.h.
◆ tosp_only_use_xht_gaps
bool tesseract::Textord::tosp_only_use_xht_gaps = false |
"Only use within xht gap for wd breaks"
Definition at line 295 of file textord.h.
◆ tosp_pass_wide_fuzz_sp_to_context
double tesseract::Textord::tosp_pass_wide_fuzz_sp_to_context = 0.75 |
"How wide fuzzies need context"
Definition at line 371 of file textord.h.
◆ tosp_recovery_isolated_row_stats
bool tesseract::Textord::tosp_recovery_isolated_row_stats = true |
"Use row alone when inadequate cert spaces"
Definition at line 285 of file textord.h.
◆ tosp_redo_kern_limit
int tesseract::Textord::tosp_redo_kern_limit = 10 |
"No.samples reqd to reestimate for row"
Definition at line 306 of file textord.h.
◆ tosp_rep_space
double tesseract::Textord::tosp_rep_space = 1.6 |
"rep gap multiplier for space"
Definition at line 341 of file textord.h.
◆ tosp_row_use_cert_spaces
bool tesseract::Textord::tosp_row_use_cert_spaces = true |
"Only stat OBVIOUS spaces"
Definition at line 279 of file textord.h.
◆ tosp_row_use_cert_spaces1
bool tesseract::Textord::tosp_row_use_cert_spaces1 = true |
"Only stat OBVIOUS spaces"
Definition at line 283 of file textord.h.
◆ tosp_rule_9_test_punct
bool tesseract::Textord::tosp_rule_9_test_punct = false |
"Don't chng kn to space next to punct"
Definition at line 297 of file textord.h.
◆ tosp_sanity_method
int tesseract::Textord::tosp_sanity_method = 1 |
"How to avoid being silly"
Definition at line 311 of file textord.h.
◆ tosp_short_row
int tesseract::Textord::tosp_short_row = 20 |
"No.gaps reqd with few cert spaces to use certs"
Definition at line 310 of file textord.h.
◆ tosp_silly_kn_sp_gap
double tesseract::Textord::tosp_silly_kn_sp_gap = 0.2 |
"Don't let sp minus kn get too small"
Definition at line 369 of file textord.h.
◆ tosp_stats_use_xht_gaps
bool tesseract::Textord::tosp_stats_use_xht_gaps = true |
"Use within xht gap for wd breaks"
Definition at line 291 of file textord.h.
◆ tosp_table_fuzzy_kn_sp_ratio
double tesseract::Textord::tosp_table_fuzzy_kn_sp_ratio = 3.0 |
"Fuzzy if less than this"
Definition at line 349 of file textord.h.
◆ tosp_table_kn_sp_ratio
double tesseract::Textord::tosp_table_kn_sp_ratio = 2.25 |
"Min difference of kn & sp in table"
Definition at line 345 of file textord.h.
◆ tosp_table_xht_sp_ratio
double tesseract::Textord::tosp_table_xht_sp_ratio = 0.33 |
"Expect spaces bigger than this"
Definition at line 347 of file textord.h.
◆ tosp_threshold_bias1
double tesseract::Textord::tosp_threshold_bias1 = 0 |
"how far between kern and space?"
Definition at line 316 of file textord.h.
◆ tosp_threshold_bias2
double tesseract::Textord::tosp_threshold_bias2 = 0 |
"how far between kern and space?"
Definition at line 318 of file textord.h.
◆ tosp_use_pre_chopping
bool tesseract::Textord::tosp_use_pre_chopping = false |
"Space stats use prechopping?"
Definition at line 273 of file textord.h.
◆ tosp_use_xht_gaps
bool tesseract::Textord::tosp_use_xht_gaps = true |
"Use within xht gap for wd breaks"
Definition at line 293 of file textord.h.
◆ tosp_wide_aspect_ratio
double tesseract::Textord::tosp_wide_aspect_ratio = 0.0 |
"wide if w/h less than this"
Definition at line 325 of file textord.h.
◆ tosp_wide_fraction
double tesseract::Textord::tosp_wide_fraction = 0.52 |
"Fract of xheight for wide"
Definition at line 323 of file textord.h.
The documentation for this class was generated from the following files:
void ComputeEdgeOffsets(Pix *thresholds, Pix *grey)
bool textord_old_baselines
bool tosp_stats_use_xht_gaps
BLOBNBOX_LIST small_blobs
void plot_graded_blobs(ScrollView *to_win)
void set_x(int16_t xin)
rewrite function
bool textord_noise_rejwords
void make_baseline_spline(TO_ROW *row, TO_BLOCK *block)
void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST *real_rows)
double tosp_wide_fraction
double tosp_large_kerning
C_OUTLINE_LIST * out_list()
static const double kXHeightFraction
double tosp_fuzzy_space_factor1
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
double textord_blshift_xfraction
bool tosp_all_flips_fuzzy
double tosp_min_sane_kn_sp
ScrollView * create_to_win(ICOORD page_tr)
float make_single_row(ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks)
BLOBNBOX_LIST noise_blobs
void to_spacing(ICOORD page_tr, TO_BLOCK_LIST *blocks)
static const double kXHeightCapRatio
double tosp_table_kn_sp_ratio
double textord_noise_sxfract
void plot_word_decisions(ScrollView *win, int16_t pitch, TO_ROW *row)
TBOX bounding_box() const
double tosp_ignore_big_gaps
#define INT_MEMBER(name, val, comment, vec)
ROW_LIST * row_list()
get rows
double textord_initialasc_ile
int textord_max_noise_size
double tosp_fuzzy_space_factor
void fill_heights(TO_ROW *row, float gradient, int min_height, int max_height, STATS *heights, STATS *floating_heights)
void recalc_bounding_box()
int textord_noise_sncount
double textord_blshift_maxshift
bool tosp_recovery_isolated_row_stats
bool tosp_rule_9_test_punct
bool textord_debug_xheights
double tosp_kern_gap_factor2
double tosp_table_fuzzy_kn_sp_ratio
PITCH_TYPE pitch_decision
double textord_initialx_ile
bool tosp_row_use_cert_spaces
int compute_xheight_from_modes(STATS *heights, STATS *floating_heights, bool cap_only, int min_height, int max_height, float *xheight, float *ascrise)
bool PSM_WORD_FIND_ENABLED(int pageseg_mode)
bool textord_single_height_mode
void set_y(int16_t yin)
Rewrite function: overwrites the y coordinate with yin.
bool tosp_narrow_blobs_not_cert
void compute_row_xheight(TO_ROW *row, const FCOORD &rotation, float gradient, int block_line_size)
bool tosp_flip_fuzz_kn_to_sp
bool textord_show_initial_words
void rotate(const FCOORD &rotation)
double textord_excess_blobsize
void set_re_rotation(const FCOORD &rotation)
ROW_CATEGORY get_row_category(const TO_ROW *row)
bool tosp_only_use_xht_gaps
bool tosp_only_small_gaps_for_kern
void find_components(Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
bool PSM_SPARSE(int pageseg_mode)
double tosp_table_xht_sp_ratio
bool rep_chars_marked() const
void extract_edges(Pix *pix, BLOCK *block)
double tosp_threshold_bias2
PDBLK pdblk
Page Description Block.
bool tosp_row_use_cert_spaces1
bool tosp_fuzzy_limit_all
bool tosp_old_to_constrain_sp_kn
void mark_repeated_chars(TO_ROW *row)
BLOBNBOX_LIST large_blobs
WERD_CHOICE * best_choice
double tosp_enough_small_gaps
void set_poly_block(POLY_BLOCK *blk)
set the poly block
void make_words(tesseract::Textord *textord, ICOORD page_tr, float gradient, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
double textord_noise_syfract
double tosp_narrow_fraction
POLY_BLOCK * poly_block() const
void set_flag(WERD_FLAGS mask, bool value)
void set_global_loc_code(int loc_code)
bool joined_to_prev() const
double tosp_old_sp_kn_th_factor
double tosp_dont_fool_with_small_kerns
double tosp_pass_wide_fuzz_sp_to_context
#define double_MEMBER(name, val, comment, vec)
bool tosp_only_use_prop_rows
void set_classify_rotation(const FCOORD &rotation)
static const double kAscenderFraction
double tosp_fuzzy_sp_fraction
TBOX bounding_box() const
static const double kDescenderFraction
double tosp_wide_aspect_ratio
bool tosp_force_wordbreak_on_punct
bool tosp_use_pre_chopping
double tosp_init_guess_kn_mult
const TBOX & bounding_box() const
bool PSM_LINE_FIND_ENABLED(int pageseg_mode)
bool tosp_flip_fuzz_sp_to_kn
bool textord_noise_rejrows
void get_min_max_xheight(int block_linesize, int *min_height, int *max_height)
bool textord_show_final_rows
double textord_noise_sizelimit
void correct_row_xheight(TO_ROW *row, float xheight, float ascrise, float descdrop)
double tosp_max_sane_kn_thresh
double textord_noise_hfract
int textord_noise_sizefraction
int textord_baseline_debug
double textord_min_linesize
double tosp_ignore_very_big_gaps
double tosp_fuzzy_space_factor2
double textord_noise_rowratio
bool tosp_block_use_cert_spaces
double tosp_kern_gap_factor1
int tosp_enough_space_samples_for_median
double tosp_narrow_aspect_ratio
void plot_box_list(ScrollView *win, BLOBNBOX_LIST *list, ScrollView::Color body_colour)
DLLSYM void tprintf(const char *format,...)
FCOORD classify_rotation() const
void set_blanks(uint8_t new_blanks)
double tosp_fuzzy_kn_fraction
double textord_noise_normratio
#define BOOL_MEMBER(name, val, comment, vec)
double tosp_silly_kn_sp_gap
double tosp_init_guess_xht_mult
double textord_noise_area_ratio
int32_t compute_row_descdrop(TO_ROW *row, float gradient, int xheight_blob_count, STATS *asc_heights)
double tosp_threshold_bias1
void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, bool testing_on)
double tosp_kern_gap_factor3
int textord_noise_translimit
BLOBNBOX_LIST * blob_list()
float make_rows(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
Treat the image as a single character.