tesseract
5.0.0-alpha-619-ge9db
|
#include "params.h"
#include "ocrblock.h"
#include "blobs.h"
#include "blobbox.h"
#include "statistc.h"
Go to the source code of this file.
|
void | get_min_max_xheight (int block_linesize, int *min_height, int *max_height) |
|
ROW_CATEGORY | get_row_category (const TO_ROW *row) |
|
bool | within_error_margin (float test, float num, float margin) |
|
void | fill_heights (TO_ROW *row, float gradient, int min_height, int max_height, STATS *heights, STATS *floating_heights) |
|
float | make_single_row (ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks) |
|
float | make_rows (ICOORD page_tr, TO_BLOCK_LIST *port_blocks) |
|
void | make_initial_textrows (ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, bool testing_on) |
|
void | fit_lms_line (TO_ROW *row) |
|
void | compute_page_skew (TO_BLOCK_LIST *blocks, float &page_m, float &page_err) |
|
void | vigorous_noise_removal (TO_BLOCK *block) |
|
void | cleanup_rows_making (ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on) |
|
void | delete_non_dropout_rows (TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on) |
|
bool | find_best_dropout_row (TO_ROW *row, int32_t distance, float dist_limit, int32_t line_index, TO_ROW_IT *row_it, bool testing_on) |
|
TBOX | deskew_block_coords (TO_BLOCK *block, float gradient) |
|
void | compute_line_occupation (TO_BLOCK *block, float gradient, int32_t min_y, int32_t max_y, int32_t *occupation, int32_t *deltas) |
|
void | compute_occupation_threshold (int32_t low_window, int32_t high_window, int32_t line_count, int32_t *occupation, int32_t *thresholds) |
|
void | compute_dropout_distances (int32_t *occupation, int32_t *thresholds, int32_t line_count) |
|
void | expand_rows (ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on) |
|
void | adjust_row_limits (TO_BLOCK *block) |
|
void | compute_row_stats (TO_BLOCK *block, bool testing_on) |
|
float | median_block_xheight (TO_BLOCK *block, float gradient) |
|
int | compute_xheight_from_modes (STATS *heights, STATS *floating_heights, bool cap_only, int min_height, int max_height, float *xheight, float *ascrise) |
|
int32_t | compute_row_descdrop (TO_ROW *row, float gradient, int xheight_blob_count, STATS *heights) |
|
int32_t | compute_height_modes (STATS *heights, int32_t min_height, int32_t max_height, int32_t *modes, int32_t maxmodes) |
|
void | correct_row_xheight (TO_ROW *row, float xheight, float ascrise, float descdrop) |
|
void | separate_underlines (TO_BLOCK *block, float gradient, FCOORD rotation, bool testing_on) |
|
void | pre_associate_blobs (ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, bool testing_on) |
|
void | fit_parallel_rows (TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on) |
|
void | fit_parallel_lms (float gradient, TO_ROW *row) |
|
void | make_baseline_spline (TO_ROW *row, TO_BLOCK *block) |
|
bool | segment_baseline (TO_ROW *row, TO_BLOCK *block, int32_t &segments, int32_t *xstarts) |
|
double * | linear_spline_baseline (TO_ROW *row, TO_BLOCK *block, int32_t &segments, int32_t xstarts[]) |
|
void | assign_blobs_to_rows (TO_BLOCK *block, float *gradient, int pass, bool reject_misses, bool make_new_rows, bool drawing_skew) |
|
OVERLAP_STATE | most_overlapping_row (TO_ROW_IT *row_it, TO_ROW *&best_row, float top, float bottom, float rowsize, bool testing_blob) |
|
int | blob_x_order (const void *item1, const void *item2) |
|
int | row_y_order (const void *item1, const void *item2) |
|
int | row_spacing_order (const void *item1, const void *item2) |
|
void | mark_repeated_chars (TO_ROW *row) |
|
◆ OVERLAP_STATE
Enumerator |
---|
ASSIGN | |
REJECT | |
NEW_ROW | |
Definition at line 27 of file makerow.h.
◆ ROW_CATEGORY
Enumerator |
---|
ROW_ASCENDERS_FOUND | |
ROW_DESCENDERS_FOUND | |
ROW_UNKNOWN | |
ROW_INVALID | |
Definition at line 34 of file makerow.h.
◆ adjust_row_limits()
void adjust_row_limits |
( |
TO_BLOCK * |
block | ) |
|
adjust_row_limits
Change the limits of rows to suit the default fractions.
Definition at line 1107 of file makerow.cpp.
1114 TO_ROW_IT row_it = block->
get_rows ();
1117 tprintf(
"Adjusting row limits for block(%d,%d)\n",
1120 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1121 row = row_it.data ();
1124 tprintf(
"Row at %f has min %f, max %f, size %f\n",
◆ assign_blobs_to_rows()
void assign_blobs_to_rows |
( |
TO_BLOCK * |
block, |
|
|
float * |
gradient, |
|
|
int |
pass, |
|
|
bool |
reject_misses, |
|
|
bool |
make_new_rows, |
|
|
bool |
drawing_skew |
|
) |
| |
Definition at line 2278 of file makerow.cpp.
2289 float g_length = 1.0f;
2294 float smooth_factor;
2299 TO_ROW *dest_row =
nullptr;
2301 BLOBNBOX_IT blob_it = &block->
blobs;
2302 TO_ROW_IT row_it = block->
get_rows ();
2307 if (gradient !=
nullptr)
2308 g_length = sqrt (1 + *gradient * *gradient);
2309 #ifndef GRAPHICS_DISABLED
2315 smooth_factor = 1.0;
2317 row_count = row_it.length ();
2318 if (!blob_it.empty ()) {
2319 left_x = blob_it.data ()->bounding_box ().left ();
2325 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
2326 blob = blob_it.data ();
2327 if (gradient !=
nullptr) {
2332 && last_x - left_x > block->
line_size * 2
2333 && textord_interpolating_skew) {
2336 / (last_x - left_x);
2342 #ifndef GRAPHICS_DISABLED
2346 if (!row_it.empty ()) {
2347 for (row_it.move_to_first ();
2348 !row_it.at_last () && row_it.data ()->min_y () > top;
2350 row = row_it.data ();
2351 if (row->
min_y () <= top && row->
max_y () >= bottom) {
2359 if (overlap_result ==
NEW_ROW && !reject_misses)
2364 if (!make_new_rows) {
2365 near_dist = row_it.data_relative (-1)->min_y () - top;
2367 if (bottom < row->min_y ()) {
2368 if (row->
min_y () - bottom <=
2376 else if (near_dist > 0
2377 && near_dist < bottom - row->max_y ()) {
2379 dest_row = row_it.data ();
2380 if (dest_row->
min_y () - bottom <=
2388 if (top - row->
max_y () <=
2390 block->
line_size) * (textord_overlap_x +
2399 if (overlap_result ==
ASSIGN)
2400 dest_row->
add_blob (blob_it.extract (), top, bottom,
2402 if (overlap_result ==
NEW_ROW) {
2403 if (make_new_rows && top - bottom < block->max_blob_size) {
2405 new TO_ROW (blob_it.extract (), top, bottom,
2408 if (bottom > row_it.data ()->min_y ())
2409 row_it.add_before_then_move (dest_row);
2412 row_it.add_after_then_move (dest_row);
2415 textord_skewsmooth_offset);
2421 else if (make_new_rows && top - bottom < block->max_blob_size) {
2426 row_it.add_after_then_move(dest_row);
2428 textord_skewsmooth_offset2);
2433 if (overlap_result !=
REJECT) {
2434 tprintf(
"Test blob assigned to row at (%g,%g) on pass %d\n",
2438 tprintf(
"Test blob assigned to no row on pass %d\n", pass);
2441 if (overlap_result !=
REJECT) {
2442 while (!row_it.at_first() &&
2443 row_it.data()->min_y() > row_it.data_relative(-1)->min_y()) {
2444 row = row_it.extract();
2446 row_it.add_before_then_move(row);
2448 while (!row_it.at_last() &&
2449 row_it.data ()->min_y() < row_it.data_relative (1)->min_y()) {
2450 row = row_it.extract();
2453 row_it.add_after_then_move(row);
2455 BLOBNBOX_IT added_blob_it(dest_row->
blob_list());
2456 added_blob_it.move_to_last();
2457 TBOX prev_box = added_blob_it.data_relative(-1)->bounding_box();
2458 if (dest_row->
blob_list()->singleton() ||
2460 block_skew = (1 - smooth_factor) * block_skew
2466 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
2467 if (row_it.data()->blob_list()->empty())
2468 delete row_it.extract();
◆ blob_x_order()
int blob_x_order |
( |
const void * |
item1, |
|
|
const void * |
item2 |
|
) |
| |
Definition at line 2573 of file makerow.cpp.
2577 const BLOBNBOX *blob1 = *reinterpret_cast<const BLOBNBOX* const*>(item1);
2579 const BLOBNBOX *blob2 = *reinterpret_cast<const BLOBNBOX* const*>(item2);
◆ cleanup_rows_making()
void cleanup_rows_making |
( |
ICOORD |
page_tr, |
|
|
TO_BLOCK * |
block, |
|
|
float |
gradient, |
|
|
FCOORD |
rotation, |
|
|
int32_t |
block_edge, |
|
|
bool |
testing_on |
|
) |
| |
cleanup_rows_making
Remove overlapping rows and fit all the blobs to what's left.
Definition at line 517 of file makerow.cpp.
526 BLOBNBOX_IT blob_it = &block->
blobs;
527 TO_ROW_IT row_it = block->
get_rows ();
529 #ifndef GRAPHICS_DISABLED
546 expand_rows(page_tr, block, gradient, rotation, block_edge, testing_on);
547 blob_it.set_to_list (&block->
blobs);
548 row_it.set_to_list (block->
get_rows ());
549 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
550 blob_it.add_list_after (row_it.data ()->blob_list ());
554 blob_it.set_to_list (&block->
blobs);
558 blob_it.set_to_list (&block->
blobs);
◆ compute_dropout_distances()
void compute_dropout_distances |
( |
int32_t * |
occupation, |
|
|
int32_t * |
thresholds, |
|
|
int32_t |
line_count |
|
) |
| |
Definition at line 902 of file makerow.cpp.
911 int32_t prev_threshold;
918 prev_threshold = thresholds[line_index];
923 while (line_index < line_count
924 && (occupation[line_index] < thresholds[line_index]
925 || occupation[line_index - 1] >= prev_threshold));
926 if (line_index < line_count) {
927 back_index = line_index - 1;
929 while (next_dist < -distance && back_index >= 0) {
930 thresholds[back_index] = next_dist;
938 while (line_index < line_count);
◆ compute_height_modes()
int32_t compute_height_modes |
( |
STATS * |
heights, |
|
|
int32_t |
min_height, |
|
|
int32_t |
max_height, |
|
|
int32_t * |
modes, |
|
|
int32_t |
maxmodes |
|
) |
| |
Definition at line 1623 of file makerow.cpp.
1631 int32_t least_count;
1632 int32_t least_index;
1635 src_count = max_height + 1 - min_height;
1637 least_count = INT32_MAX;
1639 for (src_index = 0; src_index < src_count; src_index++) {
1640 pile_count = heights->
pile_count(min_height + src_index);
1641 if (pile_count > 0) {
1642 if (dest_count < maxmodes) {
1643 if (pile_count < least_count) {
1645 least_count = pile_count;
1646 least_index = dest_count;
1648 modes[dest_count++] = min_height + src_index;
1649 }
else if (pile_count >= least_count) {
1650 while (least_index < maxmodes - 1) {
1651 modes[least_index] = modes[least_index + 1];
1656 modes[maxmodes - 1] = min_height + src_index;
1657 if (pile_count == least_count) {
1659 least_index = maxmodes - 1;
1663 for (dest_count = 1; dest_count < maxmodes; dest_count++) {
1664 pile_count = heights->
pile_count(modes[dest_count]);
1665 if (pile_count < least_count) {
1667 least_count = pile_count;
1668 least_index = dest_count;
◆ compute_line_occupation()
void compute_line_occupation |
( |
TO_BLOCK * |
block, |
|
|
float |
gradient, |
|
|
int32_t |
min_y, |
|
|
int32_t |
max_y, |
|
|
int32_t * |
occupation, |
|
|
int32_t * |
deltas |
|
) |
| |
Definition at line 768 of file makerow.cpp.
780 TO_ROW_IT row_it = block->
get_rows ();
787 line_count = max_y - min_y + 1;
788 length = sqrt (gradient * gradient + 1);
789 rotation =
FCOORD (1 / length, -gradient / length);
790 for (line_index = 0; line_index < line_count; line_index++)
791 deltas[line_index] = 0;
792 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
793 row = row_it.data ();
795 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
796 blob_it.forward ()) {
797 blob = blob_it.data ();
799 blob_box.
rotate (rotation);
800 int32_t width = blob_box.
right() - blob_box.
left();
801 index = blob_box.
bottom() - min_y;
804 deltas[index] += width;
805 index = blob_box.
top() - min_y;
807 deltas[index] -= width;
810 occupation[0] = deltas[0];
811 for (line_index = 1; line_index < line_count; line_index++)
812 occupation[line_index] = occupation[line_index - 1] + deltas[line_index];
◆ compute_occupation_threshold()
void compute_occupation_threshold |
( |
int32_t |
low_window, |
|
|
int32_t |
high_window, |
|
|
int32_t |
line_count, |
|
|
int32_t * |
occupation, |
|
|
int32_t * |
thresholds |
|
) |
| |
compute_occupation_threshold
Compute, for each line of the occupation array, the threshold used to decide whether that line counts as a textline or not.
Definition at line 821 of file makerow.cpp.
839 if (low_window + high_window < line_count) {
840 for (sum = 0, high_index = 0; high_index < low_window; high_index++)
841 sum += occupation[high_index];
842 for (low_index = 0; low_index < high_window; low_index++, high_index++)
843 sum += occupation[high_index];
844 min_occ = occupation[0];
846 for (test_index = 1; test_index < high_index; test_index++) {
847 if (occupation[test_index] <= min_occ) {
848 min_occ = occupation[test_index];
849 min_index = test_index;
852 for (line_index = 0; line_index < low_window; line_index++)
853 thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
855 for (low_index = 0; high_index < line_count; low_index++, high_index++) {
856 sum -= occupation[low_index];
857 sum += occupation[high_index];
858 if (occupation[high_index] <= min_occ) {
860 min_occ = occupation[high_index];
861 min_index = high_index;
864 if (min_index <= low_index) {
865 min_occ = occupation[low_index + 1];
866 min_index = low_index + 1;
867 for (test_index = low_index + 2; test_index <= high_index;
869 if (occupation[test_index] <= min_occ) {
870 min_occ = occupation[test_index];
872 min_index = test_index;
876 thresholds[line_index++] = (sum - min_occ) / divisor + min_occ;
880 min_occ = occupation[0];
882 for (sum = 0, low_index = 0; low_index < line_count; low_index++) {
883 if (occupation[low_index] < min_occ) {
884 min_occ = occupation[low_index];
885 min_index = low_index;
887 sum += occupation[low_index];
891 for (; line_index < line_count; line_index++)
892 thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
◆ compute_page_skew()
void compute_page_skew |
( |
TO_BLOCK_LIST * |
blocks, |
|
|
float & |
page_m, |
|
|
float & |
page_err |
|
) |
| |
Definition at line 286 of file makerow.cpp.
296 TO_BLOCK_IT block_it = blocks;
300 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
301 block_it.forward ()) {
302 POLY_BLOCK* pb = block_it.data()->block->pdblk.poly_block();
303 if (pb !=
nullptr && !pb->
IsText())
305 row_count += block_it.data ()->get_rows ()->length ();
307 TO_ROW_IT row_it(block_it.data()->get_rows());
308 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
309 blob_count += row_it.data ()->blob_list ()->length ();
311 if (row_count == 0) {
317 std::vector<float> gradients(blob_count);
319 std::vector<float> errors(blob_count);
322 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
323 block_it.forward ()) {
324 POLY_BLOCK* pb = block_it.data()->block->pdblk.poly_block();
325 if (pb !=
nullptr && !pb->
IsText())
327 TO_ROW_IT row_it(block_it.data ()->get_rows());
328 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
329 row = row_it.data ();
330 blob_count = row->
blob_list ()->length ();
331 row_err = static_cast<int32_t>(ceil (row->
line_error ()));
334 if (textord_biased_skewcalc) {
335 blob_count /= row_err;
336 for (blob_count /= row_err; blob_count > 0; blob_count--) {
337 gradients[row_index] = row->
line_m ();
344 gradients[row_index] = row->
line_m ();
350 if (row_index == 0) {
352 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
353 block_it.forward ()) {
354 POLY_BLOCK* pb = block_it.data()->block->pdblk.poly_block();
355 if (pb !=
nullptr && !pb->
IsText())
357 TO_ROW_IT row_it(block_it.data()->get_rows());
358 for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
360 row = row_it.data ();
361 gradients[row_index] = row->
line_m ();
367 row_count = row_index;
369 &gradients[0], row_count);
370 page_m = gradients[row_index];
372 &errors[0], row_count);
373 page_err = errors[row_index];
◆ compute_row_descdrop()
int32_t compute_row_descdrop |
( |
TO_ROW * |
row, |
|
|
float |
gradient, |
|
|
int |
xheight_blob_count, |
|
|
STATS * |
heights |
|
) |
| |
Definition at line 1563 of file makerow.cpp.
1566 int i_min = asc_heights->min_bucket();
1568 i_min = static_cast<int>(
1571 int i_max = asc_heights->max_bucket();
1575 int num_potential_asc = 0;
1576 for (
int i = i_min; i <= i_max; ++i) {
1577 num_potential_asc += asc_heights->pile_count(i);
1587 STATS heights (min_height, max_height + 1);
1588 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1589 blob = blob_it.data();
1593 height = (gradient * xcentre + row->
parallel_c() -
1595 if (height >= min_height && height <= max_height)
1596 heights.
add(static_cast<int>(floor(height + 0.5)), 1);
1599 int blob_index = heights.
mode();
1600 int blob_count = heights.
pile_count(blob_index);
1601 float total_fraction =
1603 if (static_cast<float>(blob_count + num_potential_asc) <
1604 xheight_blob_count * total_fraction) {
1607 int descdrop = blob_count > 0 ? -blob_index : 0;
1609 tprintf(
"Descdrop: %d (potential ascenders %d, descenders %d)\n",
1610 descdrop, num_potential_asc, blob_count);
◆ compute_row_stats()
void compute_row_stats |
( |
TO_BLOCK * |
block, |
|
|
bool |
testing_on |
|
) |
| |
Definition at line 1143 of file makerow.cpp.
1151 TO_ROW_IT row_it = block->
get_rows ();
1153 int16_t rowcount = row_it.length ();
1155 std::vector<TO_ROW*> rows(rowcount);
1158 row_it.move_to_last ();
1160 row = row_it.data ();
1161 if (prev_row !=
nullptr) {
1162 rows[rowcount++] = prev_row;
1165 tprintf (
"Row at %g yields spacing of %g\n",
1171 while (!row_it.at_last ());
1176 tprintf (
"Blob based spacing=(%g,%g), offset=%g",
1181 iqr = rows[row_index]->spacing;
1184 iqr -= rows[row_index]->spacing;
1187 block->
key_row = rows[row_index];
1189 tprintf (
" row based=%g(%g)", rows[row_index]->spacing, iqr);
1193 if (rows[row_index]->spacing < block->line_spacing
1194 && rows[row_index]->spacing > block->
line_size)
1196 block->
line_size = rows[row_index]->spacing;
1198 else if (rows[row_index]->spacing > block->
line_spacing)
1203 if (rows[row_index]->spacing < block->line_spacing)
1204 block->
line_size = rows[row_index]->spacing;
1219 tprintf (
"\nEstimate line size=%g, spacing=%g, offset=%g\n",
◆ compute_xheight_from_modes()
int compute_xheight_from_modes |
( |
STATS * |
heights, |
|
|
STATS * |
floating_heights, |
|
|
bool |
cap_only, |
|
|
int |
min_height, |
|
|
int |
max_height, |
|
|
float * |
xheight, |
|
|
float * |
ascrise |
|
) |
| |
Definition at line 1467 of file makerow.cpp.
1470 int blob_index = heights->
mode();
1471 int blob_count = heights->
pile_count(blob_index);
1473 tprintf(
"min_height=%d, max_height=%d, mode=%d, count=%d, total=%d\n",
1474 min_height, max_height, blob_index, blob_count,
1477 floating_heights->
print();
1479 if (blob_count == 0)
return 0;
1481 bool in_best_pile =
false;
1482 int prev_size = -INT32_MAX;
1486 if (cap_only && mode_count > 1)
1490 tprintf(
"found %d modes: ", mode_count);
1491 for (x = 0; x < mode_count; x++)
tprintf(
"%d ", modes[x]);
1495 for (x = 0; x < mode_count - 1; x++) {
1496 if (modes[x] != prev_size + 1)
1497 in_best_pile =
false;
1498 int modes_x_count = heights->
pile_count(modes[x]) -
1501 (in_best_pile || modes_x_count > best_count)) {
1502 for (
int asc = x + 1; asc < mode_count; asc++) {
1504 static_cast<float>(modes[asc]) / static_cast<float>(modes[x]);
1509 if (modes_x_count > best_count) {
1510 in_best_pile =
true;
1511 best_count = modes_x_count;
1514 tprintf(
"X=%d, asc=%d, count=%d, ratio=%g\n",
1515 modes[x], modes[asc]-modes[x], modes_x_count, ratio);
1517 prev_size = modes[x];
1518 *xheight = static_cast<float>(modes[x]);
1519 *ascrise = static_cast<float>(modes[asc] - modes[x]);
1524 if (*xheight == 0) {
1531 if (floating_heights->
get_total() > 0) {
1532 for (x = min_height; x < max_height; ++x) {
1535 blob_index = heights->
mode();
1536 for (x = min_height; x < max_height; ++x) {
1540 *xheight = static_cast<float>(blob_index);
1542 best_count = heights->
pile_count(blob_index);
1544 tprintf(
"Single mode xheight set to %g\n", *xheight);
1546 tprintf(
"Multi-mode xheight set to %g, asc=%g\n", *xheight, *ascrise);
◆ correct_row_xheight()
void correct_row_xheight |
( |
TO_ROW * |
row, |
|
|
float |
xheight, |
|
|
float |
ascrise, |
|
|
float |
descdrop |
|
) |
| |
Definition at line 1685 of file makerow.cpp.
1689 tprintf(
"correcting row xheight: row->xheight %.4f"
1690 ", row->acrise %.4f row->descdrop %.4f\n",
1693 bool normal_xheight =
1710 (normal_xheight || cap_xheight)) ||
1711 (row_category ==
ROW_UNKNOWN && normal_xheight)) {
1735 if (row->
xheight < xheight + ascrise && row->
xheight > xheight) {
1738 tprintf(
"all caps with irregular xheight\n");
1747 tprintf(
"corrected row->xheight = %.4f, row->acrise = %.4f, row->descdrop"
◆ delete_non_dropout_rows()
void delete_non_dropout_rows |
( |
TO_BLOCK * |
block, |
|
|
float |
gradient, |
|
|
FCOORD |
rotation, |
|
|
int32_t |
block_edge, |
|
|
bool |
testing_on |
|
) |
| |
delete_non_dropout_rows
Delete the rows selected by find_best_dropout_row() and return the blobs of all rows to the block's blob list.
Definition at line 570 of file makerow.cpp.
586 TO_ROW_IT row_it = block->
get_rows ();
587 BLOBNBOX_IT blob_it = &block->
blobs;
589 if (row_it.length () == 0)
594 min_y = block_box.
bottom () - 1;
595 max_y = block_box.
top () + 1;
596 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
597 line_index = static_cast<int32_t>(floor (row_it.data ()->intercept ()));
598 if (line_index <= min_y)
599 min_y = line_index - 1;
600 if (line_index >= max_y)
601 max_y = line_index + 1;
603 line_count = max_y - min_y + 1;
607 std::vector<int32_t> deltas(line_count);
609 std::vector<int32_t> occupation(line_count);
618 max_y - min_y + 1, &occupation[0], &deltas[0]);
619 #ifndef GRAPHICS_DISABLED
621 draw_occupation(xleft, ybottom, min_y, max_y, &occupation[0], &deltas[0]);
625 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
626 row = row_it.data ();
627 line_index = static_cast<int32_t>(floor (row->
intercept ()));
628 distance = deltas[line_index - min_y];
630 line_index, &row_it, testing_on)) {
631 #ifndef GRAPHICS_DISABLED
636 blob_it.add_list_after (row_it.data ()->blob_list ());
637 delete row_it.extract ();
640 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
641 blob_it.add_list_after (row_it.data ()->blob_list ());
◆ deskew_block_coords()
TBOX deskew_block_coords |
( |
TO_BLOCK * |
block, |
|
|
float |
gradient |
|
) |
| |
Definition at line 732 of file makerow.cpp.
740 TO_ROW_IT row_it = block->
get_rows ();
745 length = sqrt (gradient * gradient + 1);
746 rotation =
FCOORD (1 / length, -gradient / length);
747 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
748 row = row_it.data ();
750 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
751 blob_it.forward ()) {
752 blob = blob_it.data ();
754 blob_box.
rotate (rotation);
◆ expand_rows()
void expand_rows |
( |
ICOORD |
page_tr, |
|
|
TO_BLOCK * |
block, |
|
|
float |
gradient, |
|
|
FCOORD |
rotation, |
|
|
int32_t |
block_edge, |
|
|
bool |
testing_on |
|
) |
| |
Definition at line 949 of file makerow.cpp.
959 float y_bottom, y_top;
963 BLOBNBOX_IT blob_it = &block->
blobs;
964 TO_ROW_IT row_it = block->
get_rows ();
966 #ifndef GRAPHICS_DISABLED
975 if (block->
get_rows ()->length () == 0)
981 if (block->
get_rows ()->length () == 0)
990 row_it.move_to_last ();
992 row = row_it.data ();
993 y_max = row->
max_y ();
994 y_min = row->
min_y ();
1000 if (y_min > y_bottom) {
1002 tprintf(
"Expanding bottom of row at %f from %f to %f\n",
1005 swallowed_row =
true;
1006 while (swallowed_row && !row_it.at_last ()) {
1007 swallowed_row =
false;
1009 test_row = row_it.data_relative (1);
1011 if (test_row->
max_y () > y_bottom) {
1012 if (test_row->
min_y () > y_bottom) {
1016 #ifndef GRAPHICS_DISABLED
1024 blob_it.set_to_list (row->
blob_list ());
1025 blob_it.add_list_after (test_row->
blob_list ());
1027 delete row_it.extract ();
1029 swallowed_row =
true;
1031 else if (test_row->
max_y () < y_min) {
1033 y_bottom = test_row->
max_y ();
1035 tprintf(
"Truncating limit to %f due to touching row at %f\n",
1041 tprintf(
"Not expanding limit beyond %f due to touching row at %f\n",
1048 if (y_max < y_top) {
1050 tprintf(
"Expanding top of row at %f from %f to %f\n",
1052 swallowed_row =
true;
1053 while (swallowed_row && !row_it.at_first ()) {
1054 swallowed_row =
false;
1056 test_row = row_it.data_relative (-1);
1057 if (test_row->
min_y () < y_top) {
1058 if (test_row->
max_y () < y_top) {
1062 blob_it.set_to_list (row->
blob_list ());
1063 #ifndef GRAPHICS_DISABLED
1071 blob_it.add_list_after (test_row->
blob_list ());
1073 delete row_it.extract ();
1075 swallowed_row =
true;
1077 else if (test_row->
min_y () < y_max) {
1079 y_top = test_row->
min_y ();
1081 tprintf(
"Truncating limit to %f due to touching row at %f\n",
1087 tprintf(
"Not expanding limit beyond %f due to touching row at %f\n",
1098 while (!row_it.at_last ());
◆ fill_heights()
void fill_heights |
( |
TO_ROW * |
row, |
|
|
float |
gradient, |
|
|
int |
min_height, |
|
|
int |
max_height, |
|
|
STATS * |
heights, |
|
|
STATS * |
floating_heights |
|
) |
| |
Definition at line 1406 of file makerow.cpp.
1414 if (blob_it.empty())
return;
1415 bool has_rep_chars =
1418 blob = blob_it.data();
1427 top -= gradient * xcentre + row->
parallel_c();
1428 if (top >= min_height && top <= max_height) {
1429 heights->
add(static_cast<int32_t>(floor(top + 0.5)), 1);
1431 floating_heights->
add(static_cast<int32_t>(floor(top + 0.5)), 1);
1439 while (!blob_it.at_first() &&
1440 blob_it.data()->repeated_set() == repeated_set) {
1443 tprintf(
"Skipping repeated char when computing xheight\n");
1448 }
while (!blob_it.at_first());
◆ find_best_dropout_row()
bool find_best_dropout_row |
( |
TO_ROW * |
row, |
|
|
int32_t |
distance, |
|
|
float |
dist_limit, |
|
|
int32_t |
line_index, |
|
|
TO_ROW_IT * |
row_it, |
|
|
bool |
testing_on |
|
) |
| |
Definition at line 652 of file makerow.cpp.
667 tprintf (
"Row at %g(%g), dropout dist=%d,",
677 if (abs_dist > dist_limit) {
679 tprintf (
" too far - deleting\n");
683 if ((distance < 0 && !row_it->at_last ())
684 || (
distance >= 0 && !row_it->at_first ())) {
685 row_offset = row_inc;
687 next_row = row_it->data_relative (row_offset);
688 next_index = static_cast<int32_t>(floor (next_row->
intercept ()));
690 && next_index < line_index
693 && next_index > line_index
696 tprintf (
" nearer neighbour (%d) at %g\n",
702 else if (next_index == line_index
706 tprintf (
" equal but more believable at %g (%g/%g)\n",
714 row_offset += row_inc;
716 while ((next_index == line_index
718 && row_offset < row_it->length ());
◆ fit_lms_line()
void fit_lms_line |
( |
TO_ROW * |
row | ) |
|
Definition at line 266 of file makerow.cpp.
271 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
272 const TBOX& box = blob_it.data()->bounding_box();
275 double error = lms.
Fit(&m, &c);
◆ fit_parallel_lms()
void fit_parallel_lms |
( |
float |
gradient, |
|
|
TO_ROW * |
row |
|
) |
| |
Definition at line 1973 of file makerow.cpp.
1980 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1981 if (!blob_it.data()->joined_to_prev()) {
1982 const TBOX& box = blob_it.data()->bounding_box();
1990 error = lms.
Fit(&gradient, &c);
◆ fit_parallel_rows()
void fit_parallel_rows |
( |
TO_BLOCK * |
block, |
|
|
float |
gradient, |
|
|
FCOORD |
rotation, |
|
|
int32_t |
block_edge, |
|
|
bool |
testing_on |
|
) |
| |
Definition at line 1931 of file makerow.cpp.
1938 #ifndef GRAPHICS_DISABLED
1941 TO_ROW_IT row_it = block->
get_rows ();
1943 row_it.move_to_first ();
1944 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1945 if (row_it.data ()->blob_list ()->empty ())
1946 delete row_it.extract ();
1950 #ifndef GRAPHICS_DISABLED
1953 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1955 block_edge, colour, rotation);
1956 colour = static_cast<ScrollView::Color>(colour + 1);
◆ get_min_max_xheight()
void get_min_max_xheight |
( |
int |
block_linesize, |
|
|
int * |
min_height, |
|
|
int * |
max_height |
|
) |
| |
|
inline |
Definition at line 114 of file makerow.h.
117 *min_height = static_cast<int32_t>(floor(block_linesize *
textord_minxh));
119 *max_height = static_cast<int32_t>(ceil(block_linesize * 3.0));
◆ get_row_category()
◆ linear_spline_baseline()
double* linear_spline_baseline |
( |
TO_ROW * |
row, |
|
|
TO_BLOCK * |
block, |
|
|
int32_t & |
segments, |
|
|
int32_t |
xstarts[] |
|
) |
| |
Definition at line 2187 of file makerow.cpp.
2196 int blobs_per_segment;
2200 BLOBNBOX_IT blob_it = row->
blob_list ();
2201 BLOBNBOX_IT new_it = blob_it;
2207 xstarts[0] = box.
left ();
2209 while (!blob_it.at_first ()) {
2216 blobs_per_segment = blobcount / segments;
2218 auto *coeffs =
new double[segments * 3];
2221 (
"Linear splining baseline of %d blobs at (%d,%d), into %d segments of %d blobs\n",
2222 blobcount, box.
left (), box.
bottom (), segments, blobs_per_segment);
2224 for (index2 = 0; index2 < blobs_per_segment / 2; index2++)
2229 blobindex += blobs_per_segment;
2231 while (index1 < blobindex || (segment == segments && index1 < blobcount)) {
2233 int middle = (box.
left() + box.
right()) / 2;
2236 if (index1 == blobindex - blobs_per_segment / 2
2237 || index1 == blobcount - 1) {
2238 xstarts[segment] = box.
left ();
2242 coeffs[segment * 3 - 3] = 0;
2243 coeffs[segment * 3 - 2] = b;
2244 coeffs[segment * 3 - 1] = c;
2246 if (segment > segments)
2249 blobindex += blobs_per_segment;
2251 while (index2 < blobindex || (segment == segments && index2 < blobcount)) {
2253 int middle = (new_box.
left() + new_box.
right()) / 2;
2256 if (index2 == blobindex - blobs_per_segment / 2
2257 || index2 == blobcount - 1) {
2258 xstarts[segment] = new_box.
left ();
2262 coeffs[segment * 3 - 3] = 0;
2263 coeffs[segment * 3 - 2] = b;
2264 coeffs[segment * 3 - 1] = c;
2267 while (segment <= segments);
◆ make_baseline_spline()
Definition at line 2056 of file makerow.cpp.
2062 auto *xstarts =
new int32_t[row->
blob_list()->length() + 1];
2067 xstarts[1] = xstarts[segments];
2069 coeffs =
new double[3];
2071 coeffs[1] = row->
line_m ();
2072 coeffs[2] = row->
line_c ();
◆ make_initial_textrows()
Definition at line 226 of file makerow.cpp.
232 TO_ROW_IT row_it = block->
get_rows ();
234 #ifndef GRAPHICS_DISABLED
244 row_it.move_to_first ();
245 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
247 #ifndef GRAPHICS_DISABLED
250 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
252 colour = static_cast<ScrollView::Color>(colour + 1);
◆ make_rows()
float make_rows |
( |
ICOORD |
page_tr, |
|
|
TO_BLOCK_LIST * |
port_blocks |
|
) |
| |
Definition at line 200 of file makerow.cpp.
203 TO_BLOCK_IT block_it;
205 block_it.set_to_list(port_blocks);
206 for (block_it.mark_cycle_pt(); !block_it.cycled_list();
212 block_it.set_to_list(port_blocks);
213 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
215 block_it.data()->block->pdblk.bounding_box().left(),
◆ make_single_row()
float make_single_row |
( |
ICOORD |
page_tr, |
|
|
bool |
allow_sub_blobs, |
|
|
TO_BLOCK * |
block, |
|
|
TO_BLOCK_LIST * |
blocks |
|
) |
| |
Definition at line 163 of file makerow.cpp.
165 BLOBNBOX_IT blob_it = &block->
blobs;
166 TO_ROW_IT row_it = block->
get_rows();
172 if (block->
blobs.singleton() && allow_sub_blobs) {
173 blob_it.move_to_first();
174 float size = MakeRowFromSubBlobs(block, blob_it.data()->cblob(), &row_it);
177 }
else if (block->
blobs.empty()) {
182 blob_it.add_after_then_move(bblob);
184 MakeRowFromBlobs(block->
line_size, &blob_it, &row_it);
186 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward())
◆ mark_repeated_chars()
void mark_repeated_chars |
( |
TO_ROW * |
row | ) |
|
Definition at line 2639 of file makerow.cpp.
2641 int num_repeated_sets = 0;
2642 if (!box_it.empty()) {
2645 int repeat_length = 1;
2648 BLOBNBOX_IT test_it(box_it);
2649 for (test_it.forward(); !test_it.at_first();) {
2650 bblob = test_it.data();
2654 bblob = test_it.data();
2663 num_repeated_sets++;
2664 for (; repeat_length > 0; box_it.forward(), --repeat_length) {
2665 bblob = box_it.data();
2672 }
while (!box_it.at_first());
◆ median_block_xheight()
float median_block_xheight |
( |
TO_BLOCK * |
block, |
|
|
float |
gradient |
|
) |
| |
◆ most_overlapping_row()
OVERLAP_STATE most_overlapping_row |
( |
TO_ROW_IT * |
row_it, |
|
|
TO_ROW *& |
best_row, |
|
|
float |
top, |
|
|
float |
bottom, |
|
|
float |
rowsize, |
|
|
bool |
testing_blob |
|
) |
| |
Definition at line 2478 of file makerow.cpp.
2489 float merge_top, merge_bottom;
2493 BLOBNBOX_IT blob_it;
2496 row = row_it->data ();
2497 bestover = top - bottom;
2498 if (top > row->
max_y ())
2499 bestover -= top - row->
max_y ();
2500 if (bottom < row->min_y ())
2502 bestover -= row->
min_y () - bottom;
2504 tprintf(
"Test blob y=(%g,%g), row=(%f,%f), size=%g, overlap=%f\n",
2505 bottom, top, row->
min_y(), row->
max_y(), rowsize, bestover);
2509 if (!row_it->at_last ()) {
2511 test_row = row_it->data ();
2512 if (test_row->
min_y () <= top && test_row->
max_y () >= bottom) {
2514 test_row->
max_y () >
2517 test_row->
min_y () <
2519 if (merge_top - merge_bottom <= rowsize) {
2521 tprintf (
"Merging rows at (%g,%g), (%g,%g)\n",
2525 test_row->
set_limits (merge_bottom, merge_top);
2526 blob_it.set_to_list (test_row->
blob_list ());
2527 blob_it.add_list_after (row->
blob_list ());
2529 row_it->backward ();
2530 delete row_it->extract ();
2534 overlap = top - bottom;
2535 if (top > test_row->
max_y ())
2536 overlap -= top - test_row->
max_y ();
2537 if (bottom < test_row->min_y ())
2538 overlap -= test_row->
min_y () - bottom;
2539 if (bestover >= rowsize - 1 && overlap >= rowsize - 1) {
2542 if (overlap > bestover) {
2547 tprintf(
"Test blob y=(%g,%g), row=(%f,%f), size=%g, overlap=%f->%f\n",
2548 bottom, top, test_row->
min_y(), test_row->
max_y(),
2549 rowsize, overlap, bestover);
2554 while (!row_it->at_last ()
2555 && test_row->
min_y () <= top && test_row->
max_y () >= bottom);
2556 while (row_it->data () != row)
2557 row_it->backward ();
2559 if (top - bottom - bestover > rowsize * textord_overlap_x &&
◆ pre_associate_blobs()
Definition at line 1845 of file makerow.cpp.
1851 #ifndef GRAPHICS_DISABLED
1858 BLOBNBOX_IT blob_it;
1859 BLOBNBOX_IT start_it;
1860 TO_ROW_IT row_it = block->
get_rows ();
1862 #ifndef GRAPHICS_DISABLED
1866 blob_rotation =
FCOORD (rotation.
x (), -rotation.
y ());
1867 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1869 blob_it.set_to_list (row_it.data ()->blob_list ());
1870 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
1871 blob_it.forward ()) {
1872 blob = blob_it.data ();
1885 if (!blob_it.at_last ()) {
1886 nextblob = blob_it.data_relative(1);
1889 blob->
merge(nextblob);
1896 blob->
chop (&start_it, &blob_it,
1902 #ifndef GRAPHICS_DISABLED
1907 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
1908 blob_it.forward ()) {
1909 blob = blob_it.data ();
1911 blob_box.
rotate (rotation);
1914 blob_box.
right (), blob_box.
top ());
1917 colour = static_cast<ScrollView::Color>(colour + 1);
◆ row_spacing_order()
int row_spacing_order |
( |
const void * |
item1, |
|
|
const void * |
item2 |
|
) |
| |
Definition at line 2617 of file makerow.cpp.
2621 const TO_ROW *row1 = *reinterpret_cast<const TO_ROW* const*>(item1);
2623 const TO_ROW *row2 = *reinterpret_cast<const TO_ROW* const*>(item2);
◆ row_y_order()
int row_y_order |
( |
const void * |
item1, |
|
|
const void * |
item2 |
|
) |
| |
Definition at line 2595 of file makerow.cpp.
2599 const TO_ROW *row1 = *reinterpret_cast<const TO_ROW* const*>(item1);
2601 const TO_ROW *row2 = *reinterpret_cast<const TO_ROW* const*>(item2);
◆ segment_baseline()
bool segment_baseline |
( |
TO_ROW * |
row, |
|
|
TO_BLOCK * |
block, |
|
|
int32_t & |
segments, |
|
|
int32_t * |
xstarts |
|
) |
| |
Definition at line 2088 of file makerow.cpp.
2104 BLOBNBOX_IT blob_it = row->
blob_list ();
2105 BLOBNBOX_IT new_it = blob_it;
2108 needs_curve =
false;
2110 xstarts[0] = box.
left ();
2112 blobcount = row->
blob_list ()->length ();
2114 tprintf (
"Segmenting baseline of %d blobs at (%d,%d)\n",
2118 blob_it.move_to_last ();
2119 box = blob_it.data ()->bounding_box ();
2120 xstarts[1] = box.
right ();
2124 new_it.mark_cycle_pt ();
2127 middle = (new_box.
left () + new_box.
right ()) / 2.0;
2130 yshifts.
add (yshift, blobindex);
2131 if (new_it.cycled_list ()) {
2132 xstarts[1] = new_box.
right ();
2153 xstarts[segments++] = box.
left ();
2159 middle = (new_box.
left () + new_box.
right ()) / 2.0;
2161 yshifts.
add (yshift, blobindex);
2165 while (!new_it.cycled_list ());
2167 xstarts[segments] = new_box.
right ();
2170 xstarts[--segments] = new_box.
right ();
2173 tprintf (
"Made %d segments on row at (%d,%d)\n",
◆ separate_underlines()
void separate_underlines |
( |
TO_BLOCK * |
block, |
|
|
float |
gradient, |
|
|
FCOORD |
rotation, |
|
|
bool |
testing_on |
|
) |
| |
Definition at line 1772 of file makerow.cpp.
1783 BLOBNBOX_IT blob_it;
1787 TO_ROW_IT row_it = block->
get_rows();
1792 length = sqrt(1 + gradient * gradient);
1793 g_vec =
FCOORD(1 / length, -gradient / length);
1794 blob_rotation =
FCOORD(rotation.
x(), -rotation.
y());
1795 blob_rotation.
rotate(g_vec);
1796 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1797 row = row_it.data();
1800 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list();
1801 blob_it.forward()) {
1802 blob = blob_it.data();
1810 rotated_blob, static_cast<int16_t>(row->
intercept()),
1811 static_cast<int16_t>(
1815 under_it.add_after_then_move(blob_it.extract());
1817 tprintf(
"Underlined blob at:");
1818 rotated_blob->bounding_box().print();
1822 }
else if (CountOverlaps(blob->
bounding_box(), min_blob_height,
1824 textord_max_blob_overlaps) {
1825 large_it.add_after_then_move(blob_it.extract());
1827 tprintf(
"Large blob overlaps %d blobs at:",
1828 CountOverlaps(blob_box, min_blob_height,
1833 delete rotated_blob;
◆ vigorous_noise_removal()
void vigorous_noise_removal |
( |
TO_BLOCK * |
block | ) |
|
Definition at line 466 of file makerow.cpp.
467 TO_ROW_IT row_it = block->
get_rows ();
468 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
469 TO_ROW* row = row_it.data();
473 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
478 STATS hstats(0, max_height + 1);
479 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
485 float xheight = hstats.median();
488 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
493 if (prev !=
nullptr) {
494 if (dot_of_i(blob, prev, row))
497 if (!b_it.at_last()) {
498 BLOBNBOX* next = b_it.data_relative(1);
499 if (dot_of_i(blob, next, row))
503 delete blob->
cblob();
504 delete b_it.extract();
◆ within_error_margin()
bool within_error_margin |
( |
float |
test, |
|
|
float |
num, |
|
|
float |
margin |
|
) |
| |
|
inline |
Definition at line 127 of file makerow.h.
129 return (test >= num * (1 - margin) && test <= num * (1 + margin));
◆ textord_ascheight_mode_fraction
double textord_ascheight_mode_fraction = 0.15 |
"Min pile height to make ascheight"
Definition at line 91 of file makerow.cpp.
◆ textord_ascx_ratio_max
double textord_ascx_ratio_max = 1.7 |
◆ textord_ascx_ratio_min
double textord_ascx_ratio_min = 1.2 |
◆ textord_cblob_blockocc
bool textord_cblob_blockocc = true |
"Use new projection for underlines"
◆ textord_chop_width
double textord_chop_width = 1.5 |
"Max width before chopping"
Definition at line 76 of file makerow.cpp.
◆ textord_debug_blob
bool textord_debug_blob = false |
"Print test blob information"
Definition at line 101 of file makerow.cpp.
◆ textord_debug_xheights
bool textord_debug_xheights = false |
"Test xheight algorithms"
Definition at line 55 of file makerow.cpp.
◆ textord_descx_ratio_max
double textord_descx_ratio_max = 0.6 |
◆ textord_descx_ratio_min
double textord_descx_ratio_min = 0.15 |
◆ textord_excess_blobsize
double textord_excess_blobsize = 1.3 |
"New row made if blob makes row this big"
Definition at line 83 of file makerow.cpp.
◆ textord_fix_makerow_bug
bool textord_fix_makerow_bug = true |
"Prevent multiple baselines"
Definition at line 54 of file makerow.cpp.
◆ textord_fix_xheight_bug
bool textord_fix_xheight_bug = true |
◆ textord_heavy_nr
bool textord_heavy_nr = false |
"Vigorously remove noise"
Definition at line 42 of file makerow.cpp.
◆ textord_linespace_iqrlimit
double textord_linespace_iqrlimit = 0.2 |
"Max iqr/median for linespace"
Definition at line 74 of file makerow.cpp.
◆ textord_lms_line_trials
int textord_lms_line_trials = 12 |
"Number of line fits to do"
Definition at line 99 of file makerow.cpp.
◆ textord_min_blob_height_fraction
double textord_min_blob_height_fraction = 0.75 |
"Min blob height/top to include blob top into xheight stats"
Definition at line 87 of file makerow.cpp.
◆ textord_min_blobs_in_row
int textord_min_blobs_in_row = 4 |
"Min blobs before gradient counted"
Definition at line 62 of file makerow.cpp.
◆ textord_min_linesize
double textord_min_linesize = 1.25 |
"* blob height for initial linesize"
Definition at line 81 of file makerow.cpp.
◆ textord_min_xheight
int textord_min_xheight = 10 |
"Min credible pixel xheight"
Definition at line 67 of file makerow.cpp.
◆ textord_minxh
double textord_minxh = 0.25 |
"fraction of linesize for min xheight"
Definition at line 80 of file makerow.cpp.
◆ textord_new_initial_xheight
bool textord_new_initial_xheight = true |
"Use test xheight mechanism"
Definition at line 100 of file makerow.cpp.
◆ textord_occupancy_threshold
double textord_occupancy_threshold = 0.4 |
"Fraction of neighbourhood"
Definition at line 84 of file makerow.cpp.
◆ textord_old_baselines
bool textord_old_baselines = true |
"Use old baseline algorithm"
Definition at line 51 of file makerow.cpp.
◆ textord_old_xheight
bool textord_old_xheight = true |
"Use old xheight algorithm"
Definition at line 52 of file makerow.cpp.
◆ textord_parallel_baselines
bool textord_parallel_baselines = true |
"Force parallel baselines"
Definition at line 49 of file makerow.cpp.
◆ textord_quadratic_baselines
bool textord_quadratic_baselines = false |
◆ textord_show_expanded_rows
bool textord_show_expanded_rows = false |
"Display rows after expanding"
Definition at line 45 of file makerow.cpp.
◆ textord_show_final_blobs
bool textord_show_final_blobs = false |
"Display blob bounds after pre-ass"
Definition at line 47 of file makerow.cpp.
◆ textord_show_final_rows
bool textord_show_final_rows = false |
"Display rows after final fitting"
Definition at line 46 of file makerow.cpp.
◆ textord_show_initial_rows
bool textord_show_initial_rows = false |
"Display row accumulation"
Definition at line 43 of file makerow.cpp.
◆ textord_show_parallel_rows
bool textord_show_parallel_rows = false |
"Display page correlated rows"
Definition at line 44 of file makerow.cpp.
◆ textord_skew_ile
double textord_skew_ile = 0.5 |
"Ile of gradients for page skew"
Definition at line 72 of file makerow.cpp.
◆ textord_skew_lag
double textord_skew_lag = 0.75 |
"Lag for skew on row accumulation"
Definition at line 73 of file makerow.cpp.
◆ textord_spline_medianwin
int textord_spline_medianwin = 6 |
"Size of window for spline segmentation"
Definition at line 64 of file makerow.cpp.
◆ textord_spline_minblobs
int textord_spline_minblobs = 8 |
"Min blobs in each spline segment"
Definition at line 63 of file makerow.cpp.
◆ textord_spline_outlier_fraction
double textord_spline_outlier_fraction = 0.1 |
"Fraction of line spacing for outlier"
Definition at line 71 of file makerow.cpp.
◆ textord_spline_shift_fraction
double textord_spline_shift_fraction = 0.02 |
"Fraction of line spacing for quad"
Definition at line 69 of file makerow.cpp.
◆ textord_straight_baselines
bool textord_straight_baselines = false |
"Force straight baselines"
Definition at line 50 of file makerow.cpp.
◆ textord_test_landscape
bool textord_test_landscape = false |
"Tests refer to land/port"
Definition at line 48 of file makerow.cpp.
◆ textord_test_x
int textord_test_x = -INT32_MAX |
◆ textord_test_y
int textord_test_y = -INT32_MAX |
◆ textord_underline_width
double textord_underline_width = 2.0 |
"Multiple of line_size for underline"
Definition at line 85 of file makerow.cpp.
◆ textord_width_limit
double textord_width_limit = 8 |
"Max width of blobs to make rows"
Definition at line 75 of file makerow.cpp.
◆ textord_xheight_error_margin
double textord_xheight_error_margin = 0.1 |
◆ textord_xheight_mode_fraction
double textord_xheight_mode_fraction = 0.4 |
"Min pile height to make xheight"
Definition at line 89 of file makerow.cpp.
C_BLOB * crotate_cblob(C_BLOB *blob, FCOORD rotation)
double textord_ascx_ratio_max
bool textord_fix_xheight_bug
void set_num_repeated_sets(int num_sets)
int32_t choose_nth_item(int32_t index, float *array, int32_t count)
int32_t get_total() const
bool textord_show_parallel_rows
BLOBNBOX_LIST small_blobs
double textord_linespace_iqrlimit
static C_BLOB * FakeBlob(const TBOX &box)
bool textord_fix_makerow_bug
static const double kXHeightFraction
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
ScrollView * create_to_win(ICOORD page_tr)
void fit_parallel_lms(float gradient, TO_ROW *row)
BLOBNBOX_LIST noise_blobs
bool test_underline(bool testing_on, C_BLOB *blob, int16_t baseline, int16_t xheight)
int32_t compute_height_modes(STATS *heights, int32_t min_height, int32_t max_height, int32_t *modes, int32_t maxmodes)
void set_repeated_set(int set_id)
double textord_chop_width
void set_parallel_line(float gradient, float new_c, float new_error)
TBOX deskew_block_coords(TO_BLOCK *block, float gradient)
OVERLAP_STATE most_overlapping_row(TO_ROW_IT *row_it, TO_ROW *&best_row, float top, float bottom, float rowsize, bool testing_blob)
bool textord_straight_baselines
void compute_line_occupation(TO_BLOCK *block, float gradient, int32_t min_y, int32_t max_y, int32_t *occupation, int32_t *deltas)
bool textord_test_landscape
double textord_spline_shift_fraction
bool contains(const FCOORD pt) const
int32_t pile_count(int32_t value) const
double textord_occupancy_threshold
void fit_parallel_rows(TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
void DrawTo(int x, int y)
void plot_parallel_row(TO_ROW *row, float gradient, int32_t left, ScrollView::Color colour, FCOORD rotation)
bool textord_debug_xheights
void fit_lms_line(TO_ROW *row)
double textord_descx_ratio_max
int textord_spline_minblobs
void rotate(const FCOORD &vec)
void plot_to_row(TO_ROW *row, ScrollView::Color colour, FCOORD rotation)
void assign_blobs_to_rows(TO_BLOCK *block, float *gradient, int pass, bool reject_misses, bool make_new_rows, bool drawing_skew)
double textord_underline_width
double textord_excess_blobsize
bool textord_show_final_blobs
int blob_x_order(const void *item1, const void *item2)
ROW_CATEGORY get_row_category(const TO_ROW *row)
bool textord_parallel_baselines
double textord_xheight_mode_fraction
float initial_min_y() const
bool rep_chars_marked() const
PDBLK pdblk
Page Description Block.
BLOBNBOX_LIST large_blobs
void compute_row_stats(TO_BLOCK *block, bool testing_on)
bool major_x_overlap(const TBOX &box) const
int textord_spline_medianwin
void compute_dropout_distances(int32_t *occupation, int32_t *thresholds, int32_t line_count)
bool within_error_margin(float test, float num, float margin)
int textord_lms_line_trials
const int kMinLeaderCount
double ConstrainedFit(const FCOORD &direction, double min_dist, double max_dist, bool debug, ICOORD *line_pt)
void adjust_row_limits(TO_BLOCK *block)
int num_repeated_sets() const
bool joined_to_prev() const
bool textord_show_initial_rows
int textord_min_blobs_in_row
double textord_min_blob_height_fraction
double textord_descx_ratio_min
static const double kAscenderFraction
bool segment_baseline(TO_ROW *row, TO_BLOCK *block, int32_t &segments, int32_t *xstarts)
static const double kDescenderFraction
void compute_occupation_threshold(int32_t low_window, int32_t high_window, int32_t line_count, int32_t *occupation, int32_t *thresholds)
UnicodeText::const_iterator::difference_type distance(const UnicodeText::const_iterator &first, const UnicodeText::const_iterator &last)
void rotate(const FCOORD vec)
void add_blob(BLOBNBOX *blob, float top, float bottom, float row_size)
const TBOX & bounding_box() const
bool textord_show_final_rows
TBOX box_next_pre_chopped(BLOBNBOX_IT *it)
int row_y_order(const void *item1, const void *item2)
bool textord_show_expanded_rows
bool find_best_dropout_row(TO_ROW *row, int32_t distance, float dist_limit, int32_t line_index, TO_ROW_IT *row_it, bool testing_on)
void merge(BLOBNBOX *nextblob)
BlobTextFlowType flow() const
float believability() const
void add(int32_t value, int32_t count)
bool textord_new_initial_xheight
double Fit(ICOORD *pt1, ICOORD *pt2)
void make_initial_textrows(ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, bool testing_on)
void delete_non_dropout_rows(TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
void Add(const ICOORD &pt)
DLLSYM void tprintf(const char *format,...)
void add(float value, int32_t key)
double textord_ascx_ratio_min
double * linear_spline_baseline(TO_ROW *row, TO_BLOCK *block, int32_t &segments, int32_t xstarts[])
void set_line(float new_m, float new_c, float new_error)
void SetCursor(int x, int y)
void Rectangle(int x1, int y1, int x2, int y2)
void set_limits(float new_min, float new_max)
int row_spacing_order(const void *item1, const void *item2)
double textord_xheight_error_margin
void chop(BLOBNBOX_IT *start_it, BLOBNBOX_IT *blob_it, FCOORD rotation, float xheight)
void compute_page_skew(TO_BLOCK_LIST *blocks, float &page_m, float &page_err)
BLOBNBOX_LIST * blob_list()
void expand_rows(ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
void draw_occupation(int32_t xleft, int32_t ybottom, int32_t min_y, int32_t max_y, int32_t occupation[], int32_t thresholds[])
void cleanup_rows_making(ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
double textord_ascheight_mode_fraction