tesseract
5.0.0-alpha-619-ge9db
|
#include <vector>
#include "blobbox.h"
#include "ccstruct.h"
#include "detlinefit.h"
#include "statistc.h"
#include "drawtord.h"
#include "blkocc.h"
#include "sortflts.h"
#include "oldbasel.h"
#include "textord.h"
#include "tordmain.h"
#include "underlin.h"
#include "makerow.h"
#include "tprintf.h"
#include "tovars.h"
#include <algorithm>
Go to the source code of this file.
|
|
Arrange the blobs into a single row. If the block contains only a single blob and allow_sub_blobs is set, rows are instead made from that blob's sub-blobs (two rows in total), in case the top-level blob is a container of the real blobs to recognize.
|
float | make_single_row (ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks) |
|
|
Arrange the blobs into rows.
|
float | make_rows (ICOORD page_tr, TO_BLOCK_LIST *port_blocks) |
|
|
Arrange the good blobs into rows of text.
|
void | make_initial_textrows (ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, bool testing_on) |
|
|
Fit an LMS line to a row.
|
void | fit_lms_line (TO_ROW *row) |
|
|
Delete this row if it has a neighbour with better dropout characteristics. true is returned if the row should be deleted.
|
bool | find_best_dropout_row (TO_ROW *row, int32_t distance, float dist_limit, int32_t line_index, TO_ROW_IT *row_it, bool testing_on) |
|
|
Compute the bounding box of all the blobs in the block if they were deskewed without actually doing it.
|
TBOX | deskew_block_coords (TO_BLOCK *block, float gradient) |
|
|
Compute the pixel projection back on the y axis given the global skew. Also compute the 1st derivative.
|
void | compute_line_occupation (TO_BLOCK *block, float gradient, int32_t min_y, int32_t max_y, int32_t *occupation, int32_t *deltas) |
|
void | compute_occupation_threshold (int32_t low_window, int32_t high_window, int32_t line_count, int32_t *occupation, int32_t *thresholds) |
|
|
Compute the distance from each coordinate to the nearest dropout.
|
void | compute_dropout_distances (int32_t *occupation, int32_t *thresholds, int32_t line_count) |
|
|
Expand each row to the smaller of its allowed size and the point at which it touches its neighbours. If the expansion would entirely swallow a neighbouring row, merge that row's blobs into this row and delete the neighbour.
|
void | expand_rows (ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on) |
|
void | adjust_row_limits (TO_BLOCK *block) |
|
|
Compute the linespacing and offset.
|
void | compute_row_stats (TO_BLOCK *block, bool testing_on) |
|
|
Fill the given heights with heights of the blobs that are legal candidates for estimating xheight.
|
void | fill_heights (TO_ROW *row, float gradient, int min_height, int max_height, STATS *heights, STATS *floating_heights) |
|
|
Given a STATS object heights, looks for the two most frequently occurring heights that look like xheight and xheight + ascrise. If found, sets the values of *xheight and *ascrise accordingly; otherwise sets *xheight to any most frequently occurring height and sets *ascrise to 0. Returns the number of times xheight occurred in heights. For each mode that is considered for being an xheight, the count of floating blobs (stored in floating_heights) is subtracted from the total count of the blobs of this height. This is done because blobs that sit far above the baseline could represent valid ascenders, but it is highly unlikely that such a character's height will be an xheight (e.g. -, ', =, ^, ‘, ", ’, etc.). If cap_only is set, only the top mode is searched for.
|
int | compute_xheight_from_modes (STATS *heights, STATS *floating_heights, bool cap_only, int min_height, int max_height, float *xheight, float *ascrise) |
|
|
Estimates the descdrop of this row. This function looks for "significant" descenders of lowercase letters (those that could not just be the small descenders of upper case letters like Q,J). The function also takes into account how many potential ascenders this row might contain. If the number of potential ascenders along with descenders is close to the expected fraction of the total number of blobs in the row, the function returns the descender height, returns 0 otherwise.
|
int32_t | compute_row_descdrop (TO_ROW *row, float gradient, int xheight_blob_count, STATS *asc_heights) |
|
|
Find the top maxmodes values in the input array and put their indices in the output in the order in which they occurred.
|
int32_t | compute_height_modes (STATS *heights, int32_t min_height, int32_t max_height, int32_t *modes, int32_t maxmodes) |
|
|
Adjust the xheight etc of this row if not within reasonable limits of the average for the block.
|
void | correct_row_xheight (TO_ROW *row, float xheight, float ascrise, float descdrop) |
|
|
Test wide objects for being potential underlines. If they are then put them in a separate list in the block.
|
void | separate_underlines (TO_BLOCK *block, float gradient, FCOORD rotation, bool testing_on) |
|
|
Associate overlapping blobs and fake chop wide blobs.
|
void | pre_associate_blobs (ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, bool testing_on) |
|
|
Re-fit the rows in the block to the given gradient.
|
void | fit_parallel_rows (TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on) |
|
|
Fit an LMS line to a row. Make the fit parallel to the given gradient and set the row accordingly.
|
void | fit_parallel_lms (float gradient, TO_ROW *row) |
|
|
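Build the baseline spline for a row from a list of segment start positions (xstarts) and three coefficients per segment. The listing also shows a single-segment case whose coefficients are taken from the row's already fitted line (line_m, line_c).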
|
void | make_baseline_spline (TO_ROW *row, TO_BLOCK *block) |
|
|
Divide the baseline up into segments which require a different quadratic fitted to them. Return true if enough blobs were far enough away to need a quadratic.
|
bool | segment_baseline (TO_ROW *row, TO_BLOCK *block, int32_t &segments, int32_t *xstarts) |
|
|
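Divide the baseline up into segments containing roughly equal numbers of blobs and fit a straight line to each segment, recording each segment's starting x coordinate in xstarts.
Returns: a newly allocated array of three coefficients per segment, with the quadratic coefficient of every segment set to 0.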
|
double * | linear_spline_baseline (TO_ROW *row, TO_BLOCK *block, int32_t &segments, int32_t xstarts[]) |
|
|
Make enough rows that every given blob can be allocated to one of them. If a block skew is given, use that, else attempt to track it.
|
void | assign_blobs_to_rows (TO_BLOCK *block, float *gradient, int pass, bool reject_misses, bool make_new_rows, bool drawing_skew) |
|
|
Return the row which most overlaps the blob.
|
OVERLAP_STATE | most_overlapping_row (TO_ROW_IT *row_it, TO_ROW *&best_row, float top, float bottom, float rowsize, bool testing_blob) |
|
|
Sort function to sort blobs in x from page left.
|
int | blob_x_order (const void *item1, const void *item2) |
|
|
Sort function to sort rows in y from page top.
|
int | row_y_order (const void *item1, const void *item2) |
|
|
Qsort style function to compare 2 TO_ROWS based on their spacing value.
|
int | row_spacing_order (const void *item1, const void *item2) |
|
|
Mark blobs marked with BTFT_LEADER in repeated sets using the repeated_set member of BLOBNBOX.
|
void | mark_repeated_chars (TO_ROW *row) |
|
|
Compute the skew over a full page by averaging the gradients over all the lines. Get the error of the same row.
|
const double | kNoiseSize = 0.5 |
|
const int | kMinSize = 8 |
|
void | compute_page_skew (TO_BLOCK_LIST *blocks, float &page_m, float &page_err) |
|
void | vigorous_noise_removal (TO_BLOCK *block) |
|
void | cleanup_rows_making (ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on) |
|
void | delete_non_dropout_rows (TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on) |
|
◆ MAX_HEIGHT_MODES
#define MAX_HEIGHT_MODES 12 |
◆ adjust_row_limits()
void adjust_row_limits |
( |
TO_BLOCK * |
block | ) |
|
adjust_row_limits
Change the limits of rows to suit the default fractions.
Definition at line 1107 of file makerow.cpp.
1114 TO_ROW_IT row_it = block->
get_rows ();
1117 tprintf(
"Adjusting row limits for block(%d,%d)\n",
1120 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1121 row = row_it.data ();
1124 tprintf(
"Row at %f has min %f, max %f, size %f\n",
◆ assign_blobs_to_rows()
void assign_blobs_to_rows |
( |
TO_BLOCK * |
block, |
|
|
float * |
gradient, |
|
|
int |
pass, |
|
|
bool |
reject_misses, |
|
|
bool |
make_new_rows, |
|
|
bool |
drawing_skew |
|
) |
| |
Definition at line 2278 of file makerow.cpp.
2289 float g_length = 1.0f;
2294 float smooth_factor;
2299 TO_ROW *dest_row =
nullptr;
2301 BLOBNBOX_IT blob_it = &block->
blobs;
2302 TO_ROW_IT row_it = block->
get_rows ();
2307 if (gradient !=
nullptr)
2308 g_length = sqrt (1 + *gradient * *gradient);
2309 #ifndef GRAPHICS_DISABLED
2315 smooth_factor = 1.0;
2317 row_count = row_it.length ();
2318 if (!blob_it.empty ()) {
2319 left_x = blob_it.data ()->bounding_box ().left ();
2325 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
2326 blob = blob_it.data ();
2327 if (gradient !=
nullptr) {
2332 && last_x - left_x > block->
line_size * 2
2333 && textord_interpolating_skew) {
2336 / (last_x - left_x);
2342 #ifndef GRAPHICS_DISABLED
2346 if (!row_it.empty ()) {
2347 for (row_it.move_to_first ();
2348 !row_it.at_last () && row_it.data ()->min_y () > top;
2350 row = row_it.data ();
2351 if (row->
min_y () <= top && row->
max_y () >= bottom) {
2359 if (overlap_result ==
NEW_ROW && !reject_misses)
2364 if (!make_new_rows) {
2365 near_dist = row_it.data_relative (-1)->min_y () - top;
2367 if (bottom < row->min_y ()) {
2368 if (row->
min_y () - bottom <=
2376 else if (near_dist > 0
2377 && near_dist < bottom - row->max_y ()) {
2379 dest_row = row_it.data ();
2380 if (dest_row->
min_y () - bottom <=
2388 if (top - row->
max_y () <=
2390 block->
line_size) * (textord_overlap_x +
2399 if (overlap_result ==
ASSIGN)
2400 dest_row->
add_blob (blob_it.extract (), top, bottom,
2402 if (overlap_result ==
NEW_ROW) {
2403 if (make_new_rows && top - bottom < block->max_blob_size) {
2405 new TO_ROW (blob_it.extract (), top, bottom,
2408 if (bottom > row_it.data ()->min_y ())
2409 row_it.add_before_then_move (dest_row);
2412 row_it.add_after_then_move (dest_row);
2415 textord_skewsmooth_offset);
2421 else if (make_new_rows && top - bottom < block->max_blob_size) {
2426 row_it.add_after_then_move(dest_row);
2428 textord_skewsmooth_offset2);
2433 if (overlap_result !=
REJECT) {
2434 tprintf(
"Test blob assigned to row at (%g,%g) on pass %d\n",
2438 tprintf(
"Test blob assigned to no row on pass %d\n", pass);
2441 if (overlap_result !=
REJECT) {
2442 while (!row_it.at_first() &&
2443 row_it.data()->min_y() > row_it.data_relative(-1)->min_y()) {
2444 row = row_it.extract();
2446 row_it.add_before_then_move(row);
2448 while (!row_it.at_last() &&
2449 row_it.data ()->min_y() < row_it.data_relative (1)->min_y()) {
2450 row = row_it.extract();
2453 row_it.add_after_then_move(row);
2455 BLOBNBOX_IT added_blob_it(dest_row->
blob_list());
2456 added_blob_it.move_to_last();
2457 TBOX prev_box = added_blob_it.data_relative(-1)->bounding_box();
2458 if (dest_row->
blob_list()->singleton() ||
2460 block_skew = (1 - smooth_factor) * block_skew
2466 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
2467 if (row_it.data()->blob_list()->empty())
2468 delete row_it.extract();
◆ blob_x_order()
int blob_x_order |
( |
const void * |
item1, |
|
|
const void * |
item2 |
|
) |
| |
Definition at line 2573 of file makerow.cpp.
2577 const BLOBNBOX *blob1 = *reinterpret_cast<const BLOBNBOX* const*>(item1);
2579 const BLOBNBOX *blob2 = *reinterpret_cast<const BLOBNBOX* const*>(item2);
◆ cleanup_rows_making()
void cleanup_rows_making |
( |
ICOORD |
page_tr, |
|
|
TO_BLOCK * |
block, |
|
|
float |
gradient, |
|
|
FCOORD |
rotation, |
|
|
int32_t |
block_edge, |
|
|
bool |
testing_on |
|
) |
| |
cleanup_rows_making
Remove overlapping rows and fit all the blobs to what's left.
Definition at line 517 of file makerow.cpp.
526 BLOBNBOX_IT blob_it = &block->
blobs;
527 TO_ROW_IT row_it = block->
get_rows ();
529 #ifndef GRAPHICS_DISABLED
546 expand_rows(page_tr, block, gradient, rotation, block_edge, testing_on);
547 blob_it.set_to_list (&block->
blobs);
548 row_it.set_to_list (block->
get_rows ());
549 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
550 blob_it.add_list_after (row_it.data ()->blob_list ());
554 blob_it.set_to_list (&block->
blobs);
558 blob_it.set_to_list (&block->
blobs);
◆ compute_dropout_distances()
void compute_dropout_distances |
( |
int32_t * |
occupation, |
|
|
int32_t * |
thresholds, |
|
|
int32_t |
line_count |
|
) |
| |
Definition at line 902 of file makerow.cpp.
911 int32_t prev_threshold;
918 prev_threshold = thresholds[line_index];
923 while (line_index < line_count
924 && (occupation[line_index] < thresholds[line_index]
925 || occupation[line_index - 1] >= prev_threshold));
926 if (line_index < line_count) {
927 back_index = line_index - 1;
929 while (next_dist < -distance && back_index >= 0) {
930 thresholds[back_index] = next_dist;
938 while (line_index < line_count);
◆ compute_height_modes()
int32_t compute_height_modes |
( |
STATS * |
heights, |
|
|
int32_t |
min_height, |
|
|
int32_t |
max_height, |
|
|
int32_t * |
modes, |
|
|
int32_t |
maxmodes |
|
) |
| |
Definition at line 1623 of file makerow.cpp.
1631 int32_t least_count;
1632 int32_t least_index;
1635 src_count = max_height + 1 - min_height;
1637 least_count = INT32_MAX;
1639 for (src_index = 0; src_index < src_count; src_index++) {
1640 pile_count = heights->
pile_count(min_height + src_index);
1641 if (pile_count > 0) {
1642 if (dest_count < maxmodes) {
1643 if (pile_count < least_count) {
1645 least_count = pile_count;
1646 least_index = dest_count;
1648 modes[dest_count++] = min_height + src_index;
1649 }
else if (pile_count >= least_count) {
1650 while (least_index < maxmodes - 1) {
1651 modes[least_index] = modes[least_index + 1];
1656 modes[maxmodes - 1] = min_height + src_index;
1657 if (pile_count == least_count) {
1659 least_index = maxmodes - 1;
1663 for (dest_count = 1; dest_count < maxmodes; dest_count++) {
1664 pile_count = heights->
pile_count(modes[dest_count]);
1665 if (pile_count < least_count) {
1667 least_count = pile_count;
1668 least_index = dest_count;
◆ compute_line_occupation()
void compute_line_occupation |
( |
TO_BLOCK * |
block, |
|
|
float |
gradient, |
|
|
int32_t |
min_y, |
|
|
int32_t |
max_y, |
|
|
int32_t * |
occupation, |
|
|
int32_t * |
deltas |
|
) |
| |
Definition at line 768 of file makerow.cpp.
780 TO_ROW_IT row_it = block->
get_rows ();
787 line_count = max_y - min_y + 1;
788 length = sqrt (gradient * gradient + 1);
789 rotation =
FCOORD (1 / length, -gradient / length);
790 for (line_index = 0; line_index < line_count; line_index++)
791 deltas[line_index] = 0;
792 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
793 row = row_it.data ();
795 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
796 blob_it.forward ()) {
797 blob = blob_it.data ();
799 blob_box.
rotate (rotation);
800 int32_t width = blob_box.
right() - blob_box.
left();
801 index = blob_box.
bottom() - min_y;
804 deltas[index] += width;
805 index = blob_box.
top() - min_y;
807 deltas[index] -= width;
810 occupation[0] = deltas[0];
811 for (line_index = 1; line_index < line_count; line_index++)
812 occupation[line_index] = occupation[line_index - 1] + deltas[line_index];
◆ compute_occupation_threshold()
void compute_occupation_threshold |
( |
int32_t |
low_window, |
|
|
int32_t |
high_window, |
|
|
int32_t |
line_count, |
|
|
int32_t * |
occupation, |
|
|
int32_t * |
thresholds |
|
) |
| |
compute_occupation_threshold
Compute thresholds for textline or not for the occupation array.
Definition at line 821 of file makerow.cpp.
839 if (low_window + high_window < line_count) {
840 for (sum = 0, high_index = 0; high_index < low_window; high_index++)
841 sum += occupation[high_index];
842 for (low_index = 0; low_index < high_window; low_index++, high_index++)
843 sum += occupation[high_index];
844 min_occ = occupation[0];
846 for (test_index = 1; test_index < high_index; test_index++) {
847 if (occupation[test_index] <= min_occ) {
848 min_occ = occupation[test_index];
849 min_index = test_index;
852 for (line_index = 0; line_index < low_window; line_index++)
853 thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
855 for (low_index = 0; high_index < line_count; low_index++, high_index++) {
856 sum -= occupation[low_index];
857 sum += occupation[high_index];
858 if (occupation[high_index] <= min_occ) {
860 min_occ = occupation[high_index];
861 min_index = high_index;
864 if (min_index <= low_index) {
865 min_occ = occupation[low_index + 1];
866 min_index = low_index + 1;
867 for (test_index = low_index + 2; test_index <= high_index;
869 if (occupation[test_index] <= min_occ) {
870 min_occ = occupation[test_index];
872 min_index = test_index;
876 thresholds[line_index++] = (sum - min_occ) / divisor + min_occ;
880 min_occ = occupation[0];
882 for (sum = 0, low_index = 0; low_index < line_count; low_index++) {
883 if (occupation[low_index] < min_occ) {
884 min_occ = occupation[low_index];
885 min_index = low_index;
887 sum += occupation[low_index];
891 for (; line_index < line_count; line_index++)
892 thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
◆ compute_page_skew()
void compute_page_skew |
( |
TO_BLOCK_LIST * |
blocks, |
|
|
float & |
page_m, |
|
|
float & |
page_err |
|
) |
| |
Definition at line 286 of file makerow.cpp.
296 TO_BLOCK_IT block_it = blocks;
300 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
301 block_it.forward ()) {
302 POLY_BLOCK* pb = block_it.data()->block->pdblk.poly_block();
303 if (pb !=
nullptr && !pb->
IsText())
305 row_count += block_it.data ()->get_rows ()->length ();
307 TO_ROW_IT row_it(block_it.data()->get_rows());
308 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
309 blob_count += row_it.data ()->blob_list ()->length ();
311 if (row_count == 0) {
317 std::vector<float> gradients(blob_count);
319 std::vector<float> errors(blob_count);
322 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
323 block_it.forward ()) {
324 POLY_BLOCK* pb = block_it.data()->block->pdblk.poly_block();
325 if (pb !=
nullptr && !pb->
IsText())
327 TO_ROW_IT row_it(block_it.data ()->get_rows());
328 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
329 row = row_it.data ();
330 blob_count = row->
blob_list ()->length ();
331 row_err = static_cast<int32_t>(ceil (row->
line_error ()));
334 if (textord_biased_skewcalc) {
335 blob_count /= row_err;
336 for (blob_count /= row_err; blob_count > 0; blob_count--) {
337 gradients[row_index] = row->
line_m ();
344 gradients[row_index] = row->
line_m ();
350 if (row_index == 0) {
352 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
353 block_it.forward ()) {
354 POLY_BLOCK* pb = block_it.data()->block->pdblk.poly_block();
355 if (pb !=
nullptr && !pb->
IsText())
357 TO_ROW_IT row_it(block_it.data()->get_rows());
358 for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
360 row = row_it.data ();
361 gradients[row_index] = row->
line_m ();
367 row_count = row_index;
369 &gradients[0], row_count);
370 page_m = gradients[row_index];
372 &errors[0], row_count);
373 page_err = errors[row_index];
◆ compute_row_descdrop()
int32_t compute_row_descdrop |
( |
TO_ROW * |
row, |
|
|
float |
gradient, |
|
|
int |
xheight_blob_count, |
|
|
STATS * |
asc_heights |
|
) |
| |
Definition at line 1563 of file makerow.cpp.
1568 i_min = static_cast<int>(
1575 int num_potential_asc = 0;
1576 for (
int i = i_min; i <= i_max; ++i) {
1577 num_potential_asc += asc_heights->
pile_count(i);
1587 STATS heights (min_height, max_height + 1);
1588 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1589 blob = blob_it.data();
1593 height = (gradient * xcentre + row->
parallel_c() -
1595 if (height >= min_height && height <= max_height)
1596 heights.add(static_cast<int>(floor(height + 0.5)), 1);
1599 int blob_index = heights.mode();
1600 int blob_count = heights.pile_count(blob_index);
1601 float total_fraction =
1603 if (static_cast<float>(blob_count + num_potential_asc) <
1604 xheight_blob_count * total_fraction) {
1607 int descdrop = blob_count > 0 ? -blob_index : 0;
1609 tprintf(
"Descdrop: %d (potential ascenders %d, descenders %d)\n",
1610 descdrop, num_potential_asc, blob_count);
◆ compute_row_stats()
void compute_row_stats |
( |
TO_BLOCK * |
block, |
|
|
bool |
testing_on |
|
) |
| |
Definition at line 1143 of file makerow.cpp.
1151 TO_ROW_IT row_it = block->
get_rows ();
1153 int16_t rowcount = row_it.length ();
1155 std::vector<TO_ROW*> rows(rowcount);
1158 row_it.move_to_last ();
1160 row = row_it.data ();
1161 if (prev_row !=
nullptr) {
1162 rows[rowcount++] = prev_row;
1165 tprintf (
"Row at %g yields spacing of %g\n",
1171 while (!row_it.at_last ());
1176 tprintf (
"Blob based spacing=(%g,%g), offset=%g",
1181 iqr = rows[row_index]->spacing;
1184 iqr -= rows[row_index]->spacing;
1187 block->
key_row = rows[row_index];
1189 tprintf (
" row based=%g(%g)", rows[row_index]->spacing, iqr);
1193 if (rows[row_index]->spacing < block->line_spacing
1194 && rows[row_index]->spacing > block->
line_size)
1196 block->
line_size = rows[row_index]->spacing;
1198 else if (rows[row_index]->spacing > block->
line_spacing)
1203 if (rows[row_index]->spacing < block->line_spacing)
1204 block->
line_size = rows[row_index]->spacing;
1219 tprintf (
"\nEstimate line size=%g, spacing=%g, offset=%g\n",
◆ compute_xheight_from_modes()
int compute_xheight_from_modes |
( |
STATS * |
heights, |
|
|
STATS * |
floating_heights, |
|
|
bool |
cap_only, |
|
|
int |
min_height, |
|
|
int |
max_height, |
|
|
float * |
xheight, |
|
|
float * |
ascrise |
|
) |
| |
Definition at line 1467 of file makerow.cpp.
1470 int blob_index = heights->
mode();
1471 int blob_count = heights->
pile_count(blob_index);
1473 tprintf(
"min_height=%d, max_height=%d, mode=%d, count=%d, total=%d\n",
1474 min_height, max_height, blob_index, blob_count,
1477 floating_heights->
print();
1479 if (blob_count == 0)
return 0;
1481 bool in_best_pile =
false;
1482 int prev_size = -INT32_MAX;
1486 if (cap_only && mode_count > 1)
1490 tprintf(
"found %d modes: ", mode_count);
1491 for (x = 0; x < mode_count; x++)
tprintf(
"%d ", modes[x]);
1495 for (x = 0; x < mode_count - 1; x++) {
1496 if (modes[x] != prev_size + 1)
1497 in_best_pile =
false;
1498 int modes_x_count = heights->
pile_count(modes[x]) -
1501 (in_best_pile || modes_x_count > best_count)) {
1502 for (
int asc = x + 1; asc < mode_count; asc++) {
1504 static_cast<float>(modes[asc]) / static_cast<float>(modes[x]);
1509 if (modes_x_count > best_count) {
1510 in_best_pile =
true;
1511 best_count = modes_x_count;
1514 tprintf(
"X=%d, asc=%d, count=%d, ratio=%g\n",
1515 modes[x], modes[asc]-modes[x], modes_x_count, ratio);
1517 prev_size = modes[x];
1518 *xheight = static_cast<float>(modes[x]);
1519 *ascrise = static_cast<float>(modes[asc] - modes[x]);
1524 if (*xheight == 0) {
1531 if (floating_heights->
get_total() > 0) {
1532 for (x = min_height; x < max_height; ++x) {
1535 blob_index = heights->
mode();
1536 for (x = min_height; x < max_height; ++x) {
1540 *xheight = static_cast<float>(blob_index);
1542 best_count = heights->
pile_count(blob_index);
1544 tprintf(
"Single mode xheight set to %g\n", *xheight);
1546 tprintf(
"Multi-mode xheight set to %g, asc=%g\n", *xheight, *ascrise);
◆ correct_row_xheight()
void correct_row_xheight |
( |
TO_ROW * |
row, |
|
|
float |
xheight, |
|
|
float |
ascrise, |
|
|
float |
descdrop |
|
) |
| |
Definition at line 1685 of file makerow.cpp.
1689 tprintf(
"correcting row xheight: row->xheight %.4f"
1690 ", row->acrise %.4f row->descdrop %.4f\n",
1693 bool normal_xheight =
1710 (normal_xheight || cap_xheight)) ||
1711 (row_category ==
ROW_UNKNOWN && normal_xheight)) {
1735 if (row->
xheight < xheight + ascrise && row->
xheight > xheight) {
1738 tprintf(
"all caps with irregular xheight\n");
1747 tprintf(
"corrected row->xheight = %.4f, row->acrise = %.4f, row->descdrop"
◆ delete_non_dropout_rows()
void delete_non_dropout_rows |
( |
TO_BLOCK * |
block, |
|
|
float |
gradient, |
|
|
FCOORD |
rotation, |
|
|
int32_t |
block_edge, |
|
|
bool |
testing_on |
|
) |
| |
delete_non_dropout_rows
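Delete rows that have a neighbour with better dropout characteristics (see find_best_dropout_row), moving the blobs of each deleted row back to the block's blob list.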
Definition at line 570 of file makerow.cpp.
586 TO_ROW_IT row_it = block->
get_rows ();
587 BLOBNBOX_IT blob_it = &block->
blobs;
589 if (row_it.length () == 0)
594 min_y = block_box.
bottom () - 1;
595 max_y = block_box.
top () + 1;
596 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
597 line_index = static_cast<int32_t>(floor (row_it.data ()->intercept ()));
598 if (line_index <= min_y)
599 min_y = line_index - 1;
600 if (line_index >= max_y)
601 max_y = line_index + 1;
603 line_count = max_y - min_y + 1;
607 std::vector<int32_t> deltas(line_count);
609 std::vector<int32_t> occupation(line_count);
618 max_y - min_y + 1, &occupation[0], &deltas[0]);
619 #ifndef GRAPHICS_DISABLED
621 draw_occupation(xleft, ybottom, min_y, max_y, &occupation[0], &deltas[0]);
625 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
626 row = row_it.data ();
627 line_index = static_cast<int32_t>(floor (row->
intercept ()));
628 distance = deltas[line_index - min_y];
630 line_index, &row_it, testing_on)) {
631 #ifndef GRAPHICS_DISABLED
636 blob_it.add_list_after (row_it.data ()->blob_list ());
637 delete row_it.extract ();
640 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
641 blob_it.add_list_after (row_it.data ()->blob_list ());
◆ deskew_block_coords()
TBOX deskew_block_coords |
( |
TO_BLOCK * |
block, |
|
|
float |
gradient |
|
) |
| |
Definition at line 732 of file makerow.cpp.
740 TO_ROW_IT row_it = block->
get_rows ();
745 length = sqrt (gradient * gradient + 1);
746 rotation =
FCOORD (1 / length, -gradient / length);
747 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
748 row = row_it.data ();
750 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
751 blob_it.forward ()) {
752 blob = blob_it.data ();
754 blob_box.
rotate (rotation);
◆ expand_rows()
void expand_rows |
( |
ICOORD |
page_tr, |
|
|
TO_BLOCK * |
block, |
|
|
float |
gradient, |
|
|
FCOORD |
rotation, |
|
|
int32_t |
block_edge, |
|
|
bool |
testing_on |
|
) |
| |
Definition at line 949 of file makerow.cpp.
959 float y_bottom, y_top;
963 BLOBNBOX_IT blob_it = &block->
blobs;
964 TO_ROW_IT row_it = block->
get_rows ();
966 #ifndef GRAPHICS_DISABLED
975 if (block->
get_rows ()->length () == 0)
981 if (block->
get_rows ()->length () == 0)
990 row_it.move_to_last ();
992 row = row_it.data ();
993 y_max = row->
max_y ();
994 y_min = row->
min_y ();
1000 if (y_min > y_bottom) {
1002 tprintf(
"Expanding bottom of row at %f from %f to %f\n",
1005 swallowed_row =
true;
1006 while (swallowed_row && !row_it.at_last ()) {
1007 swallowed_row =
false;
1009 test_row = row_it.data_relative (1);
1011 if (test_row->
max_y () > y_bottom) {
1012 if (test_row->
min_y () > y_bottom) {
1016 #ifndef GRAPHICS_DISABLED
1024 blob_it.set_to_list (row->
blob_list ());
1025 blob_it.add_list_after (test_row->
blob_list ());
1027 delete row_it.extract ();
1029 swallowed_row =
true;
1031 else if (test_row->
max_y () < y_min) {
1033 y_bottom = test_row->
max_y ();
1035 tprintf(
"Truncating limit to %f due to touching row at %f\n",
1041 tprintf(
"Not expanding limit beyond %f due to touching row at %f\n",
1048 if (y_max < y_top) {
1050 tprintf(
"Expanding top of row at %f from %f to %f\n",
1052 swallowed_row =
true;
1053 while (swallowed_row && !row_it.at_first ()) {
1054 swallowed_row =
false;
1056 test_row = row_it.data_relative (-1);
1057 if (test_row->
min_y () < y_top) {
1058 if (test_row->
max_y () < y_top) {
1062 blob_it.set_to_list (row->
blob_list ());
1063 #ifndef GRAPHICS_DISABLED
1071 blob_it.add_list_after (test_row->
blob_list ());
1073 delete row_it.extract ();
1075 swallowed_row =
true;
1077 else if (test_row->
min_y () < y_max) {
1079 y_top = test_row->
min_y ();
1081 tprintf(
"Truncating limit to %f due to touching row at %f\n",
1087 tprintf(
"Not expanding limit beyond %f due to touching row at %f\n",
1098 while (!row_it.at_last ());
◆ fill_heights()
void fill_heights |
( |
TO_ROW * |
row, |
|
|
float |
gradient, |
|
|
int |
min_height, |
|
|
int |
max_height, |
|
|
STATS * |
heights, |
|
|
STATS * |
floating_heights |
|
) |
| |
Definition at line 1406 of file makerow.cpp.
1414 if (blob_it.empty())
return;
1415 bool has_rep_chars =
1418 blob = blob_it.data();
1427 top -= gradient * xcentre + row->
parallel_c();
1428 if (top >= min_height && top <= max_height) {
1429 heights->
add(static_cast<int32_t>(floor(top + 0.5)), 1);
1431 floating_heights->
add(static_cast<int32_t>(floor(top + 0.5)), 1);
1439 while (!blob_it.at_first() &&
1440 blob_it.data()->repeated_set() == repeated_set) {
1443 tprintf(
"Skipping repeated char when computing xheight\n");
1448 }
while (!blob_it.at_first());
◆ find_best_dropout_row()
bool find_best_dropout_row |
( |
TO_ROW * |
row, |
|
|
int32_t |
distance, |
|
|
float |
dist_limit, |
|
|
int32_t |
line_index, |
|
|
TO_ROW_IT * |
row_it, |
|
|
bool |
testing_on |
|
) |
| |
Definition at line 652 of file makerow.cpp.
667 tprintf (
"Row at %g(%g), dropout dist=%d,",
677 if (abs_dist > dist_limit) {
679 tprintf (
" too far - deleting\n");
683 if ((distance < 0 && !row_it->at_last ())
684 || (
distance >= 0 && !row_it->at_first ())) {
685 row_offset = row_inc;
687 next_row = row_it->data_relative (row_offset);
688 next_index = static_cast<int32_t>(floor (next_row->
intercept ()));
690 && next_index < line_index
693 && next_index > line_index
696 tprintf (
" nearer neighbour (%d) at %g\n",
702 else if (next_index == line_index
706 tprintf (
" equal but more believable at %g (%g/%g)\n",
714 row_offset += row_inc;
716 while ((next_index == line_index
718 && row_offset < row_it->length ());
◆ fit_lms_line()
void fit_lms_line |
( |
TO_ROW * |
row | ) |
|
Definition at line 266 of file makerow.cpp.
271 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
272 const TBOX& box = blob_it.data()->bounding_box();
275 double error = lms.
Fit(&m, &c);
◆ fit_parallel_lms()
void fit_parallel_lms |
( |
float |
gradient, |
|
|
TO_ROW * |
row |
|
) |
| |
Definition at line 1973 of file makerow.cpp.
1980 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1981 if (!blob_it.data()->joined_to_prev()) {
1982 const TBOX& box = blob_it.data()->bounding_box();
1990 error = lms.
Fit(&gradient, &c);
◆ fit_parallel_rows()
void fit_parallel_rows |
( |
TO_BLOCK * |
block, |
|
|
float |
gradient, |
|
|
FCOORD |
rotation, |
|
|
int32_t |
block_edge, |
|
|
bool |
testing_on |
|
) |
| |
Definition at line 1931 of file makerow.cpp.
1938 #ifndef GRAPHICS_DISABLED
1941 TO_ROW_IT row_it = block->
get_rows ();
1943 row_it.move_to_first ();
1944 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1945 if (row_it.data ()->blob_list ()->empty ())
1946 delete row_it.extract ();
1950 #ifndef GRAPHICS_DISABLED
1953 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1955 block_edge, colour, rotation);
1956 colour = static_cast<ScrollView::Color>(colour + 1);
◆ linear_spline_baseline()
double* linear_spline_baseline |
( |
TO_ROW * |
row, |
|
|
TO_BLOCK * |
block, |
|
|
int32_t & |
segments, |
|
|
int32_t |
xstarts[] |
|
) |
| |
Definition at line 2187 of file makerow.cpp.
2196 int blobs_per_segment;
2200 BLOBNBOX_IT blob_it = row->
blob_list ();
2201 BLOBNBOX_IT new_it = blob_it;
2207 xstarts[0] = box.
left ();
2209 while (!blob_it.at_first ()) {
2216 blobs_per_segment = blobcount / segments;
2218 auto *coeffs =
new double[segments * 3];
2221 (
"Linear splining baseline of %d blobs at (%d,%d), into %d segments of %d blobs\n",
2222 blobcount, box.
left (), box.
bottom (), segments, blobs_per_segment);
2224 for (index2 = 0; index2 < blobs_per_segment / 2; index2++)
2229 blobindex += blobs_per_segment;
2231 while (index1 < blobindex || (segment == segments && index1 < blobcount)) {
2233 int middle = (box.
left() + box.
right()) / 2;
2236 if (index1 == blobindex - blobs_per_segment / 2
2237 || index1 == blobcount - 1) {
2238 xstarts[segment] = box.
left ();
2242 coeffs[segment * 3 - 3] = 0;
2243 coeffs[segment * 3 - 2] = b;
2244 coeffs[segment * 3 - 1] = c;
2246 if (segment > segments)
2249 blobindex += blobs_per_segment;
2251 while (index2 < blobindex || (segment == segments && index2 < blobcount)) {
2253 int middle = (new_box.
left() + new_box.
right()) / 2;
2256 if (index2 == blobindex - blobs_per_segment / 2
2257 || index2 == blobcount - 1) {
2258 xstarts[segment] = new_box.
left ();
2262 coeffs[segment * 3 - 3] = 0;
2263 coeffs[segment * 3 - 2] = b;
2264 coeffs[segment * 3 - 1] = c;
2267 while (segment <= segments);
◆ make_baseline_spline()
Definition at line 2056 of file makerow.cpp.
2062 auto *xstarts =
new int32_t[row->
blob_list()->length() + 1];
2067 xstarts[1] = xstarts[segments];
2069 coeffs =
new double[3];
2071 coeffs[1] = row->
line_m ();
2072 coeffs[2] = row->
line_c ();
◆ make_initial_textrows()
Definition at line 226 of file makerow.cpp.
232 TO_ROW_IT row_it = block->
get_rows ();
234 #ifndef GRAPHICS_DISABLED
244 row_it.move_to_first ();
245 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
247 #ifndef GRAPHICS_DISABLED
250 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
252 colour = static_cast<ScrollView::Color>(colour + 1);
◆ make_rows()
float make_rows(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
Definition at line 200 of file makerow.cpp.
203 TO_BLOCK_IT block_it;
205 block_it.set_to_list(port_blocks);
206 for (block_it.mark_cycle_pt(); !block_it.cycled_list();
212 block_it.set_to_list(port_blocks);
213 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
215 block_it.data()->block->pdblk.bounding_box().left(),
◆ make_single_row()
float make_single_row(ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks)
Definition at line 163 of file makerow.cpp.
165 BLOBNBOX_IT blob_it = &block->
blobs;
166 TO_ROW_IT row_it = block->
get_rows();
172 if (block->
blobs.singleton() && allow_sub_blobs) {
173 blob_it.move_to_first();
174 float size = MakeRowFromSubBlobs(block, blob_it.data()->cblob(), &row_it);
177 }
else if (block->
blobs.empty()) {
182 blob_it.add_after_then_move(bblob);
184 MakeRowFromBlobs(block->
line_size, &blob_it, &row_it);
186 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward())
◆ mark_repeated_chars()
void mark_repeated_chars(TO_ROW *row)
Definition at line 2639 of file makerow.cpp.
2641 int num_repeated_sets = 0;
2642 if (!box_it.empty()) {
2645 int repeat_length = 1;
2648 BLOBNBOX_IT test_it(box_it);
2649 for (test_it.forward(); !test_it.at_first();) {
2650 bblob = test_it.data();
2654 bblob = test_it.data();
2663 num_repeated_sets++;
2664 for (; repeat_length > 0; box_it.forward(), --repeat_length) {
2665 bblob = box_it.data();
2672 }
while (!box_it.at_first());
◆ most_overlapping_row()
OVERLAP_STATE most_overlapping_row(TO_ROW_IT *row_it, TO_ROW *&best_row, float top, float bottom, float rowsize, bool testing_blob)
Definition at line 2478 of file makerow.cpp.
2489 float merge_top, merge_bottom;
2493 BLOBNBOX_IT blob_it;
2496 row = row_it->data ();
2497 bestover = top - bottom;
2498 if (top > row->
max_y ())
2499 bestover -= top - row->
max_y ();
2500 if (bottom < row->min_y ())
2502 bestover -= row->
min_y () - bottom;
2504 tprintf(
"Test blob y=(%g,%g), row=(%f,%f), size=%g, overlap=%f\n",
2505 bottom, top, row->
min_y(), row->
max_y(), rowsize, bestover);
2509 if (!row_it->at_last ()) {
2511 test_row = row_it->data ();
2512 if (test_row->
min_y () <= top && test_row->
max_y () >= bottom) {
2514 test_row->
max_y () >
2517 test_row->
min_y () <
2519 if (merge_top - merge_bottom <= rowsize) {
2521 tprintf (
"Merging rows at (%g,%g), (%g,%g)\n",
2525 test_row->
set_limits (merge_bottom, merge_top);
2526 blob_it.set_to_list (test_row->
blob_list ());
2527 blob_it.add_list_after (row->
blob_list ());
2529 row_it->backward ();
2530 delete row_it->extract ();
2534 overlap = top - bottom;
2535 if (top > test_row->
max_y ())
2536 overlap -= top - test_row->
max_y ();
2537 if (bottom < test_row->min_y ())
2538 overlap -= test_row->
min_y () - bottom;
2539 if (bestover >= rowsize - 1 && overlap >= rowsize - 1) {
2542 if (overlap > bestover) {
2547 tprintf(
"Test blob y=(%g,%g), row=(%f,%f), size=%g, overlap=%f->%f\n",
2548 bottom, top, test_row->
min_y(), test_row->
max_y(),
2549 rowsize, overlap, bestover);
2554 while (!row_it->at_last ()
2555 && test_row->
min_y () <= top && test_row->
max_y () >= bottom);
2556 while (row_it->data () != row)
2557 row_it->backward ();
2559 if (top - bottom - bestover > rowsize * textord_overlap_x &&
◆ pre_associate_blobs()
Definition at line 1845 of file makerow.cpp.
1851 #ifndef GRAPHICS_DISABLED
1858 BLOBNBOX_IT blob_it;
1859 BLOBNBOX_IT start_it;
1860 TO_ROW_IT row_it = block->
get_rows ();
1862 #ifndef GRAPHICS_DISABLED
1866 blob_rotation =
FCOORD (rotation.
x (), -rotation.
y ());
1867 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1869 blob_it.set_to_list (row_it.data ()->blob_list ());
1870 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
1871 blob_it.forward ()) {
1872 blob = blob_it.data ();
1885 if (!blob_it.at_last ()) {
1886 nextblob = blob_it.data_relative(1);
1889 blob->
merge(nextblob);
1896 blob->
chop (&start_it, &blob_it,
1902 #ifndef GRAPHICS_DISABLED
1907 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
1908 blob_it.forward ()) {
1909 blob = blob_it.data ();
1911 blob_box.
rotate (rotation);
1914 blob_box.
right (), blob_box.
top ());
1917 colour = static_cast<ScrollView::Color>(colour + 1);
◆ row_spacing_order()
int row_spacing_order(const void *item1, const void *item2)
Definition at line 2617 of file makerow.cpp.
2621 const TO_ROW *row1 = *reinterpret_cast<const TO_ROW* const*>(item1);
2623 const TO_ROW *row2 = *reinterpret_cast<const TO_ROW* const*>(item2);
◆ row_y_order()
int row_y_order(const void *item1, const void *item2)
Definition at line 2595 of file makerow.cpp.
2599 const TO_ROW *row1 = *reinterpret_cast<const TO_ROW* const*>(item1);
2601 const TO_ROW *row2 = *reinterpret_cast<const TO_ROW* const*>(item2);
◆ segment_baseline()
bool segment_baseline(TO_ROW *row, TO_BLOCK *block, int32_t &segments, int32_t *xstarts)
Definition at line 2088 of file makerow.cpp.
2104 BLOBNBOX_IT blob_it = row->
blob_list ();
2105 BLOBNBOX_IT new_it = blob_it;
2108 needs_curve =
false;
2110 xstarts[0] = box.
left ();
2112 blobcount = row->
blob_list ()->length ();
2114 tprintf (
"Segmenting baseline of %d blobs at (%d,%d)\n",
2118 blob_it.move_to_last ();
2119 box = blob_it.data ()->bounding_box ();
2120 xstarts[1] = box.
right ();
2124 new_it.mark_cycle_pt ();
2127 middle = (new_box.
left () + new_box.
right ()) / 2.0;
2130 yshifts.
add (yshift, blobindex);
2131 if (new_it.cycled_list ()) {
2132 xstarts[1] = new_box.
right ();
2153 xstarts[segments++] = box.
left ();
2159 middle = (new_box.
left () + new_box.
right ()) / 2.0;
2161 yshifts.
add (yshift, blobindex);
2165 while (!new_it.cycled_list ());
2167 xstarts[segments] = new_box.
right ();
2170 xstarts[--segments] = new_box.
right ();
2173 tprintf (
"Made %d segments on row at (%d,%d)\n",
◆ separate_underlines()
void separate_underlines(TO_BLOCK *block, float gradient, FCOORD rotation, bool testing_on)
Definition at line 1772 of file makerow.cpp.
1783 BLOBNBOX_IT blob_it;
1787 TO_ROW_IT row_it = block->
get_rows();
1792 length = sqrt(1 + gradient * gradient);
1793 g_vec =
FCOORD(1 / length, -gradient / length);
1794 blob_rotation =
FCOORD(rotation.
x(), -rotation.
y());
1795 blob_rotation.
rotate(g_vec);
1796 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1797 row = row_it.data();
1800 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list();
1801 blob_it.forward()) {
1802 blob = blob_it.data();
1810 rotated_blob, static_cast<int16_t>(row->
intercept()),
1811 static_cast<int16_t>(
1815 under_it.add_after_then_move(blob_it.extract());
1817 tprintf(
"Underlined blob at:");
1818 rotated_blob->bounding_box().print();
1822 }
else if (CountOverlaps(blob->
bounding_box(), min_blob_height,
1824 textord_max_blob_overlaps) {
1825 large_it.add_after_then_move(blob_it.extract());
1827 tprintf(
"Large blob overlaps %d blobs at:",
1828 CountOverlaps(blob_box, min_blob_height,
1833 delete rotated_blob;
◆ vigorous_noise_removal()
void vigorous_noise_removal(TO_BLOCK *block)
Definition at line 466 of file makerow.cpp.
467 TO_ROW_IT row_it = block->
get_rows ();
468 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
469 TO_ROW* row = row_it.data();
473 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
478 STATS hstats(0, max_height + 1);
479 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
485 float xheight = hstats.median();
488 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
493 if (prev !=
nullptr) {
494 if (dot_of_i(blob, prev, row))
497 if (!b_it.at_last()) {
498 BLOBNBOX* next = b_it.data_relative(1);
499 if (dot_of_i(blob, next, row))
503 delete blob->
cblob();
504 delete b_it.extract();
◆ kMinLeaderCount
const int kMinLeaderCount = 5 |
◆ kMinSize
◆ kNoiseSize
const double kNoiseSize = 0.5 |
◆ textord_ascheight_mode_fraction
double textord_ascheight_mode_fraction = 0.08 |
"Min pile height to make ascheight"
Definition at line 91 of file makerow.cpp.
◆ textord_ascx_ratio_max
double textord_ascx_ratio_max = 1.8 |
◆ textord_ascx_ratio_min
double textord_ascx_ratio_min = 1.25 |
◆ textord_chop_width
double textord_chop_width = 1.5 |
"Max width before chopping"
Definition at line 76 of file makerow.cpp.
◆ textord_debug_blob
bool textord_debug_blob = false |
"Print test blob information"
Definition at line 101 of file makerow.cpp.
◆ textord_debug_xheights
bool textord_debug_xheights = false |
"Test xheight algorithms"
Definition at line 55 of file makerow.cpp.
◆ textord_descx_ratio_max
double textord_descx_ratio_max = 0.6 |
◆ textord_descx_ratio_min
double textord_descx_ratio_min = 0.25 |
◆ textord_excess_blobsize
double textord_excess_blobsize = 1.3 |
"New row made if blob makes row this big"
Definition at line 83 of file makerow.cpp.
◆ textord_fix_makerow_bug
bool textord_fix_makerow_bug = true |
"Prevent multiple baselines"
Definition at line 54 of file makerow.cpp.
◆ textord_fix_xheight_bug
bool textord_fix_xheight_bug = true |
◆ textord_heavy_nr
bool textord_heavy_nr = false |
"Vigorously remove noise"
Definition at line 42 of file makerow.cpp.
◆ textord_linespace_iqrlimit
double textord_linespace_iqrlimit = 0.2 |
"Max iqr/median for linespace"
Definition at line 74 of file makerow.cpp.
◆ textord_lms_line_trials
int textord_lms_line_trials = 12 |
"Number of linew fits to do"
Definition at line 99 of file makerow.cpp.
◆ textord_min_blob_height_fraction
double textord_min_blob_height_fraction = 0.75 |
"Min blob height/top to include blob top into xheight stats"
Definition at line 87 of file makerow.cpp.
◆ textord_min_blobs_in_row
int textord_min_blobs_in_row = 4 |
"Min blobs before gradient counted"
Definition at line 62 of file makerow.cpp.
◆ textord_min_linesize
double textord_min_linesize = 1.25 |
"* blob height for initial linesize"
Definition at line 81 of file makerow.cpp.
◆ textord_min_xheight
int textord_min_xheight = 10 |
"Min credible pixel xheight"
Definition at line 67 of file makerow.cpp.
◆ textord_minxh
double textord_minxh = 0.25 |
"fraction of linesize for min xheight"
Definition at line 80 of file makerow.cpp.
◆ textord_new_initial_xheight
bool textord_new_initial_xheight = true |
"Use test xheight mechanism"
Definition at line 100 of file makerow.cpp.
◆ textord_occupancy_threshold
double textord_occupancy_threshold = 0.4 |
"Fraction of neighbourhood"
Definition at line 84 of file makerow.cpp.
◆ textord_old_baselines
bool textord_old_baselines = true |
"Use old baseline algorithm"
Definition at line 51 of file makerow.cpp.
◆ textord_old_xheight
bool textord_old_xheight = false |
"Use old xheight algorithm"
Definition at line 52 of file makerow.cpp.
◆ textord_parallel_baselines
bool textord_parallel_baselines = true |
"Force parallel baselines"
Definition at line 49 of file makerow.cpp.
◆ textord_show_expanded_rows
bool textord_show_expanded_rows = false |
"Display rows after expanding"
Definition at line 45 of file makerow.cpp.
◆ textord_show_final_blobs
bool textord_show_final_blobs = false |
"Display blob bounds after pre-ass"
Definition at line 47 of file makerow.cpp.
◆ textord_show_final_rows
bool textord_show_final_rows = false |
"Display rows after final fitting"
Definition at line 46 of file makerow.cpp.
◆ textord_show_initial_rows
bool textord_show_initial_rows = false |
"Display row accumulation"
Definition at line 43 of file makerow.cpp.
◆ textord_show_parallel_rows
bool textord_show_parallel_rows = false |
"Display page correlated rows"
Definition at line 44 of file makerow.cpp.
◆ textord_skew_ile
double textord_skew_ile = 0.5 |
"Ile of gradients for page skew"
Definition at line 72 of file makerow.cpp.
◆ textord_skew_lag
double textord_skew_lag = 0.02 |
"Lag for skew on row accumulation"
Definition at line 73 of file makerow.cpp.
◆ textord_spline_medianwin
int textord_spline_medianwin = 6 |
"Size of window for spline segmentation"
Definition at line 64 of file makerow.cpp.
◆ textord_spline_minblobs
int textord_spline_minblobs = 8 |
"Min blobs in each spline segment"
Definition at line 63 of file makerow.cpp.
◆ textord_spline_outlier_fraction
double textord_spline_outlier_fraction = 0.1 |
"Fraction of line spacing for outlier"
Definition at line 71 of file makerow.cpp.
◆ textord_spline_shift_fraction
double textord_spline_shift_fraction = 0.02 |
"Fraction of line spacing for quad"
Definition at line 69 of file makerow.cpp.
◆ textord_straight_baselines
bool textord_straight_baselines = false |
"Force straight baselines"
Definition at line 50 of file makerow.cpp.
◆ textord_test_landscape
bool textord_test_landscape = false |
"Tests refer to land/port"
Definition at line 48 of file makerow.cpp.
◆ textord_test_x
int textord_test_x = -INT32_MAX |
◆ textord_test_y
int textord_test_y = -INT32_MAX |
◆ textord_underline_width
double textord_underline_width = 2.0 |
"Multiple of line_size for underline"
Definition at line 85 of file makerow.cpp.
◆ textord_width_limit
double textord_width_limit = 8 |
"Max width of blobs to make rows"
Definition at line 75 of file makerow.cpp.
◆ textord_xheight_error_margin
double textord_xheight_error_margin = 0.1 |
◆ textord_xheight_mode_fraction
double textord_xheight_mode_fraction = 0.4 |
"Min pile height to make xheight"
Definition at line 89 of file makerow.cpp.
C_BLOB * crotate_cblob(C_BLOB *blob, FCOORD rotation)
double textord_ascx_ratio_max
bool textord_fix_xheight_bug
void set_num_repeated_sets(int num_sets)
int32_t choose_nth_item(int32_t index, float *array, int32_t count)
int32_t get_total() const
bool textord_show_parallel_rows
BLOBNBOX_LIST small_blobs
double textord_linespace_iqrlimit
static C_BLOB * FakeBlob(const TBOX &box)
int32_t min_bucket() const
bool textord_fix_makerow_bug
static const double kXHeightFraction
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
ScrollView * create_to_win(ICOORD page_tr)
void fit_parallel_lms(float gradient, TO_ROW *row)
BLOBNBOX_LIST noise_blobs
bool test_underline(bool testing_on, C_BLOB *blob, int16_t baseline, int16_t xheight)
int32_t compute_height_modes(STATS *heights, int32_t min_height, int32_t max_height, int32_t *modes, int32_t maxmodes)
void set_repeated_set(int set_id)
double textord_chop_width
void set_parallel_line(float gradient, float new_c, float new_error)
TBOX deskew_block_coords(TO_BLOCK *block, float gradient)
OVERLAP_STATE most_overlapping_row(TO_ROW_IT *row_it, TO_ROW *&best_row, float top, float bottom, float rowsize, bool testing_blob)
bool textord_straight_baselines
void compute_line_occupation(TO_BLOCK *block, float gradient, int32_t min_y, int32_t max_y, int32_t *occupation, int32_t *deltas)
bool textord_test_landscape
double textord_spline_shift_fraction
bool contains(const FCOORD pt) const
int32_t max_bucket() const
int32_t pile_count(int32_t value) const
double textord_occupancy_threshold
void fit_parallel_rows(TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
void DrawTo(int x, int y)
void plot_parallel_row(TO_ROW *row, float gradient, int32_t left, ScrollView::Color colour, FCOORD rotation)
bool textord_debug_xheights
void fit_lms_line(TO_ROW *row)
double textord_descx_ratio_max
int textord_spline_minblobs
void rotate(const FCOORD &vec)
void plot_to_row(TO_ROW *row, ScrollView::Color colour, FCOORD rotation)
void assign_blobs_to_rows(TO_BLOCK *block, float *gradient, int pass, bool reject_misses, bool make_new_rows, bool drawing_skew)
double textord_underline_width
double textord_excess_blobsize
bool textord_show_final_blobs
int blob_x_order(const void *item1, const void *item2)
ROW_CATEGORY get_row_category(const TO_ROW *row)
bool textord_parallel_baselines
double textord_xheight_mode_fraction
float initial_min_y() const
bool rep_chars_marked() const
PDBLK pdblk
Page Description Block.
BLOBNBOX_LIST large_blobs
void compute_row_stats(TO_BLOCK *block, bool testing_on)
bool major_x_overlap(const TBOX &box) const
int textord_spline_medianwin
void compute_dropout_distances(int32_t *occupation, int32_t *thresholds, int32_t line_count)
bool within_error_margin(float test, float num, float margin)
int textord_lms_line_trials
const int kMinLeaderCount
double ConstrainedFit(const FCOORD &direction, double min_dist, double max_dist, bool debug, ICOORD *line_pt)
void adjust_row_limits(TO_BLOCK *block)
int num_repeated_sets() const
bool joined_to_prev() const
bool textord_show_initial_rows
int textord_min_blobs_in_row
double textord_min_blob_height_fraction
double textord_descx_ratio_min
static const double kAscenderFraction
bool segment_baseline(TO_ROW *row, TO_BLOCK *block, int32_t &segments, int32_t *xstarts)
static const double kDescenderFraction
void compute_occupation_threshold(int32_t low_window, int32_t high_window, int32_t line_count, int32_t *occupation, int32_t *thresholds)
UnicodeText::const_iterator::difference_type distance(const UnicodeText::const_iterator &first, const UnicodeText::const_iterator &last)
void rotate(const FCOORD vec)
void add_blob(BLOBNBOX *blob, float top, float bottom, float row_size)
const TBOX & bounding_box() const
bool textord_show_final_rows
TBOX box_next_pre_chopped(BLOBNBOX_IT *it)
int row_y_order(const void *item1, const void *item2)
bool textord_show_expanded_rows
bool find_best_dropout_row(TO_ROW *row, int32_t distance, float dist_limit, int32_t line_index, TO_ROW_IT *row_it, bool testing_on)
void merge(BLOBNBOX *nextblob)
BlobTextFlowType flow() const
float believability() const
void add(int32_t value, int32_t count)
bool textord_new_initial_xheight
double Fit(ICOORD *pt1, ICOORD *pt2)
void make_initial_textrows(ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, bool testing_on)
void delete_non_dropout_rows(TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
void Add(const ICOORD &pt)
DLLSYM void tprintf(const char *format,...)
void add(float value, int32_t key)
double textord_ascx_ratio_min
double * linear_spline_baseline(TO_ROW *row, TO_BLOCK *block, int32_t &segments, int32_t xstarts[])
void set_line(float new_m, float new_c, float new_error)
void SetCursor(int x, int y)
void Rectangle(int x1, int y1, int x2, int y2)
void set_limits(float new_min, float new_max)
int row_spacing_order(const void *item1, const void *item2)
double textord_xheight_error_margin
void chop(BLOBNBOX_IT *start_it, BLOBNBOX_IT *blob_it, FCOORD rotation, float xheight)
void compute_page_skew(TO_BLOCK_LIST *blocks, float &page_m, float &page_err)
BLOBNBOX_LIST * blob_list()
void expand_rows(ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
void draw_occupation(int32_t xleft, int32_t ybottom, int32_t min_y, int32_t max_y, int32_t occupation[], int32_t thresholds[])
void cleanup_rows_making(ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
double textord_ascheight_mode_fraction