tesseract  4.0.0-1-g2a2b
makerow.cpp File Reference
#include <vector>
#include "blobbox.h"
#include "ccstruct.h"
#include "detlinefit.h"
#include "statistc.h"
#include "drawtord.h"
#include "blkocc.h"
#include "sortflts.h"
#include "oldbasel.h"
#include "textord.h"
#include "tordmain.h"
#include "underlin.h"
#include "makerow.h"
#include "tprintf.h"
#include "tovars.h"
#include <algorithm>

Go to the source code of this file.

Namespaces

 tesseract
 

Macros

#define MAX_HEIGHT_MODES   12
 

Functions

make_single_row

Arrange the blobs into a single row. If there is only a single blob, two rows are made instead, in case the top-level blob is a container of the real blobs to recognize.

float make_single_row (ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks)
 
make_rows

Arrange the blobs into rows.

float make_rows (ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
 
make_initial_textrows

Arrange the good blobs into rows of text.

void make_initial_textrows (ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, bool testing_on)
 
fit_lms_line

Fit an LMS line to a row.

void fit_lms_line (TO_ROW *row)
 
find_best_dropout_row

Delete this row if it has a neighbour with better dropout characteristics. TRUE is returned if the row should be deleted.

bool find_best_dropout_row (TO_ROW *row, int32_t distance, float dist_limit, int32_t line_index, TO_ROW_IT *row_it, bool testing_on)
 
deskew_block_coords

Compute the bounding box of all the blobs in the block if they were deskewed without actually doing it.

TBOX deskew_block_coords (TO_BLOCK *block, float gradient)
 
compute_line_occupation

Compute the pixel projection back on the y axis given the global skew. Also compute the 1st derivative.

void compute_line_occupation (TO_BLOCK *block, float gradient, int32_t min_y, int32_t max_y, int32_t *occupation, int32_t *deltas)
 
void compute_occupation_threshold (int32_t low_window, int32_t high_window, int32_t line_count, int32_t *occupation, int32_t *thresholds)
 
compute_dropout_distances

Compute the distance from each coordinate to the nearest dropout.

void compute_dropout_distances (int32_t *occupation, int32_t *thresholds, int32_t line_count)
 
expand_rows

Expand each row to the least of its allowed size and touching its neighbours. If the expansion would entirely swallow a neighbouring row then do so.

void expand_rows (ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
 
void adjust_row_limits (TO_BLOCK *block)
 
compute_row_stats

Compute the linespacing and offset.

void compute_row_stats (TO_BLOCK *block, bool testing_on)
 
fill_heights

Fill the given heights with heights of the blobs that are legal candidates for estimating xheight.

void fill_heights (TO_ROW *row, float gradient, int min_height, int max_height, STATS *heights, STATS *floating_heights)
 
compute_xheight_from_modes

Given a STATS object heights, looks for the two most frequently occurring heights that look like xheight and xheight + ascrise. If found, sets the values of *xheight and *ascrise accordingly, otherwise sets xheight to any most frequently occurring height and sets *ascrise to 0. Returns the number of times xheight occurred in heights. For each mode that is considered for being an xheight the count of floating blobs (stored in floating_heights) is subtracted from the total count of the blobs of this height. This is done because blobs that sit far above the baseline could represent valid ascenders, but it is highly unlikely that such a character's height will be an xheight (e.g. -, ', =, ^, ‘, ", ’, etc.). If cap_only is set, then force finding of only the top mode.

int compute_xheight_from_modes (STATS *heights, STATS *floating_heights, bool cap_only, int min_height, int max_height, float *xheight, float *ascrise)
 
compute_row_descdrop

Estimates the descdrop of this row. This function looks for "significant" descenders of lowercase letters (those that could not just be the small descenders of upper case letters like Q,J). The function also takes into account how many potential ascenders this row might contain. If the number of potential ascenders along with descenders is close to the expected fraction of the total number of blobs in the row, the function returns the descender height, returns 0 otherwise.

int32_t compute_row_descdrop (TO_ROW *row, float gradient, int xheight_blob_count, STATS *asc_heights)
 
compute_height_modes

Find the top maxmodes values in the input array and put their indices in the output in the order in which they occurred.

int32_t compute_height_modes (STATS *heights, int32_t min_height, int32_t max_height, int32_t *modes, int32_t maxmodes)
 
correct_row_xheight

Adjust the xheight etc of this row if not within reasonable limits of the average for the block.

void correct_row_xheight (TO_ROW *row, float xheight, float ascrise, float descdrop)
 
separate_underlines

Test wide objects for being potential underlines. If they are then put them in a separate list in the block.

void separate_underlines (TO_BLOCK *block, float gradient, FCOORD rotation, bool testing_on)
 
pre_associate_blobs

Associate overlapping blobs and fake chop wide blobs.

void pre_associate_blobs (ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, bool testing_on)
 
fit_parallel_rows

Re-fit the rows in the block to the given gradient.

void fit_parallel_rows (TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
 
fit_parallel_lms

Fit an LMS line to a row. Make the fit parallel to the given gradient and set the row accordingly.

void fit_parallel_lms (float gradient, TO_ROW *row)
 
make_baseline_spline

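Presumably fits a baseline spline to the row, as the function name suggests. Note: the upstream description ("Fit an LMS line to a row. Make the fit parallel to the given gradient and set the row accordingly.") is identical to that of fit_parallel_lms and refers to a gradient that this function does not take as a parameter.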

void make_baseline_spline (TO_ROW *row, TO_BLOCK *block)
 
segment_baseline

Divide the baseline up into segments which require a different quadratic fitted to them. Return TRUE if enough blobs were far enough away to need a quadratic.

bool segment_baseline (TO_ROW *row, TO_BLOCK *block, int32_t &segments, int32_t *xstarts)
 
linear_spline_baseline

Divide the baseline up into segments which require a different quadratic fitted to them.

Returns
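TRUE if enough blobs were far enough away to need a quadratic. (Note: this text matches the description of segment_baseline; the declared return type of linear_spline_baseline is double *, so this return description appears to have been copied and should be verified.)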
double * linear_spline_baseline (TO_ROW *row, TO_BLOCK *block, int32_t &segments, int32_t xstarts[])
 
assign_blobs_to_rows

Make enough rows to allocate all the given blobs to one. If a block skew is given, use that, else attempt to track it.

void assign_blobs_to_rows (TO_BLOCK *block, float *gradient, int pass, bool reject_misses, bool make_new_rows, bool drawing_skew)
 
most_overlapping_row

Return the row which most overlaps the blob.

OVERLAP_STATE most_overlapping_row (TO_ROW_IT *row_it, TO_ROW *&best_row, float top, float bottom, float rowsize, bool testing_blob)
 
blob_x_order

Sort function to sort blobs in x from page left.

int blob_x_order (const void *item1, const void *item2)
 
row_y_order

Sort function to sort rows in y from page top.

int row_y_order (const void *item1, const void *item2)
 
row_spacing_order

Qsort style function to compare 2 TO_ROWS based on their spacing value.

int row_spacing_order (const void *item1, const void *item2)
 
mark_repeated_chars

Mark blobs marked with BTFT_LEADER in repeated sets using the repeated_set member of BLOBNBOX.

void mark_repeated_chars (TO_ROW *row)
 

Variables

bool textord_heavy_nr = FALSE
 
bool textord_show_initial_rows = FALSE
 
bool textord_show_parallel_rows = FALSE
 
bool textord_show_expanded_rows = FALSE
 
bool textord_show_final_rows = FALSE
 
bool textord_show_final_blobs = FALSE
 
bool textord_test_landscape = FALSE
 
bool textord_parallel_baselines = TRUE
 
bool textord_straight_baselines = FALSE
 
bool textord_old_baselines = TRUE
 
bool textord_old_xheight = FALSE
 
bool textord_fix_xheight_bug = TRUE
 
bool textord_fix_makerow_bug = TRUE
 
bool textord_debug_xheights = FALSE
 
bool textord_biased_skewcalc = TRUE
 
bool textord_interpolating_skew = TRUE
 
int textord_skewsmooth_offset = 4
 
int textord_skewsmooth_offset2 = 1
 
int textord_test_x = -INT32_MAX
 
int textord_test_y = -INT32_MAX
 
int textord_min_blobs_in_row = 4
 
int textord_spline_minblobs = 8
 
int textord_spline_medianwin = 6
 
int textord_max_blob_overlaps = 4
 
int textord_min_xheight = 10
 
double textord_spline_shift_fraction = 0.02
 
double textord_spline_outlier_fraction = 0.1
 
double textord_skew_ile = 0.5
 
double textord_skew_lag = 0.02
 
double textord_linespace_iqrlimit = 0.2
 
double textord_width_limit = 8
 
double textord_chop_width = 1.5
 
double textord_expansion_factor = 1.0
 
double textord_overlap_x = 0.375
 
double textord_minxh = 0.25
 
double textord_min_linesize = 1.25
 
double textord_excess_blobsize = 1.3
 
double textord_occupancy_threshold = 0.4
 
double textord_underline_width = 2.0
 
double textord_min_blob_height_fraction = 0.75
 
double textord_xheight_mode_fraction = 0.4
 
double textord_ascheight_mode_fraction = 0.08
 
double textord_descheight_mode_fraction = 0.08
 
double textord_ascx_ratio_min = 1.25
 
double textord_ascx_ratio_max = 1.8
 
double textord_descx_ratio_min = 0.25
 
double textord_descx_ratio_max = 0.6
 
double textord_xheight_error_margin = 0.1
 
int textord_lms_line_trials = 12
 
bool textord_new_initial_xheight = TRUE
 
bool textord_debug_blob = FALSE
 
const int kMinLeaderCount = 5
 

compute_page_skew

Compute the skew over a full page by averaging the gradients over all the lines. Get the error of the same row.

const double kNoiseSize = 0.5
 
const int kMinSize = 8
 
void compute_page_skew (TO_BLOCK_LIST *blocks, float &page_m, float &page_err)
 
void vigorous_noise_removal (TO_BLOCK *block)
 
void cleanup_rows_making (ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
 
void delete_non_dropout_rows (TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
 

Macro Definition Documentation

◆ MAX_HEIGHT_MODES

#define MAX_HEIGHT_MODES   12

Definition at line 104 of file makerow.cpp.

Function Documentation

◆ adjust_row_limits()

void adjust_row_limits ( TO_BLOCK *  block)

adjust_row_limits

Change the limits of rows to suit the default fractions.

Definition at line 1109 of file makerow.cpp.

// adjust_row_limits: for every row of the block, measure the current row
// size (max_y - min_y), derive new ymin/ymax offsets from it and store them
// relative to the row intercept with set_limits(); the merged flag of each
// row is then cleared.
// NOTE(review): this listing omits source lines 1118, 1125, 1128-1130 and
// 1132-1133, so the conditions guarding the tprintf calls and the complete
// ymin/ymax expressions are not visible here -- confirm against makerow.cpp.
1111  {
1112  TO_ROW *row; //current row
1113  float size; //size of row
1114  float ymax; //top of row
1115  float ymin; //bottom of row
1116  TO_ROW_IT row_it = block->get_rows ();
1117 
1119  tprintf("Adjusting row limits for block(%d,%d)\n",
1120  block->block->pdblk.bounding_box().left(),
1121  block->block->pdblk.bounding_box().top());
1122  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1123  row = row_it.data ();
1124  size = row->max_y () - row->min_y ();
1126  tprintf("Row at %f has min %f, max %f, size %f\n",
1127  row->intercept(), row->min_y(), row->max_y(), size);
// ymax is scaled from the row size; the rest of the expression (and the
// ymin computation) falls on source lines missing from this listing.
1131  ymax = size * (tesseract::CCStruct::kXHeightFraction +
// New limits are expressed as offsets from the row intercept.
1134  row->set_limits (row->intercept () + ymin, row->intercept () + ymax);
1135  row->merged = false;
1136  }
1137 }
float intercept() const
Definition: blobbox.h:601
bool textord_show_expanded_rows
Definition: makerow.cpp:46
static const double kDescenderFraction
Definition: ccstruct.h:33
TO_ROW_LIST * get_rows()
Definition: blobbox.h:717
static const double kXHeightFraction
Definition: ccstruct.h:34
bool merged
Definition: blobbox.h:658
void set_limits(float new_min, float new_max)
Definition: blobbox.h:635
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
float min_y() const
Definition: blobbox.h:574
BLOCK * block
Definition: blobbox.h:790
static const double kAscenderFraction
Definition: ccstruct.h:35
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:60
PDBLK pdblk
Definition: ocrblock.h:192
float max_y() const
Definition: blobbox.h:571

◆ assign_blobs_to_rows()

void assign_blobs_to_rows ( TO_BLOCK *  block,
float *  gradient,
int  pass,
bool  reject_misses,
bool  make_new_rows,
bool  drawing_skew 
)

Definition at line 2280 of file makerow.cpp.

// assign_blobs_to_rows: visit the blobs of block->blobs in left-to-right
// order (sorted with blob_x_order) and decide for each one whether to add it
// to an existing row (ASSIGN), start a new TO_ROW for it (NEW_ROW) or leave
// it where it is (REJECT). Rows left without any blobs are deleted at the end.
//
//   gradient      if non-null, block_skew is recomputed from *gradient for
//                 every blob; if null, block_skew is a running estimate that
//                 is smoothed with smooth_factor after each assignment.
//   pass          only used in the debug message for the test blob.
//   reject_misses when false, a NEW_ROW answer from most_overlapping_row is
//                 converted to ASSIGN.
//   make_new_rows when true, a blob that fits no row and is less tall than
//                 block->max_blob_size gets a row of its own; when false the
//                 nearest row is tried instead.
//   drawing_skew  when true (and graphics are compiled in) the skew track is
//                 drawn in to_win.
//
// NOTE(review): this listing omits source lines 2335, 2372, 2384, 2393, 2417
// and 2430, so several conditions and the smooth_factor expressions are only
// partly visible -- confirm against makerow.cpp.
2287  {
2288  OVERLAP_STATE overlap_result; //what to do with it
2289  float ycoord; //current y
2290  float top, bottom; //of blob
2291  float g_length = 1.0f; //from gradient
2292  int16_t row_count; //no of rows
2293  int16_t left_x; //left edge
2294  int16_t last_x; //previous edge
2295  float block_skew; //y delta
2296  float smooth_factor; //for new coords
2297  float near_dist; //dist to nearest row
2298  ICOORD testpt; //testing only
2299  BLOBNBOX *blob; //current blob
2300  TO_ROW *row; //current row
2301  TO_ROW *dest_row = nullptr; //row to put blob in
2302  //iterators
2303  BLOBNBOX_IT blob_it = &block->blobs;
2304  TO_ROW_IT row_it = block->get_rows ();
2305 
// ycoord is the vertical midpoint of the block bounding box.
2306  ycoord =
2307  (block->block->pdblk.bounding_box ().bottom () +
2308  block->block->pdblk.bounding_box ().top ()) / 2.0f;
2309  if (gradient != nullptr)
2310  g_length = sqrt (1 + *gradient * *gradient);
2311 #ifndef GRAPHICS_DISABLED
2312  if (drawing_skew)
2313  to_win->SetCursor(block->block->pdblk.bounding_box ().left (), ycoord);
2314 #endif
2315  testpt = ICOORD (textord_test_x, textord_test_y);
2316  blob_it.sort (blob_x_order);
2317  smooth_factor = 1.0;
2318  block_skew = 0.0f;
2319  row_count = row_it.length (); //might have rows
// left_x is the left edge of the first blob, or of the block if there are
// no blobs.
2320  if (!blob_it.empty ()) {
2321  left_x = blob_it.data ()->bounding_box ().left ();
2322  }
2323  else {
2324  left_x = block->block->pdblk.bounding_box ().left ();
2325  }
2326  last_x = left_x;
2327  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
2328  blob = blob_it.data ();
2329  if (gradient != nullptr) {
2330  block_skew = (1 - 1 / g_length) * blob->bounding_box ().bottom ()
2331  + *gradient / g_length * blob->bounding_box ().left ();
2332  }
// No gradient given: after a horizontal gap, scale the tracked skew by the
// ratio of the new x offset to the previous one.
2333  else if (blob->bounding_box ().left () - last_x > block->line_size / 2
2334  && last_x - left_x > block->line_size * 2
2336  // tprintf("Interpolating skew from %g",block_skew);
2337  block_skew *= (float) (blob->bounding_box ().left () - left_x)
2338  / (last_x - left_x);
2339  // tprintf("to %g\n",block_skew);
2340  }
2341  last_x = blob->bounding_box ().left ();
// De-skewed vertical extent of the blob.
2342  top = blob->bounding_box ().top () - block_skew;
2343  bottom = blob->bounding_box ().bottom () - block_skew;
2344 #ifndef GRAPHICS_DISABLED
2345  if (drawing_skew)
2346  to_win->DrawTo(blob->bounding_box ().left (), ycoord + block_skew);
2347 #endif
2348  if (!row_it.empty ()) {
// Advance to the first row whose min_y is not above the blob top, stopping
// at the last row if there is none.
2349  for (row_it.move_to_first ();
2350  !row_it.at_last () && row_it.data ()->min_y () > top;
2351  row_it.forward ());
2352  row = row_it.data ();
2353  if (row->min_y () <= top && row->max_y () >= bottom) {
2354  //any overlap
2355  dest_row = row;
2356  overlap_result = most_overlapping_row (&row_it, dest_row,
2357  top, bottom,
2358  block->line_size,
2359  blob->bounding_box ().
2360  contains (testpt));
2361  if (overlap_result == NEW_ROW && !reject_misses)
2362  overlap_result = ASSIGN;
2363  }
2364  else {
2365  overlap_result = NEW_ROW;
// Not allowed to make rows: try to attach the blob to the nearest row if it
// lies within a tolerance derived from line_spacing - line_size.
2366  if (!make_new_rows) {
2367  near_dist = row_it.data_relative (-1)->min_y () - top;
2368  //below bottom
2369  if (bottom < row->min_y ()) {
2370  if (row->min_y () - bottom <=
2371  (block->line_spacing -
2373  //done it
2374  overlap_result = ASSIGN;
2375  dest_row = row;
2376  }
2377  }
2378  else if (near_dist > 0
2379  && near_dist < bottom - row->max_y ()) {
2380  row_it.backward ();
2381  dest_row = row_it.data ();
2382  if (dest_row->min_y () - bottom <=
2383  (block->line_spacing -
2385  //done it
2386  overlap_result = ASSIGN;
2387  }
2388  }
2389  else {
2390  if (top - row->max_y () <=
2391  (block->line_spacing -
2392  block->line_size) * (textord_overlap_x +
2394  //done it
2395  overlap_result = ASSIGN;
2396  dest_row = row;
2397  }
2398  }
2399  }
2400  }
2401  if (overlap_result == ASSIGN)
2402  dest_row->add_blob (blob_it.extract (), top, bottom,
2403  block->line_size);
2404  if (overlap_result == NEW_ROW) {
2405  if (make_new_rows && top - bottom < block->max_blob_size) {
2406  dest_row =
2407  new TO_ROW (blob_it.extract (), top, bottom,
2408  block->line_size);
2409  row_count++;
2410  if (bottom > row_it.data ()->min_y ())
2411  row_it.add_before_then_move (dest_row);
2412  //insert in right place
2413  else
2414  row_it.add_after_then_move (dest_row);
2415  smooth_factor =
2416  1.0 / (row_count * textord_skew_lag +
2418  }
2419  else
2420  overlap_result = REJECT;
2421  }
2422  }
// Row list is empty: the blob can only start the first row or be rejected.
2423  else if (make_new_rows && top - bottom < block->max_blob_size) {
2424  overlap_result = NEW_ROW;
2425  dest_row =
2426  new TO_ROW(blob_it.extract(), top, bottom, block->line_size);
2427  row_count++;
2428  row_it.add_after_then_move(dest_row);
2429  smooth_factor = 1.0 / (row_count * textord_skew_lag +
2431  }
2432  else
2433  overlap_result = REJECT;
2434  if (blob->bounding_box ().contains(testpt) && textord_debug_blob) {
2435  if (overlap_result != REJECT) {
2436  tprintf("Test blob assigned to row at (%g,%g) on pass %d\n",
2437  dest_row->min_y(), dest_row->max_y(), pass);
2438  }
2439  else {
2440  tprintf("Test blob assigned to no row on pass %d\n", pass);
2441  }
2442  }
2443  if (overlap_result != REJECT) {
// Adding a blob may have changed min_y of the current row, so move the row
// backward or forward in the list until the min_y ordering holds again.
2444  while (!row_it.at_first() &&
2445  row_it.data()->min_y() > row_it.data_relative(-1)->min_y()) {
2446  row = row_it.extract();
2447  row_it.backward();
2448  row_it.add_before_then_move(row);
2449  }
2450  while (!row_it.at_last() &&
2451  row_it.data ()->min_y() < row_it.data_relative (1)->min_y()) {
2452  row = row_it.extract();
2453  row_it.forward();
2454  // Keep rows in order.
2455  row_it.add_after_then_move(row);
2456  }
// Update the smoothed skew estimate unless the new blob mostly overlaps in
// x the blob before the last one in the destination row.
2457  BLOBNBOX_IT added_blob_it(dest_row->blob_list());
2458  added_blob_it.move_to_last();
2459  TBOX prev_box = added_blob_it.data_relative(-1)->bounding_box();
2460  if (dest_row->blob_list()->singleton() ||
2461  !prev_box.major_x_overlap(blob->bounding_box())) {
2462  block_skew = (1 - smooth_factor) * block_skew
2463  + smooth_factor * (blob->bounding_box().bottom() -
2464  dest_row->initial_min_y());
2465  }
2466  }
2467  }
2468  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
2469  if (row_it.data()->blob_list()->empty())
2470  delete row_it.extract(); // Discard empty rows.
2471  }
2472 }
void DrawTo(int x, int y)
Definition: scrollview.cpp:527
OVERLAP_STATE most_overlapping_row(TO_ROW_IT *row_it, TO_ROW *&best_row, float top, float bottom, float rowsize, bool testing_blob)
Definition: makerow.cpp:2480
void add_blob(BLOBNBOX *blob, float top, float bottom, float row_size)
Definition: blobbox.cpp:734
double textord_skew_lag
Definition: makerow.cpp:74
Definition: rect.h:34
Definition: makerow.h:30
int textord_skewsmooth_offset
Definition: makerow.cpp:59
void SetCursor(int x, int y)
Definition: scrollview.cpp:521
static const double kDescenderFraction
Definition: ccstruct.h:33
TO_ROW_LIST * get_rows()
Definition: blobbox.h:717
int blob_x_order(const void *item1, const void *item2)
Definition: makerow.cpp:2575
float line_spacing
Definition: blobbox.h:792
int16_t left() const
Definition: rect.h:72
OVERLAP_STATE
Definition: makerow.h:28
int16_t top() const
Definition: rect.h:58
integer coordinate
Definition: points.h:32
bool major_x_overlap(const TBOX &box) const
Definition: rect.h:412
bool textord_interpolating_skew
Definition: makerow.cpp:58
int textord_skewsmooth_offset2
Definition: makerow.cpp:60
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
float initial_min_y() const
Definition: blobbox.h:580
int textord_test_y
Definition: makerow.cpp:62
float min_y() const
Definition: blobbox.h:574
EXTERN ScrollView * to_win
Definition: drawtord.cpp:37
BLOCK * block
Definition: blobbox.h:790
int textord_test_x
Definition: makerow.cpp:61
bool textord_debug_blob
Definition: makerow.cpp:102
static const double kAscenderFraction
Definition: ccstruct.h:35
bool contains(const FCOORD pt) const
Definition: rect.h:333
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:60
const TBOX & bounding_box() const
Definition: blobbox.h:231
BLOBNBOX_LIST blobs
Definition: blobbox.h:785
double textord_overlap_x
Definition: makerow.cpp:80
Definition: makerow.h:31
int16_t bottom() const
Definition: rect.h:65
PDBLK pdblk
Definition: ocrblock.h:192
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:612
float line_size
Definition: blobbox.h:798
float max_y() const
Definition: blobbox.h:571

◆ blob_x_order()

int blob_x_order ( const void *  item1,
const void *  item2 
)

Definition at line 2575 of file makerow.cpp.

// blob_x_order: qsort-style comparison callback. item1 and item2 each point
// to a BLOBNBOX pointer. Returns -1, 1 or 0 when the left edge of the first
// blob bounding box is respectively less than, greater than or equal to the
// left edge of the second.
2577  {
2578  //converted ptr
2579  const BLOBNBOX *blob1 = *reinterpret_cast<const BLOBNBOX* const*>(item1);
2580  //converted ptr
2581  const BLOBNBOX *blob2 = *reinterpret_cast<const BLOBNBOX* const*>(item2);
2582 
2583  if (blob1->bounding_box ().left () < blob2->bounding_box ().left ())
2584  return -1;
2585  else if (blob1->bounding_box ().left () > blob2->bounding_box ().left ())
2586  return 1;
2587  else
2588  return 0;
2589 }
int16_t left() const
Definition: rect.h:72
const TBOX & bounding_box() const
Definition: blobbox.h:231

◆ cleanup_rows_making()

void cleanup_rows_making ( ICOORD  page_tr,
TO_BLOCK *  block,
float  gradient,
FCOORD  rotation,
int32_t  block_edge,
bool  testing_on 
)

cleanup_rows_making

Remove overlapping rows and fit all the blobs to what's left.

Definition at line 518 of file makerow.cpp.

// cleanup_rows_making: re-fit the rows of the block to the given gradient,
// expand them, then hand every blob back to the block and re-assign the
// blobs to rows in three passes:
//   pass 1: the blobs previously held by the rows (no rejects, no new rows);
//   pass 2: plus block->large_blobs (reject misses, new rows allowed);
//   pass 3: plus block->noise_blobs and block->small_blobs (no rejects, no
//           new rows).
// When textord_show_parallel_rows and testing_on are both set (and graphics
// are compiled in) the debug window is created first if it does not exist.
// NOTE(review): source line 542 is missing from this listing; the argument
// list on lines 543-546 belongs to a call that starts there (the page
// cross-references delete_non_dropout_rows) -- confirm against makerow.cpp.
525  {
526  //iterators
527  BLOBNBOX_IT blob_it = &block->blobs;
528  TO_ROW_IT row_it = block->get_rows ();
529 
530 #ifndef GRAPHICS_DISABLED
531  if (textord_show_parallel_rows && testing_on) {
532  if (to_win == nullptr)
533  create_to_win(page_tr);
534  }
535 #endif
536  //get row coords
537  fit_parallel_rows(block,
538  gradient,
539  rotation,
540  block_edge,
541  textord_show_parallel_rows && testing_on);
543  gradient,
544  rotation,
545  block_edge,
546  textord_show_parallel_rows && testing_on);
547  expand_rows(page_tr, block, gradient, rotation, block_edge, testing_on);
548  blob_it.set_to_list (&block->blobs);
549  row_it.set_to_list (block->get_rows ());
// Move the blob list of every row back into block->blobs.
550  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
551  blob_it.add_list_after (row_it.data ()->blob_list ());
552  //give blobs back
553  assign_blobs_to_rows (block, &gradient, 1, FALSE, FALSE, FALSE);
554  //now new rows must be genuine
555  blob_it.set_to_list (&block->blobs);
556  blob_it.add_list_after (&block->large_blobs);
557  assign_blobs_to_rows (block, &gradient, 2, TRUE, TRUE, FALSE);
558  //safe to use big ones now
559  blob_it.set_to_list (&block->blobs);
560  //throw all blobs in
561  blob_it.add_list_after (&block->noise_blobs);
562  blob_it.add_list_after (&block->small_blobs);
563  assign_blobs_to_rows (block, &gradient, 3, FALSE, FALSE, FALSE);
564 }
bool textord_show_parallel_rows
Definition: makerow.cpp:45
void delete_non_dropout_rows(TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
Definition: makerow.cpp:571
#define TRUE
Definition: capi.h:51
void expand_rows(ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
Definition: makerow.cpp:951
TO_ROW_LIST * get_rows()
Definition: blobbox.h:717
void assign_blobs_to_rows(TO_BLOCK *block, float *gradient, int pass, bool reject_misses, bool make_new_rows, bool drawing_skew)
Definition: makerow.cpp:2280
#define FALSE
Definition: capi.h:52
EXTERN ScrollView * to_win
Definition: drawtord.cpp:37
void fit_parallel_rows(TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
Definition: makerow.cpp:1933
BLOBNBOX_LIST blobs
Definition: blobbox.h:785
ScrollView * create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:46
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:789
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:788
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:787

◆ compute_dropout_distances()

void compute_dropout_distances ( int32_t *  occupation,
int32_t *  thresholds,
int32_t  line_count 
)

Definition at line 904 of file makerow.cpp.

// compute_dropout_distances: overwrite thresholds[0..line_count) in place
// with distance values. A forward sweep writes a decreasing counter
// (distance) into each entry until it reaches an index where occupation is
// at or above its threshold while the previous index was below its original
// threshold. At that point the entries just written are back-filled with
// 1, 2, 3, ... (the distance to that index) for as long as this is smaller
// than the magnitude of the value already stored, and the counter restarts.
// occupation is only read. The counter starts at -line_count, so entries
// before the first such index keep large negative values unless back-filled.
908  {
909  int32_t line_index; //of thresholds line
910  int32_t distance; //from prev dropout
911  int32_t next_dist; //to next dropout
912  int32_t back_index; //for back filling
913  int32_t prev_threshold; //before overwrite
914 
915  distance = -line_count;
916  line_index = 0;
917  do {
918  do {
919  distance--;
// Remember the original threshold: the loop condition below still needs it
// for index line_index - 1 after the entry has been overwritten.
920  prev_threshold = thresholds[line_index];
921  //distance from prev
922  thresholds[line_index] = distance;
923  line_index++;
924  }
925  while (line_index < line_count
926  && (occupation[line_index] < thresholds[line_index]
927  || occupation[line_index - 1] >= prev_threshold));
928  if (line_index < line_count) {
929  back_index = line_index - 1;
930  next_dist = 1;
// Walk backwards replacing stored values with the (smaller) distance to the
// index just found; distance is incremented in step so the loop stops where
// the stored value is no longer further away than next_dist.
931  while (next_dist < -distance && back_index >= 0) {
932  thresholds[back_index] = next_dist;
933  back_index--;
934  next_dist++;
935  distance++;
936  }
// Restart the counter: the next forward step writes 0, then -1, -2, ...
937  distance = 1;
938  }
939  }
940  while (line_index < line_count);
941 }

◆ compute_height_modes()

int32_t compute_height_modes ( STATS *  heights,
int32_t  min_height,
int32_t  max_height,
int32_t *  modes,
int32_t  maxmodes 
)

Definition at line 1625 of file makerow.cpp.

// compute_height_modes: scan the heights min_height..max_height and collect
// in modes[] up to maxmodes height values whose pile_count is non-zero, kept
// in the order in which they occur. Once modes[] is full, a later height
// whose count is at least the smallest stored count evicts that smallest
// entry: the entries after it are shifted down and the new height is
// appended at the end.
// Returns dest_count, the number of entries in modes[]. Note that dest_count
// is reused as the loop index of the re-scan on line 1665, so after any
// eviction that triggers the re-scan it is left equal to maxmodes.
1629  { // size of modes
1630  int32_t pile_count; // no in source pile
1631  int32_t src_count; // no of source entries
1632  int32_t src_index; // current entry
1633  int32_t least_count; // height of smallest
1634  int32_t least_index; // index of least
1635  int32_t dest_count; // index in modes
1636 
1637  src_count = max_height + 1 - min_height;
1638  dest_count = 0;
1639  least_count = INT32_MAX;
1640  least_index = -1;
1641  for (src_index = 0; src_index < src_count; src_index++) {
1642  pile_count = heights->pile_count(min_height + src_index);
1643  if (pile_count > 0) {
// Still room in modes[]: append, tracking the smallest count seen so far.
1644  if (dest_count < maxmodes) {
1645  if (pile_count < least_count) {
1646  // find smallest in array
1647  least_count = pile_count;
1648  least_index = dest_count;
1649  }
1650  modes[dest_count++] = min_height + src_index;
// modes[] is full: replace the smallest entry if the new count is not lower.
1651  } else if (pile_count >= least_count) {
1652  while (least_index < maxmodes - 1) {
1653  modes[least_index] = modes[least_index + 1];
1654  // shuffle up
1655  least_index++;
1656  }
1657  // new one on end
1658  modes[maxmodes - 1] = min_height + src_index;
1659  if (pile_count == least_count) {
1660  // new smallest
1661  least_index = maxmodes - 1;
1662  } else {
// The evicted entry was the minimum, so rescan modes[] for the new minimum.
1663  least_count = heights->pile_count(modes[0]);
1664  least_index = 0;
1665  for (dest_count = 1; dest_count < maxmodes; dest_count++) {
1666  pile_count = heights->pile_count(modes[dest_count]);
1667  if (pile_count < least_count) {
1668  // find smallest
1669  least_count = pile_count;
1670  least_index = dest_count;
1671  }
1672  }
1673  }
1674  }
1675  }
1676  }
1677  return dest_count;
1678 }
int32_t pile_count(int32_t value) const
Definition: statistc.h:78

◆ compute_line_occupation()

void compute_line_occupation ( TO_BLOCK *  block,
float  gradient,
int32_t  min_y,
int32_t  max_y,
int32_t *  occupation,
int32_t *  deltas 
)

Definition at line 770 of file makerow.cpp.

// compute_line_occupation: fill deltas[] and occupation[] (each with
// max_y - min_y + 1 entries) for the rows of the block.
// deltas[] is zeroed first. Then, for every blob in every row, a copy of the
// blob bounding box is rotated by the inverse of the skew given by gradient;
// the width of the rotated box is added to deltas[] at index bottom - min_y
// and subtracted at index top - min_y. occupation[] is the running sum of
// deltas[].
// ASSERT_HOST checks that both indices fall inside [0, line_count).
777  {
778  int32_t line_count; //maxy-miny+1
779  int32_t line_index; //of scan line
780  int index; //array index for daft compilers
781  TO_ROW *row; //current row
782  TO_ROW_IT row_it = block->get_rows ();
783  BLOBNBOX *blob; //current blob
784  BLOBNBOX_IT blob_it; //iterator
785  float length; //of skew vector
786  TBOX blob_box; //bounding box
787  FCOORD rotation; //inverse of skew
788 
789  line_count = max_y - min_y + 1;
// Unit vector (1, -gradient) / length, used to rotate boxes back by the skew.
790  length = sqrt (gradient * gradient + 1);
791  rotation = FCOORD (1 / length, -gradient / length);
792  for (line_index = 0; line_index < line_count; line_index++)
793  deltas[line_index] = 0;
794  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
795  row = row_it.data ();
796  blob_it.set_to_list (row->blob_list ());
797  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
798  blob_it.forward ()) {
799  blob = blob_it.data ();
800  blob_box = blob->bounding_box ();
801  blob_box.rotate (rotation);//de-skew it
802  int32_t width = blob_box.right() - blob_box.left();
803  index = blob_box.bottom() - min_y;
804  ASSERT_HOST(index >= 0 && index < line_count);
805  // count transitions
806  deltas[index] += width;
807  index = blob_box.top() - min_y;
808  ASSERT_HOST(index >= 0 && index < line_count);
809  deltas[index] -= width;
810  }
811  }
// Integrate the deltas to get the occupation at each scan line.
812  occupation[0] = deltas[0];
813  for (line_index = 1; line_index < line_count; line_index++)
814  occupation[line_index] = occupation[line_index - 1] + deltas[line_index];
815 }
void rotate(const FCOORD &vec)
Definition: rect.h:197
Definition: rect.h:34
TO_ROW_LIST * get_rows()
Definition: blobbox.h:717
int16_t left() const
Definition: rect.h:72
int16_t top() const
Definition: rect.h:58
Definition: points.h:189
const TBOX & bounding_box() const
Definition: blobbox.h:231
int16_t right() const
Definition: rect.h:79
int16_t bottom() const
Definition: rect.h:65
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:612
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ compute_occupation_threshold()

void compute_occupation_threshold ( int32_t  low_window,
int32_t  high_window,
int32_t  line_count,
int32_t *  occupation,
int32_t *  thresholds 
)

compute_occupation_threshold

Compute thresholds for textline or not for the occupation array.

Definition at line 823 of file makerow.cpp.

// compute_occupation_threshold: fill thresholds[0..line_count) with
//   (sum - min_occ) / divisor + min_occ
// where divisor = ceil((low_window + high_window) / textord_occupancy_threshold).
// If low_window + high_window < line_count, sum and min_occ are the sum and
// minimum of occupation[] over a sliding window of low_window + high_window
// entries: the first low_window thresholds use the initial window, one
// threshold is written per window step, and the remaining entries repeat the
// value of the final window. Otherwise sum and min_occ are taken over the
// whole occupation[] array and the same value is written to every entry.
// min_index tracks where the current minimum sits so that it can be
// recomputed when it slides out of the window.
829  {
830  int32_t line_index; //of thresholds line
831  int32_t low_index; //in occupation
832  int32_t high_index; //in occupation
833  int32_t sum; //current average
834  int32_t divisor; //to get thresholds
835  int32_t min_index; //of min occ
836  int32_t min_occ; //min in locality
837  int32_t test_index; //for finding min
838 
839  divisor =
840  (int32_t) ceil ((low_window + high_window) / textord_occupancy_threshold);
841  if (low_window + high_window < line_count) {
// Sum the initial window occupation[0 .. low_window + high_window).
842  for (sum = 0, high_index = 0; high_index < low_window; high_index++)
843  sum += occupation[high_index];
844  for (low_index = 0; low_index < high_window; low_index++, high_index++)
845  sum += occupation[high_index];
846  min_occ = occupation[0];
847  min_index = 0;
848  for (test_index = 1; test_index < high_index; test_index++) {
849  if (occupation[test_index] <= min_occ) {
850  min_occ = occupation[test_index];
851  min_index = test_index; //find min in region
852  }
853  }
854  for (line_index = 0; line_index < low_window; line_index++)
855  thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
856  //same out to end
// Slide the window one entry at a time, updating sum and minimum.
857  for (low_index = 0; high_index < line_count; low_index++, high_index++) {
858  sum -= occupation[low_index];
859  sum += occupation[high_index];
860  if (occupation[high_index] <= min_occ) {
861  //find min in region
862  min_occ = occupation[high_index];
863  min_index = high_index;
864  }
865  //lost min from region
866  if (min_index <= low_index) {
867  min_occ = occupation[low_index + 1];
868  min_index = low_index + 1;
869  for (test_index = low_index + 2; test_index <= high_index;
870  test_index++) {
871  if (occupation[test_index] <= min_occ) {
872  min_occ = occupation[test_index];
873  //find min in region
874  min_index = test_index;
875  }
876  }
877  }
878  thresholds[line_index++] = (sum - min_occ) / divisor + min_occ;
879  }
880  }
881  else {
// Window covers the whole array: use the global sum and minimum.
882  min_occ = occupation[0];
883  min_index = 0;
884  for (sum = 0, low_index = 0; low_index < line_count; low_index++) {
885  if (occupation[low_index] < min_occ) {
886  min_occ = occupation[low_index];
887  min_index = low_index;
888  }
889  sum += occupation[low_index];
890  }
891  line_index = 0;
892  }
// Fill whatever is left with the last computed value.
893  for (; line_index < line_count; line_index++)
894  thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
895  //same out to end
896 }
double textord_occupancy_threshold
Definition: makerow.cpp:85

◆ compute_page_skew()

void compute_page_skew ( TO_BLOCK_LIST *  blocks,
float &  page_m,
float &  page_err 
)

Definition at line 287 of file makerow.cpp.

// compute_page_skew: choose a representative row gradient (page_m) and
// line-fit error (page_err) over the rows of every text block in `blocks`.
// Blocks whose poly_block() is non-null and not IsText() are ignored.
// If no rows are found, both outputs are set to 0.
// Samples are collected into `gradients`/`errors`, then the entry at index
// (row_count * textord_skew_ile) is selected with choose_nth_item().
291  {
292  int32_t row_count; //total rows
293  int32_t blob_count; //total blobs (reused below as a per-row count)
294  int32_t row_err; //integer error
295  int32_t row_index; //of total
296  TO_ROW *row; //current row
297  TO_BLOCK_IT block_it = blocks; //iterator
298 
299  row_count = 0;
300  blob_count = 0;
// Pass 1: count rows and blobs so the sample arrays can be sized.
301  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
302  block_it.forward ()) {
303  POLY_BLOCK* pb = block_it.data()->block->pdblk.poly_block();
304  if (pb != nullptr && !pb->IsText())
305  continue; // Pretend non-text blocks don't exist.
306  row_count += block_it.data ()->get_rows ()->length ();
307  //count up rows
308  TO_ROW_IT row_it(block_it.data()->get_rows());
309  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
310  blob_count += row_it.data ()->blob_list ()->length ();
311  }
312  if (row_count == 0) {
313  page_m = 0.0f;
314  page_err = 0.0f;
315  return;
316  }
317  // gradient samples; capacity is the total blob count from pass 1
318  std::vector<float> gradients(blob_count);
319  // error samples, filled in parallel with gradients
320  std::vector<float> errors(blob_count);
321 
// Pass 2: gather samples from each row of each text block.
322  row_index = 0;
323  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
324  block_it.forward ()) {
325  POLY_BLOCK* pb = block_it.data()->block->pdblk.poly_block();
326  if (pb != nullptr && !pb->IsText())
327  continue; // Pretend non-text blocks don't exist.
328  TO_ROW_IT row_it(block_it.data ()->get_rows());
329  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
330  row = row_it.data ();
331  blob_count = row->blob_list ()->length ();
332  row_err = (int32_t) ceil (row->line_error ());
333  if (row_err <= 0)
334  row_err = 1;
// NOTE(review): listing line 335 is absent from this extract. The closing
// brace at 342 and the `else if` at 343 imply an `if (...) {` opened here,
// presumably testing textord_biased_skewcalc -- confirm against makerow.cpp.
// In this branch the row contributes one sample per remaining blob_count
// after dividing by row_err twice (once at 336 and again in the for-init).
336  blob_count /= row_err;
337  for (blob_count /= row_err; blob_count > 0; blob_count--) {
338  gradients[row_index] = row->line_m ();
339  errors[row_index] = row->line_error ();
340  row_index++;
341  }
342  }
343  else if (blob_count >= textord_min_blobs_in_row) {
344  //get gradient
345  gradients[row_index] = row->line_m ();
346  errors[row_index] = row->line_error ();
347  row_index++;
348  }
349  }
350  }
351  if (row_index == 0) {
352  //desperate: nothing qualified above, so take one sample from every row
// NOTE(review): this writes one entry per row into vectors sized by total
// blob count; presumably rows are never empty at this point -- confirm.
353  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
354  block_it.forward ()) {
355  POLY_BLOCK* pb = block_it.data()->block->pdblk.poly_block();
356  if (pb != nullptr && !pb->IsText())
357  continue; // Pretend non-text blocks don't exist.
358  TO_ROW_IT row_it(block_it.data()->get_rows());
359  for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
360  row_it.forward ()) {
361  row = row_it.data ();
362  gradients[row_index] = row->line_m ();
363  errors[row_index] = row->line_error ();
364  row_index++;
365  }
366  }
367  }
// row_count now means "number of samples collected", not number of rows.
368  row_count = row_index;
369  row_index = choose_nth_item ((int32_t) (row_count * textord_skew_ile),
370  &gradients[0], row_count);
371  page_m = gradients[row_index];
372  row_index = choose_nth_item ((int32_t) (row_count * textord_skew_ile),
373  &errors[0], row_count);
374  page_err = errors[row_index];
375 }
int textord_min_blobs_in_row
Definition: makerow.cpp:63
float line_m() const
Definition: blobbox.h:583
int32_t choose_nth_item(int32_t index, float *array, int32_t count)
Definition: statistc.cpp:637
bool textord_biased_skewcalc
Definition: makerow.cpp:57
double textord_skew_ile
Definition: makerow.cpp:73
bool IsText() const
Definition: polyblk.h:49
float line_error() const
Definition: blobbox.h:589
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:612

◆ compute_row_descdrop()

int32_t compute_row_descdrop ( TO_ROW row,
float  gradient,
int  xheight_blob_count,
STATS asc_heights 
)

Definition at line 1565 of file makerow.cpp.

// compute_row_descdrop: estimate the descender drop of `row`.
//  - Sums the piles of asc_heights between i_min and i_max (bucket limits
//    clamped using row->xheight and the textord_ascx_ratio_* bounds) to get
//    the number of potential ascenders.
//  - Builds a histogram of (gradient * xcentre + parallel_c() - blob bottom)
//    for blobs not joined to the previous one, restricted to
//    [min_height, max_height] derived from the textord_descx_ratio_* bounds.
//  - Returns minus the histogram mode, or 0 if the mode's count plus the
//    potential ascenders is below xheight_blob_count * total_fraction.
1566  {
1567  // Count how many potential ascenders are in this row.
1568  int i_min = asc_heights->min_bucket();
1569  if ((i_min / row->xheight) < textord_ascx_ratio_min) {
1570  i_min = static_cast<int>(
1571  floor(row->xheight * textord_ascx_ratio_min + 0.5));
1572  }
1573  int i_max = asc_heights->max_bucket();
1574  if ((i_max / row->xheight) > textord_ascx_ratio_max) {
1575  i_max = static_cast<int>(floor(row->xheight * textord_ascx_ratio_max));
1576  }
1577  int num_potential_asc = 0;
1578  for (int i = i_min; i <= i_max; ++i) {
1579  num_potential_asc += asc_heights->pile_count(i);
1580  }
// Accepted range of descender depths, as multiples of the row xheight.
1581  int32_t min_height =
1582  static_cast<int32_t>(floor(row->xheight * textord_descx_ratio_min + 0.5));
1583  int32_t max_height =
1584  static_cast<int32_t>(floor(row->xheight * textord_descx_ratio_max));
1585  float xcentre; // centre of blob
1586  float height; // distance from the fitted line down to the blob bottom
1587  BLOBNBOX_IT blob_it = row->blob_list();
1588  BLOBNBOX *blob; // current blob
1589  STATS heights (min_height, max_height + 1);
1590  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1591  blob = blob_it.data();
1592  if (!blob->joined_to_prev()) {
1593  xcentre = (blob->bounding_box().left() +
1594  blob->bounding_box().right()) / 2.0f;
1595  height = (gradient * xcentre + row->parallel_c() -
1596  blob->bounding_box().bottom());
1597  if (height >= min_height && height <= max_height)
1598  heights.add(static_cast<int>(floor(height + 0.5)), 1);
1599  }
1600  }
1601  int blob_index = heights.mode(); // find mode
1602  int blob_count = heights.pile_count(blob_index); // get count of mode
// NOTE(review): listing line 1604 (the initialiser of total_fraction) is
// absent from this extract; presumably it combines
// textord_descheight_mode_fraction and textord_ascheight_mode_fraction,
// both of which appear in this function's reference list -- confirm.
1603  float total_fraction =
1605  if (static_cast<float>(blob_count + num_potential_asc) <
1606  xheight_blob_count * total_fraction) {
1607  blob_count = 0;
1608  }
// A zero blob_count (empty mode pile or rejected above) yields descdrop 0.
1609  int descdrop = blob_count > 0 ? -blob_index : 0;
1610  if (textord_debug_xheights) {
1611  tprintf("Descdrop: %d (potential ascenders %d, descenders %d)\n",
1612  descdrop, num_potential_asc, blob_count);
1613  heights.print();
1614  }
1615  return descdrop;
1616 }
int32_t pile_count(int32_t value) const
Definition: statistc.h:78
int32_t min_bucket() const
Definition: statistc.cpp:205
double textord_descx_ratio_max
Definition: makerow.cpp:98
int32_t max_bucket() const
Definition: statistc.cpp:220
double textord_descheight_mode_fraction
Definition: makerow.cpp:94
Definition: statistc.h:33
float xheight
Definition: blobbox.h:670
int16_t left() const
Definition: rect.h:72
bool joined_to_prev() const
Definition: blobbox.h:257
double textord_ascx_ratio_min
Definition: makerow.cpp:95
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
float parallel_c() const
Definition: blobbox.h:592
double textord_ascx_ratio_max
Definition: makerow.cpp:96
const TBOX & bounding_box() const
Definition: blobbox.h:231
int16_t right() const
Definition: rect.h:79
double textord_ascheight_mode_fraction
Definition: makerow.cpp:92
double textord_descx_ratio_min
Definition: makerow.cpp:97
int16_t bottom() const
Definition: rect.h:65
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:612
bool textord_debug_xheights
Definition: makerow.cpp:56

◆ compute_row_stats()

void compute_row_stats ( TO_BLOCK block,
bool  testing_on 
)

Definition at line 1145 of file makerow.cpp.

// compute_row_stats: derive per-row spacing and block-level line metrics.
// Walks the block's rows from the last one backwards, storing in each
// visited row the intercept difference to the row visited next, then uses
// choose_nth_item() with row_spacing_order to find the quartiles and median
// of those spacings. Updates block->key_row, line_size, line_spacing,
// max_blob_size and baseline_offset; prints diagnostics when testing_on.
1148  {
1149  int32_t row_index; //of median
1150  TO_ROW *row; //current row
1151  TO_ROW *prev_row; //previous row
1152  float iqr; //inter quartile range
1153  TO_ROW_IT row_it = block->get_rows ();
1154  //number of rows
1155  int16_t rowcount = row_it.length ();
1156  // for choose nth
1157  std::vector<TO_ROW*> rows(rowcount);
// rowcount is reused as the number of rows that received a spacing value.
1158  rowcount = 0;
1159  prev_row = nullptr;
1160  row_it.move_to_last (); //start at bottom
1161  do {
1162  row = row_it.data ();
1163  if (prev_row != nullptr) {
1164  rows[rowcount++] = prev_row;
1165  prev_row->spacing = row->intercept () - prev_row->intercept ();
1166  if (testing_on)
1167  tprintf ("Row at %g yields spacing of %g\n",
1168  row->intercept (), prev_row->spacing);
1169  }
1170  prev_row = row;
1171  row_it.backward ();
1172  }
1173  while (!row_it.at_last ());
// Default key row is the final row visited; it is never stored in rows[].
1174  block->key_row = prev_row;
1175  block->baseline_offset =
1176  fmod (prev_row->parallel_c (), block->line_spacing);
1177  if (testing_on)
1178  tprintf ("Blob based spacing=(%g,%g), offset=%g",
1179  block->line_size, block->line_spacing, block->baseline_offset);
1180  if (rowcount > 0) {
// iqr = spacing at the 3/4 position minus spacing at the 1/4 position.
1181  row_index = choose_nth_item(rowcount * 3 / 4, &rows[0], rowcount,
1182  sizeof (TO_ROW *), row_spacing_order);
1183  iqr = rows[row_index]->spacing;
1184  row_index = choose_nth_item(rowcount / 4, &rows[0], rowcount,
1185  sizeof (TO_ROW *), row_spacing_order);
1186  iqr -= rows[row_index]->spacing;
1187  row_index = choose_nth_item(rowcount / 2, &rows[0], rowcount,
1188  sizeof (TO_ROW *), row_spacing_order);
1189  block->key_row = rows[row_index];
1190  if (testing_on)
1191  tprintf (" row based=%g(%g)", rows[row_index]->spacing, iqr);
// Only trust the row-based spacing when there are more than two samples
// and their spread is small relative to the median spacing.
1192  if (rowcount > 2
1193  && iqr < rows[row_index]->spacing * textord_linespace_iqrlimit) {
// NOTE(review): listing line 1194 is absent from this extract. The `}` at
// 1203 followed by `else {` at 1204 implies an `if (...) {` opened here,
// presumably on textord_new_initial_xheight -- confirm against makerow.cpp.
1195  if (rows[row_index]->spacing < block->line_spacing
1196  && rows[row_index]->spacing > block->line_size)
1197  //within range
1198  block->line_size = rows[row_index]->spacing;
1199  //spacing=size
1200  else if (rows[row_index]->spacing > block->line_spacing)
1201  block->line_size = block->line_spacing;
1202  //too big so use max
1203  }
1204  else {
1205  if (rows[row_index]->spacing < block->line_spacing)
1206  block->line_size = rows[row_index]->spacing;
1207  else
1208  block->line_size = block->line_spacing;
1209  //too big so use max
1210  }
1211  if (block->line_size < textord_min_xheight)
1212  block->line_size = (float) textord_min_xheight;
1213  block->line_spacing = rows[row_index]->spacing;
// NOTE(review): listing line 1215 (the right-hand side of this assignment)
// is absent from this extract; presumably it scales by
// textord_excess_blobsize, which is in the reference list -- confirm.
1214  block->max_blob_size =
1216  }
1217  block->baseline_offset = fmod (rows[row_index]->intercept (),
1218  block->line_spacing);
1219  }
1220  if (testing_on)
1221  tprintf ("\nEstimate line size=%g, spacing=%g, offset=%g\n",
1222  block->line_size, block->line_spacing, block->baseline_offset);
1223 }
float intercept() const
Definition: blobbox.h:601
bool textord_new_initial_xheight
Definition: makerow.cpp:101
TO_ROW * key_row
Definition: blobbox.h:811
double textord_excess_blobsize
Definition: makerow.cpp:84
int textord_min_xheight
Definition: makerow.cpp:68
float spacing
Definition: blobbox.h:669
int32_t choose_nth_item(int32_t index, float *array, int32_t count)
Definition: statistc.cpp:637
TO_ROW_LIST * get_rows()
Definition: blobbox.h:717
float line_spacing
Definition: blobbox.h:792
double textord_linespace_iqrlimit
Definition: makerow.cpp:75
float max_blob_size
Definition: blobbox.h:799
float baseline_offset
Definition: blobbox.h:800
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
float parallel_c() const
Definition: blobbox.h:592
int row_spacing_order(const void *item1, const void *item2)
Definition: makerow.cpp:2619
float line_size
Definition: blobbox.h:798

◆ compute_xheight_from_modes()

int compute_xheight_from_modes ( STATS heights,
STATS floating_heights,
bool  cap_only,
int  min_height,
int  max_height,
float *  xheight,
float *  ascrise 
)

Definition at line 1469 of file makerow.cpp.

// compute_xheight_from_modes: pick an xheight (and ascender rise) from the
// modes of the `heights` histogram.
//  - Returns 0 immediately if the overall mode pile is empty.
//  - Otherwise obtains up to MAX_HEIGHT_MODES modes via
//    compute_height_modes() (only the first is kept when cap_only is set)
//    and looks for an (xheight, ascender) pair of modes whose size ratio
//    lies strictly between textord_ascx_ratio_min and textord_ascx_ratio_max
//    and whose piles are large enough relative to the mode count.
//  - If *xheight is still 0 afterwards, falls back to the single mode of
//    heights with the floating_heights counts temporarily removed.
// Writes *xheight and *ascrise and returns best_count.
// NOTE(review): the `*xheight == 0` test reads the caller-supplied value
// when no pair was accepted; presumably callers zero it first -- confirm.
1471  {
1472  int blob_index = heights->mode(); // find mode
1473  int blob_count = heights->pile_count(blob_index); // get count of mode
1474  if (textord_debug_xheights) {
1475  tprintf("min_height=%d, max_height=%d, mode=%d, count=%d, total=%d\n",
1476  min_height, max_height, blob_index, blob_count,
1477  heights->get_total());
1478  heights->print();
1479  floating_heights->print();
1480  }
1481  if (blob_count == 0) return 0;
1482  int modes[MAX_HEIGHT_MODES]; // biggest piles
1483  bool in_best_pile = FALSE;
1484  int prev_size = -INT32_MAX;
1485  int best_count = 0;
1486  int mode_count = compute_height_modes(heights, min_height, max_height,
1487  modes, MAX_HEIGHT_MODES);
1488  if (cap_only && mode_count > 1)
1489  mode_count = 1;
1490  int x;
1491  if (textord_debug_xheights) {
1492  tprintf("found %d modes: ", mode_count);
1493  for (x = 0; x < mode_count; x++) tprintf("%d ", modes[x]);
1494  tprintf("\n");
1495  }
1496 
// Try each mode except the last as an xheight candidate; modes after it in
// the array are tried as the matching ascender height.
1497  for (x = 0; x < mode_count - 1; x++) {
1498  if (modes[x] != prev_size + 1)
1499  in_best_pile = FALSE; // had empty height
// Candidate pile size excludes blobs recorded in floating_heights.
1500  int modes_x_count = heights->pile_count(modes[x]) -
1501  floating_heights->pile_count(modes[x]);
1502  if ((modes_x_count >= blob_count * textord_xheight_mode_fraction) &&
1503  (in_best_pile || modes_x_count > best_count)) {
1504  for (int asc = x + 1; asc < mode_count; asc++) {
1505  float ratio =
1506  static_cast<float>(modes[asc]) / static_cast<float>(modes[x]);
1507  if (textord_ascx_ratio_min < ratio &&
1508  ratio < textord_ascx_ratio_max &&
1509  (heights->pile_count(modes[asc]) >=
1510  blob_count * textord_ascheight_mode_fraction)) {
1511  if (modes_x_count > best_count) {
1512  in_best_pile = true;
1513  best_count = modes_x_count;
1514  }
1515  if (textord_debug_xheights) {
1516  tprintf("X=%d, asc=%d, count=%d, ratio=%g\n",
1517  modes[x], modes[asc]-modes[x], modes_x_count, ratio);
1518  }
1519  prev_size = modes[x];
1520  *xheight = static_cast<float>(modes[x]);
1521  *ascrise = static_cast<float>(modes[asc] - modes[x]);
1522  }
1523  }
1524  }
1525  }
1526  if (*xheight == 0) { // single mode
1527  // Remove counts of the "floating" blobs (the ones whose height is too
1528  // small in relation to its top end of the bounding box) from heights
1529  // before computing the single-mode xheight.
1530  // Restore the counts in heights after the mode is found, since
1531  // floating blobs might be useful for determining potential ascenders
1532  // in compute_row_descdrop().
1533  if (floating_heights->get_total() > 0) {
// Both loops below cover min_height up to, but not including, max_height.
1534  for (x = min_height; x < max_height; ++x) {
1535  heights->add(x, -(floating_heights->pile_count(x)));
1536  }
1537  blob_index = heights->mode(); // find the modified mode
1538  for (x = min_height; x < max_height; ++x) {
1539  heights->add(x, floating_heights->pile_count(x));
1540  }
1541  }
1542  *xheight = static_cast<float>(blob_index);
1543  *ascrise = 0.0f;
1544  best_count = heights->pile_count(blob_index);
// NOTE(review): listing line 1545 is absent from this extract; presumably
// it is an `if (textord_debug_xheights)` guarding the tprintf below, to
// match the `else if` branch at 1547 -- confirm against makerow.cpp.
1546  tprintf("Single mode xheight set to %g\n", *xheight);
1547  } else if (textord_debug_xheights) {
1548  tprintf("Multi-mode xheight set to %g, asc=%g\n", *xheight, *ascrise);
1549  }
1550  return best_count;
1551 }
int32_t pile_count(int32_t value) const
Definition: statistc.h:78
int32_t compute_height_modes(STATS *heights, int32_t min_height, int32_t max_height, int32_t *modes, int32_t maxmodes)
Definition: makerow.cpp:1625
int32_t mode() const
Definition: statistc.cpp:114
#define FALSE
Definition: capi.h:52
double textord_ascx_ratio_min
Definition: makerow.cpp:95
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
double textord_xheight_mode_fraction
Definition: makerow.cpp:90
double textord_ascx_ratio_max
Definition: makerow.cpp:96
void add(int32_t value, int32_t count)
Definition: statistc.cpp:100
void print() const
Definition: statistc.cpp:533
double textord_ascheight_mode_fraction
Definition: makerow.cpp:92
#define MAX_HEIGHT_MODES
Definition: makerow.cpp:104
int32_t get_total() const
Definition: statistc.h:86
bool textord_debug_xheights
Definition: makerow.cpp:56

◆ correct_row_xheight()

void correct_row_xheight ( TO_ROW row,
float  xheight,
float  ascrise,
float  descdrop 
)

Definition at line 1687 of file makerow.cpp.

// correct_row_xheight: adjust row->xheight, row->ascrise and row->descdrop
// using the supplied average xheight/ascrise/descdrop, depending on the
// category returned by get_row_category(row):
//  - ROW_ASCENDERS_FOUND: only descdrop is rescaled, and only if it is >= 0.
//  - ROW_INVALID, or DESCENDERS_FOUND/UNKNOWN rows whose xheight matches the
//    average (or, for DESCENDERS_FOUND, the cap height): all three values
//    are overwritten with the averages.
//  - other ROW_DESCENDERS_FOUND rows: ascrise is rescaled from the row's
//    own xheight.
//  - other ROW_UNKNOWN rows: marked all_caps and adjusted as caps/small caps.
1688  {
1689  ROW_CATEGORY row_category = get_row_category(row);
1690  if (textord_debug_xheights) {
1691  tprintf("correcting row xheight: row->xheight %.4f"
1692  ", row->acrise %.4f row->descdrop %.4f\n",
1693  row->xheight, row->ascrise, row->descdrop);
1694  }
// NOTE(review): listing lines 1696 and 1699 are absent from this extract.
// 1696 is the initialiser of normal_xheight and 1699 the final argument of
// the within_error_margin() call below; presumably both use
// textord_xheight_error_margin, with normal_xheight comparing row->xheight
// against `xheight` -- confirm against makerow.cpp.
1695  bool normal_xheight =
1697  bool cap_xheight =
1698  within_error_margin(row->xheight, xheight + ascrise,
1700  // Use the average xheight/ascrise for the following cases:
1701  // -- the xheight of the row could not be determined at all
1702  // -- the row has descenders (e.g. "many groups", "ISBN 12345 p.3")
1703  // and its xheight is close to either cap height or average xheight
1704  // -- the row does not have ascenders or descenders, but its xheight
1705  // is close to the average block xheight (e.g. row with "www.mmm.com")
1706  if (row_category == ROW_ASCENDERS_FOUND) {
1707  if (row->descdrop >= 0.0) {
1708  row->descdrop = row->xheight * (descdrop / xheight);
1709  }
1710  } else if (row_category == ROW_INVALID ||
1711  (row_category == ROW_DESCENDERS_FOUND &&
1712  (normal_xheight || cap_xheight)) ||
1713  (row_category == ROW_UNKNOWN && normal_xheight)) {
1714  if (textord_debug_xheights) tprintf("using average xheight\n");
1715  row->xheight = xheight;
1716  row->ascrise = ascrise;
1717  row->descdrop = descdrop;
1718  } else if (row_category == ROW_DESCENDERS_FOUND) {
1719  // Assume this is a row with mostly lowercase letters and its xheight
1720  // is computed correctly (unfortunately there is no way to distinguish
1721  // this from the case when descenders are found, but the most common
1722  // height is capheight).
1723  if (textord_debug_xheights) tprintf("lowercase, corrected ascrise\n");
1724  row->ascrise = row->xheight * (ascrise / xheight);
1725  } else if (row_category == ROW_UNKNOWN) {
1726  // Otherwise assume this row is an all-caps or small-caps row
1727  // and adjust xheight and ascrise of the row.
1728 
1729  row->all_caps = true;
1730  if (cap_xheight) { // regular all caps
1731  if (textord_debug_xheights) tprintf("all caps\n");
1732  row->xheight = xheight;
1733  row->ascrise = ascrise;
1734  row->descdrop = descdrop;
1735  } else { // small caps or caps with an odd xheight
1736  if (textord_debug_xheights) {
1737  if (row->xheight < xheight + ascrise && row->xheight > xheight) {
1738  tprintf("small caps\n");
1739  } else {
1740  tprintf("all caps with irregular xheight\n");
1741  }
1742  }
// Split the measured height into ascrise and xheight in the same
// proportion as the averages, then rescale descdrop to the new xheight.
1743  row->ascrise = row->xheight * (ascrise / (xheight + ascrise));
1744  row->xheight -= row->ascrise;
1745  row->descdrop = row->xheight * (descdrop / xheight);
1746  }
1747  }
1748  if (textord_debug_xheights) {
1749  tprintf("corrected row->xheight = %.4f, row->acrise = %.4f, row->descdrop"
1750  " = %.4f\n", row->xheight, row->ascrise, row->descdrop);
1751  }
1752 }
ROW_CATEGORY get_row_category(const TO_ROW *row)
Definition: makerow.h:122
float descdrop
Definition: blobbox.h:673
bool within_error_margin(float test, float num, float margin)
Definition: makerow.h:128
double textord_xheight_error_margin
Definition: makerow.cpp:99
float xheight
Definition: blobbox.h:670
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
float ascrise
Definition: blobbox.h:672
bool all_caps
Definition: blobbox.h:659
ROW_CATEGORY
Definition: makerow.h:35
bool textord_debug_xheights
Definition: makerow.cpp:56

◆ delete_non_dropout_rows()

void delete_non_dropout_rows ( TO_BLOCK block,
float  gradient,
FCOORD  rotation,
int32_t  block_edge,
bool  testing_on 
)

delete_non_dropout_rows

Compute the linespacing and offset.

Definition at line 571 of file makerow.cpp.

// delete_non_dropout_rows: remove rows that find_best_dropout_row() rejects.
// Steps visible here:
//  1. Return at once if the block has no rows.
//  2. Compute the deskewed block box and widen [min_y, max_y] so that every
//     row intercept lies strictly inside it.
//  3. Fill `occupation` (one entry per scan line) via
//     compute_line_occupation(), then derive per-line values in `deltas`
//     (used as thresholds, then as dropout distances).
//  4. For each row, look up the distance at its intercept and ask
//     find_best_dropout_row(); rejected rows hand their blob list to
//     block->blobs and are deleted.
//  5. The final loop hands the blob lists of the surviving rows to
//     block->blobs as well.
// NOTE(review): add_list_after() presumably moves the blobs, leaving the
// row's own list empty -- confirm against the list iterator implementation.
577  {
578  TBOX block_box; //deskewed block
579  int32_t max_y; //in block
580  int32_t min_y;
581  int32_t line_index; //of scan line
582  int32_t line_count; //no of scan lines
583  int32_t distance; //to drop-out
584  int32_t xleft; //of block
585  int32_t ybottom; //of block
586  TO_ROW *row; //current row
587  TO_ROW_IT row_it = block->get_rows ();
588  BLOBNBOX_IT blob_it = &block->blobs;
589 
590  if (row_it.length () == 0)
591  return; //empty block
592  block_box = deskew_block_coords (block, gradient);
593  xleft = block->block->pdblk.bounding_box ().left ();
594  ybottom = block->block->pdblk.bounding_box ().bottom ();
595  min_y = block_box.bottom () - 1;
596  max_y = block_box.top () + 1;
// Make sure each row's intercept maps to a valid index (line_index - min_y).
597  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
598  line_index = (int32_t) floor (row_it.data ()->intercept ());
599  if (line_index <= min_y)
600  min_y = line_index - 1;
601  if (line_index >= max_y)
602  max_y = line_index + 1;
603  }
604  line_count = max_y - min_y + 1;
605  if (line_count <= 0)
606  return; //empty block
607  // change in occupation
608  std::vector<int32_t> deltas(line_count);
609  // of pixel coords
610  std::vector<int32_t> occupation(line_count);
611 
612  compute_line_occupation(block, gradient, min_y, max_y, &occupation[0], &deltas[0]);
// NOTE(review): listing lines 613, 615-616 and 618-619 are absent from this
// extract. What remains is the tail of a call whose arguments end with
// (max_y - min_y + 1, occupation, deltas); presumably this is
// compute_occupation_threshold() with window sizes built from
// block->line_spacing and the k*Fraction constants in the reference list --
// confirm against makerow.cpp.
614  ceil (block->line_spacing *
617  (int32_t) ceil (block->line_spacing *
620  max_y - min_y + 1, &occupation[0], &deltas[0]);
621 #ifndef GRAPHICS_DISABLED
622  if (testing_on) {
623  draw_occupation(xleft, ybottom, min_y, max_y, &occupation[0], &deltas[0]);
624  }
625 #endif
626  compute_dropout_distances(&occupation[0], &deltas[0], line_count);
627  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
628  row = row_it.data ();
629  line_index = (int32_t) floor (row->intercept ());
630  distance = deltas[line_index - min_y];
// Rows further than half a line spacing from a dropout are rejected inside
// find_best_dropout_row (dist_limit = line_spacing / 2).
631  if (find_best_dropout_row (row, distance, block->line_spacing / 2,
632  line_index, &row_it, testing_on)) {
633 #ifndef GRAPHICS_DISABLED
634  if (testing_on)
635  plot_parallel_row(row, gradient, block_edge,
636  ScrollView::WHITE, rotation);
637 #endif
638  blob_it.add_list_after (row_it.data ()->blob_list ());
639  delete row_it.extract (); //too far away
640  }
641  }
642  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
643  blob_it.add_list_after (row_it.data ()->blob_list ());
644  }
645 }
float intercept() const
Definition: blobbox.h:601
void compute_dropout_distances(int32_t *occupation, int32_t *thresholds, int32_t line_count)
Definition: makerow.cpp:904
Definition: rect.h:34
void compute_line_occupation(TO_BLOCK *block, float gradient, int32_t min_y, int32_t max_y, int32_t *occupation, int32_t *deltas)
Definition: makerow.cpp:770
static const double kDescenderFraction
Definition: ccstruct.h:33
TO_ROW_LIST * get_rows()
Definition: blobbox.h:717
float line_spacing
Definition: blobbox.h:792
static const double kXHeightFraction
Definition: ccstruct.h:34
void compute_occupation_threshold(int32_t low_window, int32_t high_window, int32_t line_count, int32_t *occupation, int32_t *thresholds)
Definition: makerow.cpp:823
TBOX deskew_block_coords(TO_BLOCK *block, float gradient)
Definition: makerow.cpp:734
int16_t top() const
Definition: rect.h:58
void plot_parallel_row(TO_ROW *row, float gradient, int32_t left, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:124
BLOCK * block
Definition: blobbox.h:790
void draw_occupation(int32_t xleft, int32_t ybottom, int32_t min_y, int32_t max_y, int32_t occupation[], int32_t thresholds[])
Definition: drawtord.cpp:165
static const double kAscenderFraction
Definition: ccstruct.h:35
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:60
BLOBNBOX_LIST blobs
Definition: blobbox.h:785
int16_t bottom() const
Definition: rect.h:65
PDBLK pdblk
Definition: ocrblock.h:192
bool find_best_dropout_row(TO_ROW *row, int32_t distance, float dist_limit, int32_t line_index, TO_ROW_IT *row_it, bool testing_on)
Definition: makerow.cpp:654

◆ deskew_block_coords()

TBOX deskew_block_coords ( TO_BLOCK block,
float  gradient 
)

Definition at line 734 of file makerow.cpp.

// deskew_block_coords: return the union of the bounding boxes of every blob
// in every row of `block`, after rotating each box by the unit vector
// (1, -gradient) / sqrt(gradient^2 + 1). Works on copies of the boxes, so
// the blobs themselves are not modified. If the block has no blobs the
// default-constructed TBOX is returned unchanged.
737  {
738  TBOX result; //block bounds
739  TBOX blob_box; //rotated copy of the current blob's box
740  FCOORD rotation; //deskew vector
741  float length; //of gradient vector
742  TO_ROW_IT row_it = block->get_rows ();
743  TO_ROW *row; //current row
744  BLOBNBOX *blob; //current blob
745  BLOBNBOX_IT blob_it; //iterator
746 
// Normalise (1, gradient) and negate the y part to rotate against the skew.
747  length = sqrt (gradient * gradient + 1);
748  rotation = FCOORD (1 / length, -gradient / length);
749  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
750  row = row_it.data ();
751  blob_it.set_to_list (row->blob_list ());
752  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
753  blob_it.forward ()) {
754  blob = blob_it.data ();
755  blob_box = blob->bounding_box ();
756  blob_box.rotate (rotation);//de-skew it
757  result += blob_box;
758  }
759  }
760  return result;
761 }
void rotate(const FCOORD &vec)
Definition: rect.h:197
Definition: rect.h:34
TO_ROW_LIST * get_rows()
Definition: blobbox.h:717
Definition: points.h:189
const TBOX & bounding_box() const
Definition: blobbox.h:231
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:612

◆ expand_rows()

void expand_rows ( ICOORD  page_tr,
TO_BLOCK block,
float  gradient,
FCOORD  rotation,
int32_t  block_edge,
bool  testing_on 
)

Definition at line 951 of file makerow.cpp.

// expand_rows: grow each row's [min_y, max_y] limits towards a band around
// its intercept, absorbing neighbouring rows that fall wholly inside it.
// Sequence visible here: optionally create the debug window,
// adjust_row_limits(), (re)compute row stats, assign_blobs_to_rows() on
// pass 4 with reject_misses set, fit_parallel_rows(), then walk the rows
// from the last one backwards. For each row the allowed band
// [y_bottom, y_top] is derived from block->line_size *
// textord_expansion_factor; a neighbour entirely inside the band is merged
// (its blobs appended to this row and the row deleted), a neighbour that
// does not reach the row's current limit truncates the band, and an
// overlapping neighbour stops expansion on that side.
// NOTE(review): several listing lines are absent from this extract (976,
// 990, 998, 1000-1001, 1023, 1070); see the notes at each gap.
958  {
959  bool swallowed_row; //eaten a neighbour
960  float y_max, y_min; //new row limits
961  float y_bottom, y_top; //allowed limits
962  TO_ROW *test_row; //next row
963  TO_ROW *row; //current row
964  //iterators
965  BLOBNBOX_IT blob_it = &block->blobs;
966  TO_ROW_IT row_it = block->get_rows ();
967 
968 #ifndef GRAPHICS_DISABLED
969  if (textord_show_expanded_rows && testing_on) {
970  if (to_win == nullptr)
971  create_to_win(page_tr);
972  }
973 #endif
974 
975  adjust_row_limits(block); //shift min,max.
// NOTE(review): listing line 976 is absent; the `}` at 980 implies an
// `if (...) {` opened here, presumably on textord_new_initial_xheight
// (listed in the references) -- confirm against makerow.cpp.
977  if (block->get_rows ()->length () == 0)
978  return;
979  compute_row_stats(block, textord_show_expanded_rows && testing_on);
980  }
981  assign_blobs_to_rows (block, &gradient, 4, true, false, false);
982  //get real membership
983  if (block->get_rows ()->length () == 0)
984  return;
985  fit_parallel_rows(block,
986  gradient,
987  rotation,
988  block_edge,
989  textord_show_expanded_rows && testing_on);
// NOTE(review): listing line 990 is absent; presumably it is the
// complementary guard (`if (!textord_new_initial_xheight)`) for the call
// below -- confirm against makerow.cpp.
991  compute_row_stats(block, textord_show_expanded_rows && testing_on);
992  row_it.move_to_last ();
993  do {
994  row = row_it.data ();
995  y_max = row->max_y (); //get current limits
996  y_min = row->min_y ();
// NOTE(review): listing lines 998 and 1000-1001 (the trailing factors of
// these two expressions) are absent; presumably they use the
// kXHeightFraction / kDescenderFraction / kAscenderFraction constants from
// the reference list -- confirm against makerow.cpp.
997  y_bottom = row->intercept () - block->line_size * textord_expansion_factor *
999  y_top = row->intercept () + block->line_size * textord_expansion_factor *
1002  if (y_min > y_bottom) { //expansion allowed
1003  if (textord_show_expanded_rows && testing_on)
1004  tprintf("Expanding bottom of row at %f from %f to %f\n",
1005  row->intercept(), y_min, y_bottom);
1006  //expandable
1007  swallowed_row = true;
// Keep examining the following row until one is not swallowed.
1008  while (swallowed_row && !row_it.at_last ()) {
1009  swallowed_row = false;
1010  //get next one
1011  test_row = row_it.data_relative (1);
1012  //overlaps space
1013  if (test_row->max_y () > y_bottom) {
1014  if (test_row->min_y () > y_bottom) {
1015  if (textord_show_expanded_rows && testing_on)
1016  tprintf("Eating row below at %f\n", test_row->intercept());
1017  row_it.forward ();
1018 #ifndef GRAPHICS_DISABLED
1019  if (textord_show_expanded_rows && testing_on)
// NOTE(review): listing line 1023 (the colour argument) is absent here.
1020  plot_parallel_row(test_row,
1021  gradient,
1022  block_edge,
1024  rotation);
1025 #endif
1026  blob_it.set_to_list (row->blob_list ());
1027  blob_it.add_list_after (test_row->blob_list ());
1028  //swallow complete row
1029  delete row_it.extract ();
// Step back so the iterator is on the current row again.
1030  row_it.backward ();
1031  swallowed_row = true;
1032  }
1033  else if (test_row->max_y () < y_min) {
1034  //shorter limit
1035  y_bottom = test_row->max_y ();
1036  if (textord_show_expanded_rows && testing_on)
1037  tprintf("Truncating limit to %f due to touching row at %f\n",
1038  y_bottom, test_row->intercept());
1039  }
1040  else {
1041  y_bottom = y_min; //can't expand it
1042  if (textord_show_expanded_rows && testing_on)
1043  tprintf("Not expanding limit beyond %f due to touching row at %f\n",
1044  y_bottom, test_row->intercept());
1045  }
1046  }
1047  }
1048  y_min = y_bottom; //expand it
1049  }
1050  if (y_max < y_top) { //expansion allowed
1051  if (textord_show_expanded_rows && testing_on)
1052  tprintf("Expanding top of row at %f from %f to %f\n",
1053  row->intercept(), y_max, y_top);
1054  swallowed_row = true;
// Mirror of the bottom case, looking at the preceding row instead.
1055  while (swallowed_row && !row_it.at_first ()) {
1056  swallowed_row = false;
1057  //get one above
1058  test_row = row_it.data_relative (-1);
1059  if (test_row->min_y () < y_top) {
1060  if (test_row->max_y () < y_top) {
1061  if (textord_show_expanded_rows && testing_on)
1062  tprintf("Eating row above at %f\n", test_row->intercept());
1063  row_it.backward ();
1064  blob_it.set_to_list (row->blob_list ());
1065 #ifndef GRAPHICS_DISABLED
1066  if (textord_show_expanded_rows && testing_on)
// NOTE(review): listing line 1070 (the colour argument) is absent here.
1067  plot_parallel_row(test_row,
1068  gradient,
1069  block_edge,
1071  rotation);
1072 #endif
1073  blob_it.add_list_after (test_row->blob_list ());
1074  //swallow complete row
1075  delete row_it.extract ();
// Step forward so the iterator is on the current row again.
1076  row_it.forward ();
1077  swallowed_row = true;
1078  }
1079  else if (test_row->min_y () < y_max) {
1080  //shorter limit
1081  y_top = test_row->min_y ();
1082  if (textord_show_expanded_rows && testing_on)
1083  tprintf("Truncating limit to %f due to touching row at %f\n",
1084  y_top, test_row->intercept());
1085  }
1086  else {
1087  y_top = y_max; //can't expand it
1088  if (textord_show_expanded_rows && testing_on)
1089  tprintf("Not expanding limit beyond %f due to touching row at %f\n",
1090  y_top, test_row->intercept());
1091  }
1092  }
1093  }
1094  y_max = y_top;
1095  }
1096  //new limits
1097  row->set_limits (y_min, y_max);
1098  row_it.backward ();
1099  }
1100  while (!row_it.at_last ());
1101 }
float intercept() const
Definition: blobbox.h:601
bool textord_new_initial_xheight
Definition: makerow.cpp:101
double textord_expansion_factor
Definition: makerow.cpp:79
bool textord_show_expanded_rows
Definition: makerow.cpp:46
void compute_row_stats(TO_BLOCK *block, bool testing_on)
Definition: makerow.cpp:1145
static const double kDescenderFraction
Definition: ccstruct.h:33
TO_ROW_LIST * get_rows()
Definition: blobbox.h:717
static const double kXHeightFraction
Definition: ccstruct.h:34
void adjust_row_limits(TO_BLOCK *block)
Definition: makerow.cpp:1109
void assign_blobs_to_rows(TO_BLOCK *block, float *gradient, int pass, bool reject_misses, bool make_new_rows, bool drawing_skew)
Definition: makerow.cpp:2280
void set_limits(float new_min, float new_max)
Definition: blobbox.h:635
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
void plot_parallel_row(TO_ROW *row, float gradient, int32_t left, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:124
float min_y() const
Definition: blobbox.h:574
EXTERN ScrollView * to_win
Definition: drawtord.cpp:37
static const double kAscenderFraction
Definition: ccstruct.h:35
void fit_parallel_rows(TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
Definition: makerow.cpp:1933
BLOBNBOX_LIST blobs
Definition: blobbox.h:785
ScrollView * create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:46
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:612
float line_size
Definition: blobbox.h:798
float max_y() const
Definition: blobbox.h:571

◆ fill_heights()

void fill_heights ( TO_ROW row,
float  gradient,
int  min_height,
int  max_height,
STATS heights,
STATS floating_heights 
)

Definition at line 1408 of file makerow.cpp.

// fill_heights: accumulate blob heights of `row` into two histograms.
// For every blob not joined to the previous one, `top` is the blob's top
// edge measured relative to a baseline estimate at the blob's x centre.
// If top lies in [min_height, max_height] its rounded value is added to
// `heights`; it is also added to `floating_heights` when the blob's own
// box height divided by top is below textord_min_blob_height_fraction.
// When the row has marked repeated-character sets, the blobs that follow a
// blob with a non-zero repeated_set() and share its set id are skipped;
// that first blob itself has already been counted by the time of the skip.
// Returns without touching the histograms if the row has no blobs.
1409  {
1410  float xcentre; // centre of blob
1411  float top; // top y coord of blob
1412  float height; // height of blob
1413  BLOBNBOX *blob; // current blob
1414  int repeated_set;
1415  BLOBNBOX_IT blob_it = row->blob_list();
1416  if (blob_it.empty()) return; // no blobs in this row
1417  bool has_rep_chars =
1418  row->rep_chars_marked() && row->num_repeated_sets() > 0;
1419  do {
1420  blob = blob_it.data();
1421  if (!blob->joined_to_prev()) {
1422  xcentre = (blob->bounding_box().left() +
1423  blob->bounding_box().right()) / 2.0f;
1424  top = blob->bounding_box().top();
1425  height = blob->bounding_box().height();
// NOTE(review): listing line 1426 is absent from this extract; the `else`
// at 1428 implies an `if (...)` here, presumably on textord_fix_xheight_bug
// (listed in the references), choosing between the spline baseline and the
// straight line gradient * x + parallel_c() -- confirm against makerow.cpp.
1427  top -= row->baseline.y(xcentre);
1428  else
1429  top -= gradient * xcentre + row->parallel_c();
1430  if (top >= min_height && top <= max_height) {
1431  heights->add(static_cast<int32_t>(floor(top + 0.5)), 1);
1432  if (height / top < textord_min_blob_height_fraction) {
1433  floating_heights->add(static_cast<int32_t>(floor(top + 0.5)), 1);
1434  }
1435  }
1436  }
1437  // Skip repeated chars, since they are likely to skew the height stats.
1438  if (has_rep_chars && blob->repeated_set() != 0) {
1439  repeated_set = blob->repeated_set();
1440  blob_it.forward();
// Stop skipping when the set id changes or the iterator wraps to the start.
1441  while (!blob_it.at_first() &&
1442  blob_it.data()->repeated_set() == repeated_set) {
1443  blob_it.forward();
// NOTE(review): listing line 1444 is absent from this extract; presumably
// it is an `if (textord_debug_xheights)` guarding the tprintf below --
// confirm against makerow.cpp.
1445  tprintf("Skipping repeated char when computing xheight\n");
1446  }
1447  } else {
1448  blob_it.forward();
1449  }
1450  } while (!blob_it.at_first());
1451 }
QSPLINE baseline
Definition: blobbox.h:683
int num_repeated_sets() const
Definition: blobbox.h:650
bool textord_fix_xheight_bug
Definition: makerow.cpp:54
double y(double x) const
Definition: quspline.cpp:209
double textord_min_blob_height_fraction
Definition: makerow.cpp:88
int16_t left() const
Definition: rect.h:72
int16_t top() const
Definition: rect.h:58
bool rep_chars_marked() const
Definition: blobbox.h:644
int repeated_set() const
Definition: blobbox.h:263
bool joined_to_prev() const
Definition: blobbox.h:257
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
float parallel_c() const
Definition: blobbox.h:592
void add(int32_t value, int32_t count)
Definition: statistc.cpp:100
const TBOX & bounding_box() const
Definition: blobbox.h:231
int16_t right() const
Definition: rect.h:79
int16_t height() const
Definition: rect.h:108
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:612
bool textord_debug_xheights
Definition: makerow.cpp:56

◆ find_best_dropout_row()

bool find_best_dropout_row ( TO_ROW *  row,
int32_t  distance,
float  dist_limit,
int32_t  line_index,
TO_ROW_IT *  row_it,
bool  testing_on 
)

Definition at line 654 of file makerow.cpp.

661  {
662  int32_t next_index; // of neighbouring row
663  int32_t row_offset; //from current row
664  int32_t abs_dist; //absolute distance
665  int8_t row_inc; //increment to row_index
666  TO_ROW *next_row; //neighbouring row
667 
668  if (testing_on)
669  tprintf ("Row at %g(%g), dropout dist=%d,",
670  row->intercept (), row->parallel_c (), distance);
671  if (distance < 0) {
672  row_inc = 1;
673  abs_dist = -distance;
674  }
675  else {
676  row_inc = -1;
677  abs_dist = distance;
678  }
679  if (abs_dist > dist_limit) {
680  if (testing_on) {
681  tprintf (" too far - deleting\n");
682  }
683  return true;
684  }
685  if ((distance < 0 && !row_it->at_last ())
686  || (distance >= 0 && !row_it->at_first ())) {
687  row_offset = row_inc;
688  do {
689  next_row = row_it->data_relative (row_offset);
690  next_index = (int32_t) floor (next_row->intercept ());
691  if ((distance < 0
692  && next_index < line_index
693  && next_index > line_index + distance + distance)
694  || (distance >= 0
695  && next_index > line_index
696  && next_index < line_index + distance + distance)) {
697  if (testing_on) {
698  tprintf (" nearer neighbour (%d) at %g\n",
699  line_index + distance - next_index,
700  next_row->intercept ());
701  }
702  return true; //other is nearer
703  }
704  else if (next_index == line_index
705  || next_index == line_index + distance + distance) {
706  if (row->believability () <= next_row->believability ()) {
707  if (testing_on) {
708  tprintf (" equal but more believable at %g (%g/%g)\n",
709  next_row->intercept (),
710  row->believability (),
711  next_row->believability ());
712  }
713  return true; //other is more believable
714  }
715  }
716  row_offset += row_inc;
717  }
718  while ((next_index == line_index
719  || next_index == line_index + distance + distance)
720  && row_offset < row_it->length ());
721  if (testing_on)
722  tprintf (" keeping\n");
723  }
724  return false;
725 }
float intercept() const
Definition: blobbox.h:601
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
float parallel_c() const
Definition: blobbox.h:592
float believability() const
Definition: blobbox.h:598

◆ fit_lms_line()

void fit_lms_line ( TO_ROW *  row)

Definition at line 267 of file makerow.cpp.

267  {
268  float m, c; // fitted line
269  tesseract::DetLineFit lms;
270  BLOBNBOX_IT blob_it = row->blob_list();
271 
272  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
273  const TBOX& box = blob_it.data()->bounding_box();
274  lms.Add(ICOORD((box.left() + box.right()) / 2, box.bottom()));
275  }
276  double error = lms.Fit(&m, &c);
277  row->set_line(m, c, error);
278 }
Definition: rect.h:34
void set_line(float new_m, float new_c, float new_error)
Definition: blobbox.h:616
int16_t left() const
Definition: rect.h:72
void Add(const ICOORD &pt)
Definition: detlinefit.cpp:51
integer coordinate
Definition: points.h:32
double Fit(ICOORD *pt1, ICOORD *pt2)
Definition: detlinefit.h:75
int16_t right() const
Definition: rect.h:79
int16_t bottom() const
Definition: rect.h:65
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:612

◆ fit_parallel_lms()

void fit_parallel_lms ( float  gradient,
TO_ROW *  row 
)

Definition at line 1975 of file makerow.cpp.

1975  {
1976  float c; // fitted line
1977  int blobcount; // no of blobs
1978  tesseract::DetLineFit lms;
1979  BLOBNBOX_IT blob_it = row->blob_list();
1980 
1981  blobcount = 0;
1982  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1983  if (!blob_it.data()->joined_to_prev()) {
1984  const TBOX& box = blob_it.data()->bounding_box();
1985  lms.Add(ICOORD((box.left() + box.right()) / 2, box.bottom()));
1986  blobcount++;
1987  }
1988  }
1989  double error = lms.ConstrainedFit(gradient, &c);
1990  row->set_parallel_line(gradient, c, error);
1991  if (textord_straight_baselines && blobcount > textord_lms_line_trials) {
1992  error = lms.Fit(&gradient, &c);
1993  }
1994  //set the other too
1995  row->set_line(gradient, c, error);
1996 }
int textord_lms_line_trials
Definition: makerow.cpp:100
Definition: rect.h:34
void set_line(float new_m, float new_c, float new_error)
Definition: blobbox.h:616
int16_t left() const
Definition: rect.h:72
void Add(const ICOORD &pt)
Definition: detlinefit.cpp:51
void set_parallel_line(float gradient, float new_c, float new_error)
Definition: blobbox.h:624
integer coordinate
Definition: points.h:32
double Fit(ICOORD *pt1, ICOORD *pt2)
Definition: detlinefit.h:75
int16_t right() const
Definition: rect.h:79
double ConstrainedFit(const FCOORD &direction, double min_dist, double max_dist, bool debug, ICOORD *line_pt)
Definition: detlinefit.cpp:130
int16_t bottom() const
Definition: rect.h:65
bool textord_straight_baselines
Definition: makerow.cpp:51
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:612

◆ fit_parallel_rows()

void fit_parallel_rows ( TO_BLOCK *  block,
float  gradient,
FCOORD  rotation,
int32_t  block_edge,
bool  testing_on 
)

Definition at line 1933 of file makerow.cpp.

1939  {
1940 #ifndef GRAPHICS_DISABLED
1941  ScrollView::Color colour; //of row
1942 #endif
1943  TO_ROW_IT row_it = block->get_rows ();
1944 
1945  row_it.move_to_first ();
1946  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1947  if (row_it.data ()->blob_list ()->empty ())
1948  delete row_it.extract (); //nothing in it
1949  else
1950  fit_parallel_lms (gradient, row_it.data ());
1951  }
1952 #ifndef GRAPHICS_DISABLED
1953  if (testing_on) {
1954  colour = ScrollView::RED;
1955  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1956  plot_parallel_row (row_it.data (), gradient,
1957  block_edge, colour, rotation);
1958  colour = (ScrollView::Color) (colour + 1);
1959  if (colour > ScrollView::MAGENTA)
1960  colour = ScrollView::RED;
1961  }
1962  }
1963 #endif
1964  row_it.sort (row_y_order); //may have gone out of order
1965 }
TO_ROW_LIST * get_rows()
Definition: blobbox.h:717
void plot_parallel_row(TO_ROW *row, float gradient, int32_t left, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:124
void fit_parallel_lms(float gradient, TO_ROW *row)
Definition: makerow.cpp:1975
int row_y_order(const void *item1, const void *item2)
Definition: makerow.cpp:2597

◆ linear_spline_baseline()

double* linear_spline_baseline ( TO_ROW *  row,
TO_BLOCK *  block,
int32_t &  segments,
int32_t  xstarts[] 
)

Definition at line 2189 of file makerow.cpp.

2194  {
2195  int blobcount; //no of blobs
2196  int blobindex; //current blob
2197  int index1, index2; //blob numbers
2198  int blobs_per_segment; //blobs in each
2199  TBOX box; //blob box
2200  TBOX new_box; //new_it box
2201  //blobs
2202  BLOBNBOX_IT blob_it = row->blob_list ();
2203  BLOBNBOX_IT new_it = blob_it; //front end
2204  float b, c; //fitted curve
2205  tesseract::DetLineFit lms;
2206  int32_t segment; //current segment
2207 
2208  box = box_next_pre_chopped (&blob_it);
2209  xstarts[0] = box.left ();
2210  blobcount = 1;
2211  while (!blob_it.at_first ()) {
2212  blobcount++;
2213  box = box_next_pre_chopped (&blob_it);
2214  }
2215  segments = blobcount / textord_spline_medianwin;
2216  if (segments < 1)
2217  segments = 1;
2218  blobs_per_segment = blobcount / segments;
2219  // quadratic coeffs
2220  double *coeffs = new double[segments * 3];
2221  if (textord_oldbl_debug)
2222  tprintf
2223  ("Linear splining baseline of %d blobs at (%d,%d), into %d segments of %d blobs\n",
2224  blobcount, box.left (), box.bottom (), segments, blobs_per_segment);
2225  segment = 1;
2226  for (index2 = 0; index2 < blobs_per_segment / 2; index2++)
2227  box_next_pre_chopped(&new_it);
2228  index1 = 0;
2229  blobindex = index2;
2230  do {
2231  blobindex += blobs_per_segment;
2232  lms.Clear();
2233  while (index1 < blobindex || (segment == segments && index1 < blobcount)) {
2234  box = box_next_pre_chopped (&blob_it);
2235  int middle = (box.left() + box.right()) / 2;
2236  lms.Add(ICOORD(middle, box.bottom()));
2237  index1++;
2238  if (index1 == blobindex - blobs_per_segment / 2
2239  || index1 == blobcount - 1) {
2240  xstarts[segment] = box.left ();
2241  }
2242  }
2243  lms.Fit(&b, &c);
2244  coeffs[segment * 3 - 3] = 0;
2245  coeffs[segment * 3 - 2] = b;
2246  coeffs[segment * 3 - 1] = c;
2247  segment++;
2248  if (segment > segments)
2249  break;
2250 
2251  blobindex += blobs_per_segment;
2252  lms.Clear();
2253  while (index2 < blobindex || (segment == segments && index2 < blobcount)) {
2254  new_box = box_next_pre_chopped (&new_it);
2255  int middle = (new_box.left() + new_box.right()) / 2;
2256  lms.Add(ICOORD (middle, new_box.bottom()));
2257  index2++;
2258  if (index2 == blobindex - blobs_per_segment / 2
2259  || index2 == blobcount - 1) {
2260  xstarts[segment] = new_box.left ();
2261  }
2262  }
2263  lms.Fit(&b, &c);
2264  coeffs[segment * 3 - 3] = 0;
2265  coeffs[segment * 3 - 2] = b;
2266  coeffs[segment * 3 - 1] = c;
2267  segment++;
2268  }
2269  while (segment <= segments);
2270  return coeffs;
2271 }
TBOX box_next_pre_chopped(BLOBNBOX_IT *it)
Definition: blobbox.cpp:666
Definition: rect.h:34
int textord_spline_medianwin
Definition: makerow.cpp:65
int16_t left() const
Definition: rect.h:72
void Add(const ICOORD &pt)
Definition: detlinefit.cpp:51
integer coordinate
Definition: points.h:32
double Fit(ICOORD *pt1, ICOORD *pt2)
Definition: detlinefit.h:75
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
int16_t right() const
Definition: rect.h:79
int16_t bottom() const
Definition: rect.h:65
EXTERN bool textord_oldbl_debug
Definition: oldbasel.cpp:42
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:612

◆ make_baseline_spline()

void make_baseline_spline ( TO_ROW *  row,
TO_BLOCK *  block 
)

Definition at line 2058 of file makerow.cpp.

2059  {
2060  double *coeffs; // quadratic coeffs
2061  int32_t segments; // no of segments
2062 
2063  // spline boundaries
2064  int32_t *xstarts = new int32_t[row->blob_list()->length() + 1];
2065  if (segment_baseline(row, block, segments, xstarts)
2066  && !textord_straight_baselines && !textord_parallel_baselines) {
2067  coeffs = linear_spline_baseline(row, block, segments, xstarts);
2068  } else {
2069  xstarts[1] = xstarts[segments];
2070  segments = 1;
2071  coeffs = new double[3];
2072  coeffs[0] = 0;
2073  coeffs[1] = row->line_m ();
2074  coeffs[2] = row->line_c ();
2075  }
2076  row->baseline = QSPLINE (segments, xstarts, coeffs);
2077  delete[] coeffs;
2078  delete[] xstarts;
2079 }
QSPLINE baseline
Definition: blobbox.h:683
float line_m() const
Definition: blobbox.h:583
bool segment_baseline(TO_ROW *row, TO_BLOCK *block, int32_t &segments, int32_t *xstarts)
Definition: makerow.cpp:2090
bool textord_parallel_baselines
Definition: makerow.cpp:50
double * linear_spline_baseline(TO_ROW *row, TO_BLOCK *block, int32_t &segments, int32_t xstarts[])
Definition: makerow.cpp:2189
float line_c() const
Definition: blobbox.h:586
bool textord_straight_baselines
Definition: makerow.cpp:51
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:612

◆ make_initial_textrows()

void make_initial_textrows ( ICOORD  page_tr,
TO_BLOCK *  block,
FCOORD  rotation,
bool  testing_on 
)

Definition at line 227 of file makerow.cpp.

232  {
233  TO_ROW_IT row_it = block->get_rows ();
234 
235 #ifndef GRAPHICS_DISABLED
236  ScrollView::Color colour; //of row
237 
238  if (textord_show_initial_rows && testing_on) {
239  if (to_win == nullptr)
240  create_to_win(page_tr);
241  }
242 #endif
243  //guess skew
244  assign_blobs_to_rows (block, nullptr, 0, TRUE, TRUE, textord_show_initial_rows && testing_on);
245  row_it.move_to_first ();
246  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
247  fit_lms_line (row_it.data ());
248 #ifndef GRAPHICS_DISABLED
249  if (textord_show_initial_rows && testing_on) {
250  colour = ScrollView::RED;
251  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
252  plot_to_row (row_it.data (), colour, rotation);
253  colour = (ScrollView::Color) (colour + 1);
254  if (colour > ScrollView::MAGENTA)
255  colour = ScrollView::RED;
256  }
257  }
258 #endif
259 }
#define TRUE
Definition: capi.h:51
TO_ROW_LIST * get_rows()
Definition: blobbox.h:717
void assign_blobs_to_rows(TO_BLOCK *block, float *gradient, int pass, bool reject_misses, bool make_new_rows, bool drawing_skew)
Definition: makerow.cpp:2280
void fit_lms_line(TO_ROW *row)
Definition: makerow.cpp:267
bool textord_show_initial_rows
Definition: makerow.cpp:44
void plot_to_row(TO_ROW *row, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:90
EXTERN ScrollView * to_win
Definition: drawtord.cpp:37
ScrollView * create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:46

◆ make_rows()

float make_rows ( ICOORD  page_tr,
TO_BLOCK_LIST *  port_blocks 
)

Definition at line 201 of file makerow.cpp.

201  {
202  float port_m; // global skew
203  float port_err; // global noise
204  TO_BLOCK_IT block_it; // iterator
205 
206  block_it.set_to_list(port_blocks);
207  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
208  block_it.forward())
209  make_initial_textrows(page_tr, block_it.data(), FCOORD(1.0f, 0.0f),
210  !(bool)textord_test_landscape);
211  // compute globally
212  compute_page_skew(port_blocks, port_m, port_err);
213  block_it.set_to_list(port_blocks);
214  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
215  cleanup_rows_making(page_tr, block_it.data(), port_m, FCOORD(1.0f, 0.0f),
216  block_it.data()->block->pdblk.bounding_box().left(),
217  !(bool)textord_test_landscape);
218  }
219  return port_m; // global skew
220 }
void make_initial_textrows(ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, bool testing_on)
Definition: makerow.cpp:227
bool textord_test_landscape
Definition: makerow.cpp:49
void cleanup_rows_making(ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
Definition: makerow.cpp:518
void compute_page_skew(TO_BLOCK_LIST *blocks, float &page_m, float &page_err)
Definition: makerow.cpp:287
Definition: points.h:189

◆ make_single_row()

float make_single_row ( ICOORD  page_tr,
bool  allow_sub_blobs,
TO_BLOCK *  block,
TO_BLOCK_LIST *  blocks 
)

Definition at line 164 of file makerow.cpp.

165  {
166  BLOBNBOX_IT blob_it = &block->blobs;
167  TO_ROW_IT row_it = block->get_rows();
168 
169  // Include all the small blobs and large blobs.
170  blob_it.add_list_after(&block->small_blobs);
171  blob_it.add_list_after(&block->noise_blobs);
172  blob_it.add_list_after(&block->large_blobs);
173  if (block->blobs.singleton() && allow_sub_blobs) {
174  blob_it.move_to_first();
175  float size = MakeRowFromSubBlobs(block, blob_it.data()->cblob(), &row_it);
176  if (size > block->line_size)
177  block->line_size = size;
178  } else if (block->blobs.empty()) {
179  // Make a fake blob.
180  C_BLOB* blob = C_BLOB::FakeBlob(block->block->pdblk.bounding_box());
181  // The blobnbox owns the blob.
182  BLOBNBOX* bblob = new BLOBNBOX(blob);
183  blob_it.add_after_then_move(bblob);
184  }
185  MakeRowFromBlobs(block->line_size, &blob_it, &row_it);
186  // Fit an LMS line to the rows.
187  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward())
188  fit_lms_line(row_it.data());
189  float gradient;
190  float fit_error;
191  // Compute the skew based on the fitted line.
192  compute_page_skew(blocks, gradient, fit_error);
193  return gradient;
194 }
TO_ROW_LIST * get_rows()
Definition: blobbox.h:717
void fit_lms_line(TO_ROW *row)
Definition: makerow.cpp:267
void compute_page_skew(TO_BLOCK_LIST *blocks, float &page_m, float &page_err)
Definition: makerow.cpp:287
BLOCK * block
Definition: blobbox.h:790
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:60
BLOBNBOX_LIST blobs
Definition: blobbox.h:785
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:789
static C_BLOB * FakeBlob(const TBOX &box)
Definition: stepblob.cpp:243
PDBLK pdblk
Definition: ocrblock.h:192
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:788
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:787
float line_size
Definition: blobbox.h:798

◆ mark_repeated_chars()

void mark_repeated_chars ( TO_ROW *  row)

Definition at line 2641 of file makerow.cpp.

2641  {
2642  BLOBNBOX_IT box_it(row->blob_list()); // Iterator.
2643  int num_repeated_sets = 0;
2644  if (!box_it.empty()) {
2645  do {
2646  BLOBNBOX* bblob = box_it.data();
2647  int repeat_length = 1;
2648  if (bblob->flow() == BTFT_LEADER &&
2649  !bblob->joined_to_prev() && bblob->cblob() != nullptr) {
2650  BLOBNBOX_IT test_it(box_it);
2651  for (test_it.forward(); !test_it.at_first();) {
2652  bblob = test_it.data();
2653  if (bblob->flow() != BTFT_LEADER)
2654  break;
2655  test_it.forward();
2656  bblob = test_it.data();
2657  if (bblob->joined_to_prev() || bblob->cblob() == nullptr) {
2658  repeat_length = 0;
2659  break;
2660  }
2661  ++repeat_length;
2662  }
2663  }
2664  if (repeat_length >= kMinLeaderCount) {
2665  num_repeated_sets++;
2666  for (; repeat_length > 0; box_it.forward(), --repeat_length) {
2667  bblob = box_it.data();
2668  bblob->set_repeated_set(num_repeated_sets);
2669  }
2670  } else {
2671  bblob->set_repeated_set(0);
2672  box_it.forward();
2673  }
2674  } while (!box_it.at_first()); // until all done
2675  }
2676  row->set_num_repeated_sets(num_repeated_sets);
2677 }
void set_repeated_set(int set_id)
Definition: blobbox.h:266
const int kMinLeaderCount
Definition: makerow.cpp:106
BlobTextFlowType flow() const
Definition: blobbox.h:296
void set_num_repeated_sets(int num_sets)
Definition: blobbox.h:653
bool joined_to_prev() const
Definition: blobbox.h:257
C_BLOB * cblob() const
Definition: blobbox.h:269
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:612

◆ most_overlapping_row()

OVERLAP_STATE most_overlapping_row ( TO_ROW_IT *  row_it,
TO_ROW *&  best_row,
float  top,
float  bottom,
float  rowsize,
bool  testing_blob 
)

Definition at line 2480 of file makerow.cpp.

2487  {
2488  OVERLAP_STATE result; //result of tests
2489  float overlap; //of blob & row
2490  float bestover; //nearest row
2491  float merge_top, merge_bottom; //size of merged row
2492  ICOORD testpt; //testing only
2493  TO_ROW *row; //current row
2494  TO_ROW *test_row; //for multiple overlaps
2495  BLOBNBOX_IT blob_it; //for merging rows
2496 
2497  result = ASSIGN;
2498  row = row_it->data ();
2499  bestover = top - bottom;
2500  if (top > row->max_y ())
2501  bestover -= top - row->max_y ();
2502  if (bottom < row->min_y ())
2503  //compute overlap
2504  bestover -= row->min_y () - bottom;
2505  if (testing_blob && textord_debug_blob) {
2506  tprintf("Test blob y=(%g,%g), row=(%f,%f), size=%g, overlap=%f\n",
2507  bottom, top, row->min_y(), row->max_y(), rowsize, bestover);
2508  }
2509  test_row = row;
2510  do {
2511  if (!row_it->at_last ()) {
2512  row_it->forward ();
2513  test_row = row_it->data ();
2514  if (test_row->min_y () <= top && test_row->max_y () >= bottom) {
2515  merge_top =
2516  test_row->max_y () >
2517  row->max_y ()? test_row->max_y () : row->max_y ();
2518  merge_bottom =
2519  test_row->min_y () <
2520  row->min_y ()? test_row->min_y () : row->min_y ();
2521  if (merge_top - merge_bottom <= rowsize) {
2522  if (testing_blob && textord_debug_blob) {
2523  tprintf ("Merging rows at (%g,%g), (%g,%g)\n",
2524  row->min_y (), row->max_y (),
2525  test_row->min_y (), test_row->max_y ());
2526  }
2527  test_row->set_limits (merge_bottom, merge_top);
2528  blob_it.set_to_list (test_row->blob_list ());
2529  blob_it.add_list_after (row->blob_list ());
2530  blob_it.sort (blob_x_order);
2531  row_it->backward ();
2532  delete row_it->extract ();
2533  row_it->forward ();
2534  bestover = -1.0f; //force replacement
2535  }
2536  overlap = top - bottom;
2537  if (top > test_row->max_y ())
2538  overlap -= top - test_row->max_y ();
2539  if (bottom < test_row->min_y ())
2540  overlap -= test_row->min_y () - bottom;
2541  if (bestover >= rowsize - 1 && overlap >= rowsize - 1) {
2542  result = REJECT;
2543  }
2544  if (overlap > bestover) {
2545  bestover = overlap; //find biggest overlap
2546  row = test_row;
2547  }
2548  if (testing_blob && textord_debug_blob) {
2549  tprintf("Test blob y=(%g,%g), row=(%f,%f), size=%g, overlap=%f->%f\n",
2550  bottom, top, test_row->min_y(), test_row->max_y(),
2551  rowsize, overlap, bestover);
2552  }
2553  }
2554  }
2555  }
2556  while (!row_it->at_last ()
2557  && test_row->min_y () <= top && test_row->max_y () >= bottom);
2558  while (row_it->data () != row)
2559  row_it->backward (); //make it point to row
2560  //doesn't overlap much
2561  if (top - bottom - bestover > rowsize * textord_overlap_x &&
2562  (!textord_fix_makerow_bug || bestover < rowsize * textord_overlap_x)
2563  && result == ASSIGN)
2564  result = NEW_ROW; //doesn't overlap enough
2565  best_row = row;
2566  return result;
2567 }
Definition: makerow.h:30
bool textord_fix_makerow_bug
Definition: makerow.cpp:55
int blob_x_order(const void *item1, const void *item2)
Definition: makerow.cpp:2575
OVERLAP_STATE
Definition: makerow.h:28
integer coordinate
Definition: points.h:32
void set_limits(float new_min, float new_max)
Definition: blobbox.h:635
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
float min_y() const
Definition: blobbox.h:574
bool textord_debug_blob
Definition: makerow.cpp:102
double textord_overlap_x
Definition: makerow.cpp:80
Definition: makerow.h:31
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:612
float max_y() const
Definition: blobbox.h:571

◆ pre_associate_blobs()

void pre_associate_blobs ( ICOORD  page_tr,
TO_BLOCK *  block,
FCOORD  rotation,
bool  testing_on 
)

Definition at line 1847 of file makerow.cpp.

1852  {
1853 #ifndef GRAPHICS_DISABLED
1854  ScrollView::Color colour; //of boxes
1855 #endif
1856  BLOBNBOX *blob; //current blob
1857  BLOBNBOX *nextblob; //next in list
1858  TBOX blob_box;
1859  FCOORD blob_rotation; //inverse of rotation
1860  BLOBNBOX_IT blob_it; //iterator
1861  BLOBNBOX_IT start_it; //iterator
1862  TO_ROW_IT row_it = block->get_rows ();
1863 
1864 #ifndef GRAPHICS_DISABLED
1865  colour = ScrollView::RED;
1866 #endif
1867 
1868  blob_rotation = FCOORD (rotation.x (), -rotation.y ());
1869  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1870  //get blobs
1871  blob_it.set_to_list (row_it.data ()->blob_list ());
1872  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
1873  blob_it.forward ()) {
1874  blob = blob_it.data ();
1875  blob_box = blob->bounding_box ();
1876  start_it = blob_it; //save start point
1877  // if (testing_on && textord_show_final_blobs)
1878  // {
1879  // tprintf("Blob at (%d,%d)->(%d,%d), addr=%x, count=%d\n",
1880  // blob_box.left(),blob_box.bottom(),
1881  // blob_box.right(),blob_box.top(),
1882  // (void*)blob,blob_it.length());
1883  // }
1884  bool overlap;
1885  do {
1886  overlap = false;
1887  if (!blob_it.at_last ()) {
1888  nextblob = blob_it.data_relative(1);
1889  overlap = blob_box.major_x_overlap(nextblob->bounding_box());
1890  if (overlap) {
1891  blob->merge(nextblob); // merge new blob
1892  blob_box = blob->bounding_box(); // get bigger box
1893  blob_it.forward();
1894  }
1895  }
1896  }
1897  while (overlap);
1898  blob->chop (&start_it, &blob_it,
1899  blob_rotation,
1900  block->line_size * tesseract::CCStruct::kXHeightFraction *
1901  textord_chop_width);
1902  //attempt chop
1903  }
1904 #ifndef GRAPHICS_DISABLED
1905  if (testing_on && textord_show_final_blobs) {
1906  if (to_win == nullptr)
1907  create_to_win(page_tr);
1908  to_win->Pen(colour);
1909  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
1910  blob_it.forward ()) {
1911  blob = blob_it.data ();
1912  blob_box = blob->bounding_box ();
1913  blob_box.rotate (rotation);
1914  if (!blob->joined_to_prev ()) {
1915  to_win->Rectangle (blob_box.left (), blob_box.bottom (),
1916  blob_box.right (), blob_box.top ());
1917  }
1918  }
1919  colour = (ScrollView::Color) (colour + 1);
1920  if (colour > ScrollView::MAGENTA)
1921  colour = ScrollView::RED;
1922  }
1923 #endif
1924  }
1925 }
void rotate(const FCOORD &vec)
Definition: rect.h:197
double textord_chop_width
Definition: makerow.cpp:77
Definition: rect.h:34
void merge(BLOBNBOX *nextblob)
Definition: blobbox.cpp:93
TO_ROW_LIST * get_rows()
Definition: blobbox.h:717
static const double kXHeightFraction
Definition: ccstruct.h:34
int16_t left() const
Definition: rect.h:72
int16_t top() const
Definition: rect.h:58
bool major_x_overlap(const TBOX &box) const
Definition: rect.h:412
bool joined_to_prev() const
Definition: blobbox.h:257
bool textord_show_final_blobs
Definition: makerow.cpp:48
void chop(BLOBNBOX_IT *start_it, BLOBNBOX_IT *blob_it, FCOORD rotation, float xheight)
Definition: blobbox.cpp:121
EXTERN ScrollView * to_win
Definition: drawtord.cpp:37
Definition: points.h:189
const TBOX & bounding_box() const
Definition: blobbox.h:231
int16_t right() const
Definition: rect.h:79
float x() const
Definition: points.h:208
void Rectangle(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:602
ScrollView * create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:46
void Pen(Color color)
Definition: scrollview.cpp:722
int16_t bottom() const
Definition: rect.h:65
float y() const
Definition: points.h:211
float line_size
Definition: blobbox.h:798

◆ row_spacing_order()

int row_spacing_order ( const void *  item1,
const void *  item2 
)

Definition at line 2619 of file makerow.cpp.

2621  {
2622  //converted ptr
2623  const TO_ROW *row1 = *reinterpret_cast<const TO_ROW* const*>(item1);
2624  //converted ptr
2625  const TO_ROW *row2 = *reinterpret_cast<const TO_ROW* const*>(item2);
2626 
2627  if (row1->spacing < row2->spacing)
2628  return -1;
2629  else if (row1->spacing > row2->spacing)
2630  return 1;
2631  else
2632  return 0;
2633 }
float spacing
Definition: blobbox.h:669

◆ row_y_order()

int row_y_order ( const void *  item1,
const void *  item2 
)

Definition at line 2597 of file makerow.cpp.

2599  {
2600  //converted ptr
2601  const TO_ROW *row1 = *reinterpret_cast<const TO_ROW* const*>(item1);
2602  //converted ptr
2603  const TO_ROW *row2 = *reinterpret_cast<const TO_ROW* const*>(item2);
2604 
2605  if (row1->parallel_c () > row2->parallel_c ())
2606  return -1;
2607  else if (row1->parallel_c () < row2->parallel_c ())
2608  return 1;
2609  else
2610  return 0;
2611 }
float parallel_c() const
Definition: blobbox.h:592

◆ segment_baseline()

bool segment_baseline ( TO_ROW *  row,
TO_BLOCK *  block,
int32_t &  segments,
int32_t *  xstarts 
)

Definition at line 2090 of file makerow.cpp.

2095  {
2096  bool needs_curve; //needs curved line
2097  int blobcount; //no of blobs
2098  int blobindex; //current blob
2099  int last_state; //above, on , below
2100  int state; //of current blob
2101  float yshift; //from baseline
2102  TBOX box; //blob box
2103  TBOX new_box; //new_it box
2104  float middle; //xcentre of blob
2105  //blobs
2106  BLOBNBOX_IT blob_it = row->blob_list ();
2107  BLOBNBOX_IT new_it = blob_it; //front end
2108  SORTED_FLOATS yshifts; //shifts from baseline
2109 
2110  needs_curve = false;
2111  box = box_next_pre_chopped (&blob_it);
2112  xstarts[0] = box.left ();
2113  segments = 1;
2114  blobcount = row->blob_list ()->length ();
2115  if (textord_oldbl_debug)
2116  tprintf ("Segmenting baseline of %d blobs at (%d,%d)\n",
2117  blobcount, box.left (), box.bottom ());
2118  if (blobcount <= textord_spline_medianwin
2119  || blobcount < textord_spline_minblobs) {
2120  blob_it.move_to_last ();
2121  box = blob_it.data ()->bounding_box ();
2122  xstarts[1] = box.right ();
2123  return false;
2124  }
2125  last_state = 0;
2126  new_it.mark_cycle_pt ();
2127  for (blobindex = 0; blobindex < textord_spline_medianwin; blobindex++) {
2128  new_box = box_next_pre_chopped (&new_it);
2129  middle = (new_box.left () + new_box.right ()) / 2.0;
2130  yshift = new_box.bottom () - row->line_m () * middle - row->line_c ();
2131  //record shift
2132  yshifts.add (yshift, blobindex);
2133  if (new_it.cycled_list ()) {
2134  xstarts[1] = new_box.right ();
2135  return false;
2136  }
2137  }
2138  for (blobcount = 0; blobcount < textord_spline_medianwin / 2; blobcount++)
2139  box = box_next_pre_chopped (&blob_it);
2140  do {
2141  new_box = box_next_pre_chopped (&new_it);
2142  //get middle one
2143  yshift = yshifts[textord_spline_medianwin / 2];
2144  if (yshift > textord_spline_shift_fraction * block->line_size)
2145  state = 1;
2146  else if (-yshift > textord_spline_shift_fraction * block->line_size)
2147  state = -1;
2148  else
2149  state = 0;
2150  if (state != 0)
2151  needs_curve = true;
2152  // tprintf("State=%d, prev=%d, shift=%g\n",
2153  // state,last_state,yshift);
2154  if (state != last_state && blobcount > textord_spline_minblobs) {
2155  xstarts[segments++] = box.left ();
2156  blobcount = 0;
2157  }
2158  last_state = state;
2159  yshifts.remove (blobindex - textord_spline_medianwin);
2160  box = box_next_pre_chopped (&blob_it);
2161  middle = (new_box.left () + new_box.right ()) / 2.0;
2162  yshift = new_box.bottom () - row->line_m () * middle - row->line_c ();
2163  yshifts.add (yshift, blobindex);
2164  blobindex++;
2165  blobcount++;
2166  }
2167  while (!new_it.cycled_list ());
2168  if (blobcount > textord_spline_minblobs || segments == 1) {
2169  xstarts[segments] = new_box.right ();
2170  }
2171  else {
2172  xstarts[--segments] = new_box.right ();
2173  }
2174  if (textord_oldbl_debug)
2175  tprintf ("Made %d segments on row at (%d,%d)\n",
2176  segments, box.right (), box.bottom ());
2177  return needs_curve;
2178 }
TBOX box_next_pre_chopped(BLOBNBOX_IT *it)
Definition: blobbox.cpp:666
float line_m() const
Definition: blobbox.h:583
Definition: rect.h:34
double textord_spline_shift_fraction
Definition: makerow.cpp:70
int textord_spline_medianwin
Definition: makerow.cpp:65
void remove(int32_t key)
Definition: sortflts.cpp:52
int16_t left() const
Definition: rect.h:72
float line_c() const
Definition: blobbox.h:586
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
int textord_spline_minblobs
Definition: makerow.cpp:64
int16_t right() const
Definition: rect.h:79
void add(float value, int32_t key)
Definition: sortflts.cpp:27
int16_t bottom() const
Definition: rect.h:65
EXTERN bool textord_oldbl_debug
Definition: oldbasel.cpp:42
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:612
float line_size
Definition: blobbox.h:798

◆ separate_underlines()

void separate_underlines ( TO_BLOCK * block,
float  gradient,
FCOORD  rotation,
bool  testing_on 
)

Definition at line 1774 of file makerow.cpp.

1777  { // correct orientation
1778  BLOBNBOX *blob; // current blob
1779  C_BLOB *rotated_blob; // rotated blob
1780  TO_ROW *row; // current row
1781  float length; // of g_vec
1782  TBOX blob_box;
1783  FCOORD blob_rotation; // inverse of rotation
1784  FCOORD g_vec; // skew rotation
1785  BLOBNBOX_IT blob_it; // iterator
1786  // iterator
1787  BLOBNBOX_IT under_it = &block->underlines;
1788  BLOBNBOX_IT large_it = &block->large_blobs;
1789  TO_ROW_IT row_it = block->get_rows();
1790  int min_blob_height = static_cast<int>(textord_min_blob_height_fraction *
1791  block->line_size + 0.5);
1792 
1793  // length of vector
1794  length = sqrt(1 + gradient * gradient);
1795  g_vec = FCOORD(1 / length, -gradient / length);
1796  blob_rotation = FCOORD(rotation.x(), -rotation.y());
1797  blob_rotation.rotate(g_vec); // undoing everything
1798  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1799  row = row_it.data();
1800  // get blobs
1801  blob_it.set_to_list(row->blob_list());
1802  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list();
1803  blob_it.forward()) {
1804  blob = blob_it.data();
1805  blob_box = blob->bounding_box();
1806  if (blob_box.width() > block->line_size * textord_underline_width) {
1807  ASSERT_HOST(blob->cblob() != nullptr);
1808  rotated_blob = crotate_cblob (blob->cblob(),
1809  blob_rotation);
1810  if (test_underline(
1811  testing_on && textord_show_final_rows,
1812  rotated_blob, static_cast<int16_t>(row->intercept()),
1813  static_cast<int16_t>(
1814  block->line_size *
1817  under_it.add_after_then_move(blob_it.extract());
1818  if (testing_on && textord_show_final_rows) {
1819  tprintf("Underlined blob at:");
1820  rotated_blob->bounding_box().print();
1821  tprintf("Was:");
1822  blob_box.print();
1823  }
1824  } else if (CountOverlaps(blob->bounding_box(), min_blob_height,
1825  row->blob_list()) >
1827  large_it.add_after_then_move(blob_it.extract());
1828  if (testing_on && textord_show_final_rows) {
1829  tprintf("Large blob overlaps %d blobs at:",
1830  CountOverlaps(blob_box, min_blob_height,
1831  row->blob_list()));
1832  blob_box.print();
1833  }
1834  }
1835  delete rotated_blob;
1836  }
1837  }
1838  }
1839 }
float intercept() const
Definition: blobbox.h:601
void print() const
Definition: rect.h:278
int textord_max_blob_overlaps
Definition: makerow.cpp:67
void rotate(const FCOORD vec)
Definition: points.h:764
Definition: rect.h:34
double textord_min_blob_height_fraction
Definition: makerow.cpp:88
TO_ROW_LIST * get_rows()
Definition: blobbox.h:717
int16_t width() const
Definition: rect.h:115
static const double kXHeightFraction
Definition: ccstruct.h:34
bool textord_show_final_rows
Definition: makerow.cpp:47
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
bool test_underline(bool testing_on, C_BLOB *blob, int16_t baseline, int16_t xheight)
Definition: blkocc.cpp:53
C_BLOB * crotate_cblob(C_BLOB *blob, FCOORD rotation)
Definition: blobbox.cpp:612
static const double kAscenderFraction
Definition: ccstruct.h:35
double textord_underline_width
Definition: makerow.cpp:86
Definition: points.h:189
const TBOX & bounding_box() const
Definition: blobbox.h:231
float x() const
Definition: points.h:208
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:789
BLOBNBOX_LIST underlines
Definition: blobbox.h:786
C_BLOB * cblob() const
Definition: blobbox.h:269
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:612
float y() const
Definition: points.h:211
float line_size
Definition: blobbox.h:798
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ vigorous_noise_removal()

void vigorous_noise_removal ( TO_BLOCK * block)

Definition at line 467 of file makerow.cpp.

467  {
468  TO_ROW_IT row_it = block->get_rows ();
469  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
470  TO_ROW* row = row_it.data();
471  BLOBNBOX_IT b_it = row->blob_list();
472  // Estimate the xheight on the row.
473  int max_height = 0;
474  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
475  BLOBNBOX* blob = b_it.data();
476  if (blob->bounding_box().height() > max_height)
477  max_height = blob->bounding_box().height();
478  }
479  STATS hstats(0, max_height + 1);
480  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
481  BLOBNBOX* blob = b_it.data();
482  int height = blob->bounding_box().height();
483  if (height >= kMinSize)
484  hstats.add(blob->bounding_box().height(), 1);
485  }
486  float xheight = hstats.median();
487  // Delete small objects.
488  BLOBNBOX* prev = nullptr;
489  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
490  BLOBNBOX* blob = b_it.data();
491  const TBOX& box = blob->bounding_box();
492  if (box.height() < kNoiseSize * xheight) {
493  // Small so delete unless it looks like an i dot.
494  if (prev != nullptr) {
495  if (dot_of_i(blob, prev, row))
496  continue; // Looks OK.
497  }
498  if (!b_it.at_last()) {
499  BLOBNBOX* next = b_it.data_relative(1);
500  if (dot_of_i(blob, next, row))
501  continue; // Looks OK.
502  }
503  // It might be noise so get rid of it.
504  delete blob->cblob();
505  delete b_it.extract();
506  } else {
507  prev = blob;
508  }
509  }
510  }
511 }
Definition: rect.h:34
Definition: statistc.h:33
TO_ROW_LIST * get_rows()
Definition: blobbox.h:717
const int kMinSize
Definition: makerow.cpp:378
const TBOX & bounding_box() const
Definition: blobbox.h:231
int16_t height() const
Definition: rect.h:108
C_BLOB * cblob() const
Definition: blobbox.h:269
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:612
const double kNoiseSize
Definition: makerow.cpp:377

Variable Documentation

◆ kMinLeaderCount

const int kMinLeaderCount = 5

Definition at line 106 of file makerow.cpp.

◆ kMinSize

const int kMinSize = 8

Definition at line 378 of file makerow.cpp.

◆ kNoiseSize

const double kNoiseSize = 0.5

Definition at line 377 of file makerow.cpp.

◆ textord_ascheight_mode_fraction

double textord_ascheight_mode_fraction = 0.08

"Min pile height to make ascheight"

Definition at line 92 of file makerow.cpp.

◆ textord_ascx_ratio_max

double textord_ascx_ratio_max = 1.8

"Max cap/xheight"

Definition at line 96 of file makerow.cpp.

◆ textord_ascx_ratio_min

double textord_ascx_ratio_min = 1.25

"Min cap/xheight"

Definition at line 95 of file makerow.cpp.

◆ textord_biased_skewcalc

bool textord_biased_skewcalc = TRUE

"Bias skew estimates with line length"

Definition at line 57 of file makerow.cpp.

◆ textord_chop_width

double textord_chop_width = 1.5

"Max width before chopping"

Definition at line 77 of file makerow.cpp.

◆ textord_debug_blob

bool textord_debug_blob = FALSE

"Print test blob information"

Definition at line 102 of file makerow.cpp.

◆ textord_debug_xheights

bool textord_debug_xheights = FALSE

"Test xheight algorithms"

Definition at line 56 of file makerow.cpp.

◆ textord_descheight_mode_fraction

double textord_descheight_mode_fraction = 0.08

"Min pile height to make descheight"

Definition at line 94 of file makerow.cpp.

◆ textord_descx_ratio_max

double textord_descx_ratio_max = 0.6

"Max desc/xheight"

Definition at line 98 of file makerow.cpp.

◆ textord_descx_ratio_min

double textord_descx_ratio_min = 0.25

"Min desc/xheight"

Definition at line 97 of file makerow.cpp.

◆ textord_excess_blobsize

double textord_excess_blobsize = 1.3

"New row made if blob makes row this big"

Definition at line 84 of file makerow.cpp.

◆ textord_expansion_factor

double textord_expansion_factor = 1.0

"Factor to expand rows by in expand_rows"

Definition at line 79 of file makerow.cpp.

◆ textord_fix_makerow_bug

bool textord_fix_makerow_bug = TRUE

"Prevent multiple baselines"

Definition at line 55 of file makerow.cpp.

◆ textord_fix_xheight_bug

bool textord_fix_xheight_bug = TRUE

"Use spline baseline"

Definition at line 54 of file makerow.cpp.

◆ textord_heavy_nr

bool textord_heavy_nr = FALSE

"Vigorously remove noise"

Definition at line 43 of file makerow.cpp.

◆ textord_interpolating_skew

bool textord_interpolating_skew = TRUE

"Interpolate across gaps"

Definition at line 58 of file makerow.cpp.

◆ textord_linespace_iqrlimit

double textord_linespace_iqrlimit = 0.2

"Max iqr/median for linespace"

Definition at line 75 of file makerow.cpp.

◆ textord_lms_line_trials

int textord_lms_line_trials = 12

"Number of linew fits to do"

Definition at line 100 of file makerow.cpp.

◆ textord_max_blob_overlaps

int textord_max_blob_overlaps = 4

"Max number of blobs a big blob can overlap"

Definition at line 67 of file makerow.cpp.

◆ textord_min_blob_height_fraction

double textord_min_blob_height_fraction = 0.75

"Min blob height/top to include blob top into xheight stats"

Definition at line 88 of file makerow.cpp.

◆ textord_min_blobs_in_row

int textord_min_blobs_in_row = 4

"Min blobs before gradient counted"

Definition at line 63 of file makerow.cpp.

◆ textord_min_linesize

double textord_min_linesize = 1.25

"* blob height for initial linesize"

Definition at line 82 of file makerow.cpp.

◆ textord_min_xheight

int textord_min_xheight = 10

"Min credible pixel xheight"

Definition at line 68 of file makerow.cpp.

◆ textord_minxh

double textord_minxh = 0.25

"fraction of linesize for min xheight"

Definition at line 81 of file makerow.cpp.

◆ textord_new_initial_xheight

bool textord_new_initial_xheight = TRUE

"Use test xheight mechanism"

Definition at line 101 of file makerow.cpp.

◆ textord_occupancy_threshold

double textord_occupancy_threshold = 0.4

"Fraction of neighbourhood"

Definition at line 85 of file makerow.cpp.

◆ textord_old_baselines

bool textord_old_baselines = TRUE

"Use old baseline algorithm"

Definition at line 52 of file makerow.cpp.

◆ textord_old_xheight

bool textord_old_xheight = FALSE

"Use old xheight algorithm"

Definition at line 53 of file makerow.cpp.

◆ textord_overlap_x

double textord_overlap_x = 0.375

"Fraction of linespace for good overlap"

Definition at line 80 of file makerow.cpp.

◆ textord_parallel_baselines

bool textord_parallel_baselines = TRUE

"Force parallel baselines"

Definition at line 50 of file makerow.cpp.

◆ textord_show_expanded_rows

bool textord_show_expanded_rows = FALSE

"Display rows after expanding"

Definition at line 46 of file makerow.cpp.

◆ textord_show_final_blobs

bool textord_show_final_blobs = FALSE

"Display blob bounds after pre-ass"

Definition at line 48 of file makerow.cpp.

◆ textord_show_final_rows

bool textord_show_final_rows = FALSE

"Display rows after final fitting"

Definition at line 47 of file makerow.cpp.

◆ textord_show_initial_rows

bool textord_show_initial_rows = FALSE

"Display row accumulation"

Definition at line 44 of file makerow.cpp.

◆ textord_show_parallel_rows

bool textord_show_parallel_rows = FALSE

"Display page correlated rows"

Definition at line 45 of file makerow.cpp.

◆ textord_skew_ile

double textord_skew_ile = 0.5

"Ile of gradients for page skew"

Definition at line 73 of file makerow.cpp.

◆ textord_skew_lag

double textord_skew_lag = 0.02

"Lag for skew on row accumulation"

Definition at line 74 of file makerow.cpp.

◆ textord_skewsmooth_offset

int textord_skewsmooth_offset = 4

"For smooth factor"

Definition at line 59 of file makerow.cpp.

◆ textord_skewsmooth_offset2

int textord_skewsmooth_offset2 = 1

"For smooth factor"

Definition at line 60 of file makerow.cpp.

◆ textord_spline_medianwin

int textord_spline_medianwin = 6

"Size of window for spline segmentation"

Definition at line 65 of file makerow.cpp.

◆ textord_spline_minblobs

int textord_spline_minblobs = 8

"Min blobs in each spline segment"

Definition at line 64 of file makerow.cpp.

◆ textord_spline_outlier_fraction

double textord_spline_outlier_fraction = 0.1

"Fraction of line spacing for outlier"

Definition at line 72 of file makerow.cpp.

◆ textord_spline_shift_fraction

double textord_spline_shift_fraction = 0.02

"Fraction of line spacing for quad"

Definition at line 70 of file makerow.cpp.

◆ textord_straight_baselines

bool textord_straight_baselines = FALSE

"Force straight baselines"

Definition at line 51 of file makerow.cpp.

◆ textord_test_landscape

bool textord_test_landscape = FALSE

"Tests refer to land/port"

Definition at line 49 of file makerow.cpp.

◆ textord_test_x

int textord_test_x = -INT32_MAX

"coord of test pt"

Definition at line 61 of file makerow.cpp.

◆ textord_test_y

int textord_test_y = -INT32_MAX

"coord of test pt"

Definition at line 62 of file makerow.cpp.

◆ textord_underline_width

double textord_underline_width = 2.0

"Multiple of line_size for underline"

Definition at line 86 of file makerow.cpp.

◆ textord_width_limit

double textord_width_limit = 8

"Max width of blobs to make rows"

Definition at line 76 of file makerow.cpp.

◆ textord_xheight_error_margin

double textord_xheight_error_margin = 0.1

"Accepted variation"

Definition at line 99 of file makerow.cpp.

◆ textord_xheight_mode_fraction

double textord_xheight_mode_fraction = 0.4

"Min pile height to make xheight"

Definition at line 90 of file makerow.cpp.