tesseract  5.0.0-alpha-619-ge9db
makerow.cpp File Reference
#include <vector>
#include "blobbox.h"
#include "ccstruct.h"
#include "detlinefit.h"
#include "statistc.h"
#include "drawtord.h"
#include "blkocc.h"
#include "sortflts.h"
#include "oldbasel.h"
#include "textord.h"
#include "tordmain.h"
#include "underlin.h"
#include "makerow.h"
#include "tprintf.h"
#include "tovars.h"
#include <algorithm>

Go to the source code of this file.

Namespaces

 tesseract
 

Macros

#define MAX_HEIGHT_MODES   12
 

Functions

make_single_row

Arrange the blobs into a single row. If there is only a single blob, two rows are made instead, in case the top-level blob is a container of the real blobs to recognize.

float make_single_row (ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks)
 
make_rows

Arrange the blobs into rows.

float make_rows (ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
 
make_initial_textrows

Arrange the good blobs into rows of text.

void make_initial_textrows (ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, bool testing_on)
 
fit_lms_line

Fit an LMS line to a row.

void fit_lms_line (TO_ROW *row)
 
find_best_dropout_row

Delete this row if it has a neighbour with better dropout characteristics. Returns true if the row should be deleted.

bool find_best_dropout_row (TO_ROW *row, int32_t distance, float dist_limit, int32_t line_index, TO_ROW_IT *row_it, bool testing_on)
 
deskew_block_coords

Compute the bounding box of all the blobs in the block if they were deskewed without actually doing it.

TBOX deskew_block_coords (TO_BLOCK *block, float gradient)
 
compute_line_occupation

Compute the pixel projection back on the y axis given the global skew. Also compute the 1st derivative.

void compute_line_occupation (TO_BLOCK *block, float gradient, int32_t min_y, int32_t max_y, int32_t *occupation, int32_t *deltas)
 
void compute_occupation_threshold (int32_t low_window, int32_t high_window, int32_t line_count, int32_t *occupation, int32_t *thresholds)
 
compute_dropout_distances

Compute the distance from each coordinate to the nearest dropout.

void compute_dropout_distances (int32_t *occupation, int32_t *thresholds, int32_t line_count)
 
expand_rows

Expand each row to the least of its allowed size and touching its neighbours. If the expansion would entirely swallow a neighbouring row then do so.

void expand_rows (ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
 
void adjust_row_limits (TO_BLOCK *block)
 
compute_row_stats

Compute the linespacing and offset.

void compute_row_stats (TO_BLOCK *block, bool testing_on)
 
fill_heights

Fill the given heights with heights of the blobs that are legal candidates for estimating xheight.

void fill_heights (TO_ROW *row, float gradient, int min_height, int max_height, STATS *heights, STATS *floating_heights)
 
compute_xheight_from_modes

Given a STATS object heights, looks for the two most frequently occurring heights that look like xheight and xheight + ascrise. If found, sets the values of *xheight and *ascrise accordingly, otherwise sets *xheight to any most frequently occurring height and sets *ascrise to 0. Returns the number of times xheight occurred in heights. For each mode that is considered for being an xheight, the count of floating blobs (stored in floating_heights) is subtracted from the total count of the blobs of this height. This is done because blobs that sit far above the baseline could represent valid ascenders, but it is highly unlikely that such a character's height will be an xheight (e.g. -, ', =, ^, ‘, ", ’, etc.). If cap_only is set, only the top mode is looked for.

int compute_xheight_from_modes (STATS *heights, STATS *floating_heights, bool cap_only, int min_height, int max_height, float *xheight, float *ascrise)
 
compute_row_descdrop

Estimates the descdrop of this row. This function looks for "significant" descenders of lowercase letters (those that could not just be the small descenders of upper case letters like Q,J). The function also takes into account how many potential ascenders this row might contain. If the number of potential ascenders along with descenders is close to the expected fraction of the total number of blobs in the row, the function returns the descender height, returns 0 otherwise.

int32_t compute_row_descdrop (TO_ROW *row, float gradient, int xheight_blob_count, STATS *asc_heights)
 
compute_height_modes

Find the top maxmodes values in the input array and put their indices in the output in the order in which they occurred.

int32_t compute_height_modes (STATS *heights, int32_t min_height, int32_t max_height, int32_t *modes, int32_t maxmodes)
 
correct_row_xheight

Adjust the xheight etc of this row if not within reasonable limits of the average for the block.

void correct_row_xheight (TO_ROW *row, float xheight, float ascrise, float descdrop)
 
separate_underlines

Test wide objects for being potential underlines. If they are then put them in a separate list in the block.

void separate_underlines (TO_BLOCK *block, float gradient, FCOORD rotation, bool testing_on)
 
pre_associate_blobs

Associate overlapping blobs and fake chop wide blobs.

void pre_associate_blobs (ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, bool testing_on)
 
fit_parallel_rows

Re-fit the rows in the block to the given gradient.

void fit_parallel_rows (TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
 
fit_parallel_lms

Fit an LMS line to a row. Make the fit parallel to the given gradient and set the row accordingly.

void fit_parallel_lms (float gradient, TO_ROW *row)
 
make_baseline_spline

Fit an LMS line to a row. Make the fit parallel to the given gradient and set the row accordingly.

void make_baseline_spline (TO_ROW *row, TO_BLOCK *block)
 
segment_baseline

Divide the baseline up into segments which require a different quadratic fitted to them. Return true if enough blobs were far enough away to need a quadratic.

bool segment_baseline (TO_ROW *row, TO_BLOCK *block, int32_t &segments, int32_t *xstarts)
 
linear_spline_baseline

Divide the baseline up into segments which require a different quadratic fitted to them.

Returns
true if enough blobs were far enough away to need a quadratic.
double * linear_spline_baseline (TO_ROW *row, TO_BLOCK *block, int32_t &segments, int32_t xstarts[])
 
assign_blobs_to_rows

Make enough rows to allocate all the given blobs to one. If a block skew is given, use that, else attempt to track it.

void assign_blobs_to_rows (TO_BLOCK *block, float *gradient, int pass, bool reject_misses, bool make_new_rows, bool drawing_skew)
 
most_overlapping_row

Return the row which most overlaps the blob.

OVERLAP_STATE most_overlapping_row (TO_ROW_IT *row_it, TO_ROW *&best_row, float top, float bottom, float rowsize, bool testing_blob)
 
blob_x_order

Sort function to sort blobs in x from page left.

int blob_x_order (const void *item1, const void *item2)
 
row_y_order

Sort function to sort rows in y from page top.

int row_y_order (const void *item1, const void *item2)
 
row_spacing_order

Qsort style function to compare 2 TO_ROWS based on their spacing value.

int row_spacing_order (const void *item1, const void *item2)
 
mark_repeated_chars

Mark blobs marked with BTFT_LEADER in repeated sets using the repeated_set member of BLOBNBOX.

void mark_repeated_chars (TO_ROW *row)
 

Variables

bool textord_heavy_nr = false
 
bool textord_show_initial_rows = false
 
bool textord_show_parallel_rows = false
 
bool textord_show_expanded_rows = false
 
bool textord_show_final_rows = false
 
bool textord_show_final_blobs = false
 
bool textord_test_landscape = false
 
bool textord_parallel_baselines = true
 
bool textord_straight_baselines = false
 
bool textord_old_baselines = true
 
bool textord_old_xheight = false
 
bool textord_fix_xheight_bug = true
 
bool textord_fix_makerow_bug = true
 
bool textord_debug_xheights = false
 
int textord_test_x = -INT32_MAX
 
int textord_test_y = -INT32_MAX
 
int textord_min_blobs_in_row = 4
 
int textord_spline_minblobs = 8
 
int textord_spline_medianwin = 6
 
int textord_min_xheight = 10
 
double textord_spline_shift_fraction = 0.02
 
double textord_spline_outlier_fraction = 0.1
 
double textord_skew_ile = 0.5
 
double textord_skew_lag = 0.02
 
double textord_linespace_iqrlimit = 0.2
 
double textord_width_limit = 8
 
double textord_chop_width = 1.5
 
double textord_minxh = 0.25
 
double textord_min_linesize = 1.25
 
double textord_excess_blobsize = 1.3
 
double textord_occupancy_threshold = 0.4
 
double textord_underline_width = 2.0
 
double textord_min_blob_height_fraction = 0.75
 
double textord_xheight_mode_fraction = 0.4
 
double textord_ascheight_mode_fraction = 0.08
 
double textord_ascx_ratio_min = 1.25
 
double textord_ascx_ratio_max = 1.8
 
double textord_descx_ratio_min = 0.25
 
double textord_descx_ratio_max = 0.6
 
double textord_xheight_error_margin = 0.1
 
int textord_lms_line_trials = 12
 
bool textord_new_initial_xheight = true
 
bool textord_debug_blob = false
 
const int kMinLeaderCount = 5
 

compute_page_skew

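Compute the skew over a full page from the gradients of all the lines. In the implementation listed below the page gradient is not a mean: it is the entry that choose_nth_item selects at position row_count * textord_skew_ile (the middle entry at the default of 0.5). The page error is selected in the same way, independently, from the line errors.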

const double kNoiseSize = 0.5
 
const int kMinSize = 8
 
void compute_page_skew (TO_BLOCK_LIST *blocks, float &page_m, float &page_err)
 
void vigorous_noise_removal (TO_BLOCK *block)
 
void cleanup_rows_making (ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
 
void delete_non_dropout_rows (TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
 

Macro Definition Documentation

◆ MAX_HEIGHT_MODES

#define MAX_HEIGHT_MODES   12

Definition at line 103 of file makerow.cpp.

Function Documentation

◆ adjust_row_limits()

void adjust_row_limits ( TO_BLOCK block)

adjust_row_limits

Change the limits of rows to suit the default fractions.

Definition at line 1107 of file makerow.cpp.

// adjust_row_limits: visits every row of the block, gives it new limits
// expressed as offsets from the row intercept, and clears its merged flag.
// NOTE(review): this listing is missing source lines 1116, 1123, 1126-1128 and
// 1130-1131, so the computation of ymin and the tail of the ymax expression
// are not visible here. The two tprintf calls are presumably guarded by a
// debug flag on the missing lines - confirm against makerow.cpp.
1109  {
1110  TO_ROW *row; //current row
1111  float size; //size of row
1112  float ymax; //top of row
1113  float ymin; //bottom of row
1114  TO_ROW_IT row_it = block->get_rows ();
1115 
1117  tprintf("Adjusting row limits for block(%d,%d)\n",
1118  block->block->pdblk.bounding_box().left(),
1119  block->block->pdblk.bounding_box().top());
1120  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1121  row = row_it.data ();
// Current height of the row band.
1122  size = row->max_y () - row->min_y ();
1124  tprintf("Row at %f has min %f, max %f, size %f\n",
1125  row->intercept(), row->min_y(), row->max_y(), size);
1129  ymax = size * (tesseract::CCStruct::kXHeightFraction +
// The new limits are offsets (ymin, ymax) applied to the row intercept.
1132  row->set_limits (row->intercept () + ymin, row->intercept () + ymax);
1133  row->merged = false;
1134  }
1135 }

◆ assign_blobs_to_rows()

void assign_blobs_to_rows ( TO_BLOCK block,
float *  gradient,
int  pass,
bool  reject_misses,
bool  make_new_rows,
bool  drawing_skew 
)

Definition at line 2278 of file makerow.cpp.

// assign_blobs_to_rows: distributes the blobs of block->blobs among the rows
// of the block.
//  - The blobs are sorted by left edge (blob_x_order) and visited in that order.
//  - The top and bottom of each blob are corrected by block_skew before they
//    are compared with the row limits. With a non-null gradient block_skew is
//    recomputed from *gradient for every blob; with a null gradient it is a
//    running estimate that is updated after accepted blobs (smooth_factor).
//  - Each blob ends up in one of three states: ASSIGN (moved into an existing
//    row), NEW_ROW (a new TO_ROW is created for it, only when make_new_rows is
//    set and the blob is shorter than block->max_blob_size) or REJECT (the
//    blob is not extracted, so it stays in block->blobs).
//  - pass is only used in the debug message for the test blob.
//  - Rows whose blob list is empty are deleted before returning.
// NOTE(review): this listing is missing source lines 2370, 2382 and 2391, so
// the three distance tests near the end of the !make_new_rows branch are only
// partly visible here.
2285  {
2286  OVERLAP_STATE overlap_result; //what to do with it
2287  float ycoord; //current y
2288  float top, bottom; //of blob
2289  float g_length = 1.0f; //from gradient
2290  int16_t row_count; //no of rows
2291  int16_t left_x; //left edge
2292  int16_t last_x; //previous edge
2293  float block_skew; //y delta
2294  float smooth_factor; //for new coords
2295  float near_dist; //dist to nearest row
2296  ICOORD testpt; //testing only
2297  BLOBNBOX *blob; //current blob
2298  TO_ROW *row; //current row
2299  TO_ROW *dest_row = nullptr; //row to put blob in
2300  //iterators
2301  BLOBNBOX_IT blob_it = &block->blobs;
2302  TO_ROW_IT row_it = block->get_rows ();
2303 
// Vertical midpoint of the block bounding box; only used for the skew drawing.
2304  ycoord =
2305  (block->block->pdblk.bounding_box ().bottom () +
2306  block->block->pdblk.bounding_box ().top ()) / 2.0f;
// g_length = sqrt(1 + gradient^2); it stays 1 when no gradient is supplied.
2307  if (gradient != nullptr)
2308  g_length = sqrt (1 + *gradient * *gradient);
2309 #ifndef GRAPHICS_DISABLED
2310  if (drawing_skew)
2311  to_win->SetCursor(block->block->pdblk.bounding_box ().left (), ycoord);
2312 #endif
2313  testpt = ICOORD (textord_test_x, textord_test_y);
2314  blob_it.sort (blob_x_order);
2315  smooth_factor = 1.0;
2316  block_skew = 0.0f;
2317  row_count = row_it.length (); //might have rows
// left_x: left edge of the leftmost blob, or of the block if there are no blobs.
2318  if (!blob_it.empty ()) {
2319  left_x = blob_it.data ()->bounding_box ().left ();
2320  }
2321  else {
2322  left_x = block->block->pdblk.bounding_box ().left ();
2323  }
2324  last_x = left_x;
2325  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
2326  blob = blob_it.data ();
// With a gradient the skew offset of this blob is computed directly from it.
// Without one, the running estimate is scaled up across a wide horizontal gap
// (only when textord_interpolating_skew is set).
2327  if (gradient != nullptr) {
2328  block_skew = (1 - 1 / g_length) * blob->bounding_box ().bottom ()
2329  + *gradient / g_length * blob->bounding_box ().left ();
2330  }
2331  else if (blob->bounding_box ().left () - last_x > block->line_size / 2
2332  && last_x - left_x > block->line_size * 2
2333  && textord_interpolating_skew) {
2334  // tprintf("Interpolating skew from %g",block_skew);
2335  block_skew *= static_cast<float>(blob->bounding_box ().left () - left_x)
2336  / (last_x - left_x);
2337  // tprintf("to %g\n",block_skew);
2338  }
2339  last_x = blob->bounding_box ().left ();
// Skew-corrected vertical extent of the blob.
2340  top = blob->bounding_box ().top () - block_skew;
2341  bottom = blob->bounding_box ().bottom () - block_skew;
2342 #ifndef GRAPHICS_DISABLED
2343  if (drawing_skew)
2344  to_win->DrawTo(blob->bounding_box ().left (), ycoord + block_skew);
2345 #endif
2346  if (!row_it.empty ()) {
// Skip the rows whose min_y lies above the blob top; the scan stops on the
// last row at the latest.
2347  for (row_it.move_to_first ();
2348  !row_it.at_last () && row_it.data ()->min_y () > top;
2349  row_it.forward ());
2350  row = row_it.data ();
2351  if (row->min_y () <= top && row->max_y () >= bottom) {
2352  //any overlap
2353  dest_row = row;
2354  overlap_result = most_overlapping_row (&row_it, dest_row,
2355  top, bottom,
2356  block->line_size,
2357  blob->bounding_box ().
2358  contains (testpt));
// When misses are not rejected, a NEW_ROW verdict is turned into ASSIGN.
2359  if (overlap_result == NEW_ROW && !reject_misses)
2360  overlap_result = ASSIGN;
2361  }
2362  else {
2363  overlap_result = NEW_ROW;
// When new rows are not allowed, try to attach the blob to a nearby row instead.
2364  if (!make_new_rows) {
2365  near_dist = row_it.data_relative (-1)->min_y () - top;
2366  //below bottom
2367  if (bottom < row->min_y ()) {
2368  if (row->min_y () - bottom <=
2369  (block->line_spacing -
2371  //done it
2372  overlap_result = ASSIGN;
2373  dest_row = row;
2374  }
2375  }
2376  else if (near_dist > 0
2377  && near_dist < bottom - row->max_y ()) {
2378  row_it.backward ();
2379  dest_row = row_it.data ();
2380  if (dest_row->min_y () - bottom <=
2381  (block->line_spacing -
2383  //done it
2384  overlap_result = ASSIGN;
2385  }
2386  }
2387  else {
2388  if (top - row->max_y () <=
2389  (block->line_spacing -
2390  block->line_size) * (textord_overlap_x +
2392  //done it
2393  overlap_result = ASSIGN;
2394  dest_row = row;
2395  }
2396  }
2397  }
2398  }
// ASSIGN: move the blob out of the block list and into dest_row.
2399  if (overlap_result == ASSIGN)
2400  dest_row->add_blob (blob_it.extract (), top, bottom,
2401  block->line_size);
2402  if (overlap_result == NEW_ROW) {
2403  if (make_new_rows && top - bottom < block->max_blob_size) {
2404  dest_row =
2405  new TO_ROW (blob_it.extract (), top, bottom,
2406  block->line_size);
2407  row_count++;
2408  if (bottom > row_it.data ()->min_y ())
2409  row_it.add_before_then_move (dest_row);
2410  //insert in right place
2411  else
2412  row_it.add_after_then_move (dest_row);
// The smoothing weight shrinks as the number of rows grows.
2413  smooth_factor =
2414  1.0 / (row_count * textord_skew_lag +
2415  textord_skewsmooth_offset);
2416  }
2417  else
2418  overlap_result = REJECT;
2419  }
2420  }
// No rows exist yet: the blob either starts the first row or is rejected.
2421  else if (make_new_rows && top - bottom < block->max_blob_size) {
2422  overlap_result = NEW_ROW;
2423  dest_row =
2424  new TO_ROW(blob_it.extract(), top, bottom, block->line_size);
2425  row_count++;
2426  row_it.add_after_then_move(dest_row);
2427  smooth_factor = 1.0 / (row_count * textord_skew_lag +
2428  textord_skewsmooth_offset2);
2429  }
2430  else
2431  overlap_result = REJECT;
2432  if (blob->bounding_box ().contains(testpt) && textord_debug_blob) {
2433  if (overlap_result != REJECT) {
2434  tprintf("Test blob assigned to row at (%g,%g) on pass %d\n",
2435  dest_row->min_y(), dest_row->max_y(), pass);
2436  }
2437  else {
2438  tprintf("Test blob assigned to no row on pass %d\n", pass);
2439  }
2440  }
2441  if (overlap_result != REJECT) {
// The row at the iterator may have changed its limits: move it towards the
// front while its min_y is larger than that of its predecessor, then towards
// the back while its min_y is smaller than that of its successor.
2442  while (!row_it.at_first() &&
2443  row_it.data()->min_y() > row_it.data_relative(-1)->min_y()) {
2444  row = row_it.extract();
2445  row_it.backward();
2446  row_it.add_before_then_move(row);
2447  }
2448  while (!row_it.at_last() &&
2449  row_it.data ()->min_y() < row_it.data_relative (1)->min_y()) {
2450  row = row_it.extract();
2451  row_it.forward();
2452  // Keep rows in order.
2453  row_it.add_after_then_move(row);
2454  }
// Update the running skew estimate unless the new blob has a major x overlap
// with the blob that precedes it in the row.
// NOTE(review): data_relative(-1) is evaluated before the singleton() test, so
// it is also called on a one-blob list; presumably the iterator wraps round to
// the same element - confirm against the list iterator implementation.
2455  BLOBNBOX_IT added_blob_it(dest_row->blob_list());
2456  added_blob_it.move_to_last();
2457  TBOX prev_box = added_blob_it.data_relative(-1)->bounding_box();
2458  if (dest_row->blob_list()->singleton() ||
2459  !prev_box.major_x_overlap(blob->bounding_box())) {
2460  block_skew = (1 - smooth_factor) * block_skew
2461  + smooth_factor * (blob->bounding_box().bottom() -
2462  dest_row->initial_min_y());
2463  }
2464  }
2465  }
2466  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
2467  if (row_it.data()->blob_list()->empty())
2468  delete row_it.extract(); // Discard empty rows.
2469  }
2470 }

◆ blob_x_order()

int blob_x_order ( const void *  item1,
const void *  item2 
)

Definition at line 2573 of file makerow.cpp.

// blob_x_order: comparison callback with a qsort-style signature. Each
// argument points to a BLOBNBOX pointer. The blobs are ordered by the left
// edge of their bounding boxes: returns -1 if blob1 starts further left than
// blob2, 1 if it starts further right, and 0 if both left edges are equal.
2575  {
2576  //converted ptr
2577  const BLOBNBOX *blob1 = *reinterpret_cast<const BLOBNBOX* const*>(item1);
2578  //converted ptr
2579  const BLOBNBOX *blob2 = *reinterpret_cast<const BLOBNBOX* const*>(item2);
2580 
2581  if (blob1->bounding_box ().left () < blob2->bounding_box ().left ())
2582  return -1;
2583  else if (blob1->bounding_box ().left () > blob2->bounding_box ().left ())
2584  return 1;
2585  else
2586  return 0;
2587 }

◆ cleanup_rows_making()

void cleanup_rows_making ( ICOORD  page_tr,
TO_BLOCK block,
float  gradient,
FCOORD  rotation,
int32_t  block_edge,
bool  testing_on 
)

cleanup_rows_making

Remove overlapping rows and fit all the blobs to what's left.

Definition at line 517 of file makerow.cpp.

// cleanup_rows_making: re-fits the rows of the block to the given gradient,
// expands them, returns every blob of every row to block->blobs and then
// re-assigns the blobs in three passes of assign_blobs_to_rows:
//   pass 1: the returned blobs only, no new rows;
//   pass 2: after block->large_blobs has been appended, with reject_misses and
//           make_new_rows both set;
//   pass 3: after block->noise_blobs and block->small_blobs have been
//           appended, no new rows.
// page_tr is only used to create the debug window and is passed to expand_rows.
524  {
525  //iterators
526  BLOBNBOX_IT blob_it = &block->blobs;
527  TO_ROW_IT row_it = block->get_rows ();
528 
529 #ifndef GRAPHICS_DISABLED
530  if (textord_show_parallel_rows && testing_on) {
531  if (to_win == nullptr)
532  create_to_win(page_tr);
533  }
534 #endif
535  //get row coords
536  fit_parallel_rows(block,
537  gradient,
538  rotation,
539  block_edge,
540  textord_show_parallel_rows && testing_on);
// NOTE(review): source line 541 is missing from this listing. Lines 542-545
// are the remaining arguments of a second call whose opening is not shown -
// presumably delete_non_dropout_rows(block, ...), whose parameter list
// matches; confirm against makerow.cpp.
542  gradient,
543  rotation,
544  block_edge,
545  textord_show_parallel_rows && testing_on);
546  expand_rows(page_tr, block, gradient, rotation, block_edge, testing_on);
// Append the blob list of every row to the block's blob list.
547  blob_it.set_to_list (&block->blobs);
548  row_it.set_to_list (block->get_rows ());
549  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
550  blob_it.add_list_after (row_it.data ()->blob_list ());
551  //give blobs back
552  assign_blobs_to_rows (block, &gradient, 1, false, false, false);
553  //now new rows must be genuine
554  blob_it.set_to_list (&block->blobs);
555  blob_it.add_list_after (&block->large_blobs);
556  assign_blobs_to_rows (block, &gradient, 2, true, true, false);
557  //safe to use big ones now
558  blob_it.set_to_list (&block->blobs);
559  //throw all blobs in
560  blob_it.add_list_after (&block->noise_blobs);
561  blob_it.add_list_after (&block->small_blobs);
562  assign_blobs_to_rows (block, &gradient, 3, false, false, false);
563 }

◆ compute_dropout_distances()

void compute_dropout_distances ( int32_t *  occupation,
int32_t *  thresholds,
int32_t  line_count 
)

Definition at line 902 of file makerow.cpp.

// compute_dropout_distances: overwrites thresholds[0..line_count) in place
// with a distance value per line; occupation[] is only read.
//  - The inner loop walks forward and stores a running counter (decremented
//    once per line, so the stored values are zero or negative after the first
//    segment) in thresholds[]. It stops at the first line whose occupation is
//    at or above its threshold while the previous line's occupation was below
//    that line's original threshold (kept in prev_threshold, because the
//    array entry has already been overwritten).
//  - At such a line the back-fill loop walks backwards and replaces the stored
//    values by 1, 2, 3, ... for as long as that positive distance is smaller
//    than the magnitude of the counter, which shrinks by one per step.
906  {
907  int32_t line_index; //of thresholds line
908  int32_t distance; //from prev dropout
909  int32_t next_dist; //to next dropout
910  int32_t back_index; //for back filling
911  int32_t prev_threshold; //before overwrite
912 
// Start with a counter of magnitude line_count.
913  distance = -line_count;
914  line_index = 0;
915  do {
916  do {
917  distance--;
918  prev_threshold = thresholds[line_index];
919  //distance from prev
920  thresholds[line_index] = distance;
921  line_index++;
922  }
923  while (line_index < line_count
924  && (occupation[line_index] < thresholds[line_index]
925  || occupation[line_index - 1] >= prev_threshold));
926  if (line_index < line_count) {
927  back_index = line_index - 1;
928  next_dist = 1;
929  while (next_dist < -distance && back_index >= 0) {
930  thresholds[back_index] = next_dist;
931  back_index--;
932  next_dist++;
933  distance++;
934  }
// Restart the counter: the next stored value (after the decrement) is 0.
935  distance = 1;
936  }
937  }
938  while (line_index < line_count);
939 }

◆ compute_height_modes()

int32_t compute_height_modes ( STATS heights,
int32_t  min_height,
int32_t  max_height,
int32_t *  modes,
int32_t  maxmodes 
)

Definition at line 1623 of file makerow.cpp.

// compute_height_modes: scans the heights min_height..max_height in
// increasing order and collects in modes[] up to maxmodes heights with a
// non-zero pile count, keeping the ones with the largest counts.
//  - While modes[] is not full, every non-empty height is appended.
//  - Once it is full, a height whose count is at least the smallest stored
//    count replaces the smallest entry: that entry is removed by shifting the
//    later entries down and the new height is appended at the end, so the
//    stored heights stay in the order in which they were met.
// Returns dest_count, i.e. the number of entries written to modes[].
1627  { // size of modes
1628  int32_t pile_count; // no in source pile
1629  int32_t src_count; // no of source entries
1630  int32_t src_index; // current entry
1631  int32_t least_count; // height of smallest
1632  int32_t least_index; // index of least
1633  int32_t dest_count; // index in modes
1634 
1635  src_count = max_height + 1 - min_height;
1636  dest_count = 0;
1637  least_count = INT32_MAX;
1638  least_index = -1;
1639  for (src_index = 0; src_index < src_count; src_index++) {
1640  pile_count = heights->pile_count(min_height + src_index);
1641  if (pile_count > 0) {
1642  if (dest_count < maxmodes) {
1643  if (pile_count < least_count) {
1644  // find smallest in array
1645  least_count = pile_count;
1646  least_index = dest_count;
1647  }
1648  modes[dest_count++] = min_height + src_index;
1649  } else if (pile_count >= least_count) {
1650  while (least_index < maxmodes - 1) {
1651  modes[least_index] = modes[least_index + 1];
1652  // shuffle up
1653  least_index++;
1654  }
1655  // new one on end
1656  modes[maxmodes - 1] = min_height + src_index;
1657  if (pile_count == least_count) {
1658  // new smallest
1659  least_index = maxmodes - 1;
1660  } else {
// Rescan modes[] for the new smallest count. dest_count is reused as the loop
// index here and is left equal to maxmodes, which is still the number of
// stored entries.
1661  least_count = heights->pile_count(modes[0]);
1662  least_index = 0;
1663  for (dest_count = 1; dest_count < maxmodes; dest_count++) {
1664  pile_count = heights->pile_count(modes[dest_count]);
1665  if (pile_count < least_count) {
1666  // find smallest
1667  least_count = pile_count;
1668  least_index = dest_count;
1669  }
1670  }
1671  }
1672  }
1673  }
1674  }
1675  return dest_count;
1676 }

◆ compute_line_occupation()

void compute_line_occupation ( TO_BLOCK block,
float  gradient,
int32_t  min_y,
int32_t  max_y,
int32_t *  occupation,
int32_t *  deltas 
)

Definition at line 768 of file makerow.cpp.

// compute_line_occupation: fills deltas[] and occupation[], both of length
// max_y - min_y + 1, from the blobs of all rows of the block.
//  - Every blob bounding box is rotated by (1, -gradient) / sqrt(gradient^2 + 1).
//  - The box width is added to deltas[] at the index of the rotated bottom
//    and subtracted at the index of the rotated top (indices are relative to
//    min_y and are checked with ASSERT_HOST).
//  - occupation[] is the running sum of deltas[].
775  {
776  int32_t line_count; //maxy-miny+1
777  int32_t line_index; //of scan line
778  int index; //array index for daft compilers
779  TO_ROW *row; //current row
780  TO_ROW_IT row_it = block->get_rows ();
781  BLOBNBOX *blob; //current blob
782  BLOBNBOX_IT blob_it; //iterator
783  float length; //of skew vector
784  TBOX blob_box; //bounding box
785  FCOORD rotation; //inverse of skew
786 
787  line_count = max_y - min_y + 1;
788  length = sqrt (gradient * gradient + 1);
789  rotation = FCOORD (1 / length, -gradient / length);
// Clear the derivative array before accumulating.
790  for (line_index = 0; line_index < line_count; line_index++)
791  deltas[line_index] = 0;
792  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
793  row = row_it.data ();
794  blob_it.set_to_list (row->blob_list ());
795  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
796  blob_it.forward ()) {
797  blob = blob_it.data ();
798  blob_box = blob->bounding_box ();
799  blob_box.rotate (rotation);//de-skew it
800  int32_t width = blob_box.right() - blob_box.left();
801  index = blob_box.bottom() - min_y;
802  ASSERT_HOST(index >= 0 && index < line_count);
803  // count transitions
804  deltas[index] += width;
805  index = blob_box.top() - min_y;
806  ASSERT_HOST(index >= 0 && index < line_count);
807  deltas[index] -= width;
808  }
809  }
// Integrate the deltas to obtain the occupation per scan line.
810  occupation[0] = deltas[0];
811  for (line_index = 1; line_index < line_count; line_index++)
812  occupation[line_index] = occupation[line_index - 1] + deltas[line_index];
813 }

◆ compute_occupation_threshold()

void compute_occupation_threshold ( int32_t  low_window,
int32_t  high_window,
int32_t  line_count,
int32_t *  occupation,
int32_t *  thresholds 
)

compute_occupation_threshold

Compute thresholds for textline or not for the occupation array.

Definition at line 821 of file makerow.cpp.

// compute_occupation_threshold: fills thresholds[0..line_count) from
// occupation[]. Every threshold has the form
//     (sum - min_occ) / divisor + min_occ
// where sum is the sum and min_occ the minimum of the occupation values in
// the current window, and
//     divisor = ceil((low_window + high_window) / textord_occupancy_threshold).
//  - If the window (low_window + high_window lines) is shorter than the array,
//    the first low_window lines share the value of the initial window, the
//    window then slides one line at a time, and the lines that are left at
//    the end all get the value of the last window position.
//  - Otherwise one value, computed over the whole array, is used for all lines.
827  {
828  int32_t line_index; //of thresholds line
829  int32_t low_index; //in occupation
830  int32_t high_index; //in occupation
831  int32_t sum; //current average
832  int32_t divisor; //to get thresholds
833  int32_t min_index; //of min occ
834  int32_t min_occ; //min in locality
835  int32_t test_index; //for finding min
836 
837  divisor =
838  static_cast<int32_t>(ceil ((low_window + high_window) / textord_occupancy_threshold));
839  if (low_window + high_window < line_count) {
// Sum of the initial window: the first low_window + high_window entries.
840  for (sum = 0, high_index = 0; high_index < low_window; high_index++)
841  sum += occupation[high_index];
842  for (low_index = 0; low_index < high_window; low_index++, high_index++)
843  sum += occupation[high_index];
// Minimum of the initial window; on ties the later index wins (<=).
844  min_occ = occupation[0];
845  min_index = 0;
846  for (test_index = 1; test_index < high_index; test_index++) {
847  if (occupation[test_index] <= min_occ) {
848  min_occ = occupation[test_index];
849  min_index = test_index; //find min in region
850  }
851  }
852  for (line_index = 0; line_index < low_window; line_index++)
853  thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
854  //same out to end
// Slide the window: drop occupation[low_index], take in occupation[high_index].
855  for (low_index = 0; high_index < line_count; low_index++, high_index++) {
856  sum -= occupation[low_index];
857  sum += occupation[high_index];
858  if (occupation[high_index] <= min_occ) {
859  //find min in region
860  min_occ = occupation[high_index];
861  min_index = high_index;
862  }
863  //lost min from region
// The old minimum has just left the window: rescan low_index+1..high_index.
864  if (min_index <= low_index) {
865  min_occ = occupation[low_index + 1];
866  min_index = low_index + 1;
867  for (test_index = low_index + 2; test_index <= high_index;
868  test_index++) {
869  if (occupation[test_index] <= min_occ) {
870  min_occ = occupation[test_index];
871  //find min in region
872  min_index = test_index;
873  }
874  }
875  }
876  thresholds[line_index++] = (sum - min_occ) / divisor + min_occ;
877  }
878  }
879  else {
// The window covers the whole array: one sum and one minimum for all lines.
880  min_occ = occupation[0];
881  min_index = 0;
882  for (sum = 0, low_index = 0; low_index < line_count; low_index++) {
883  if (occupation[low_index] < min_occ) {
884  min_occ = occupation[low_index];
885  min_index = low_index;
886  }
887  sum += occupation[low_index];
888  }
889  line_index = 0;
890  }
// Fill the lines not written above with the last computed value.
891  for (; line_index < line_count; line_index++)
892  thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
893  //same out to end
894 }

◆ compute_page_skew()

void compute_page_skew ( TO_BLOCK_LIST *  blocks,
float &  page_m,
float &  page_err 
)

Definition at line 286 of file makerow.cpp.

// compute_page_skew: sets page_m and page_err from the line gradients
// (line_m) and line errors (line_error) of the rows of all blocks. Blocks
// whose poly_block is non-null and not text are ignored throughout.
//  - If there are no rows at all, both outputs are set to 0 and the function
//    returns.
//  - Candidate values are collected in two vectors; page_m and page_err are
//    the entries that choose_nth_item selects at position
//    row_count * textord_skew_ile, independently for each vector.
//  - With textord_biased_skewcalc set, a row contributes several copies of
//    its values (count derived from its blob count and its rounded-up error);
//    otherwise a row contributes once if it has at least
//    textord_min_blobs_in_row blobs.
//  - If that selects nothing, every row contributes once.
290  {
291  int32_t row_count; //total rows
292  int32_t blob_count; //total_blobs
293  int32_t row_err; //integer error
294  int32_t row_index; //of total
295  TO_ROW *row; //current row
296  TO_BLOCK_IT block_it = blocks; //iterator
297 
// First pass: count the rows and the blobs of all text blocks.
298  row_count = 0;
299  blob_count = 0;
300  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
301  block_it.forward ()) {
302  POLY_BLOCK* pb = block_it.data()->block->pdblk.poly_block();
303  if (pb != nullptr && !pb->IsText())
304  continue; // Pretend non-text blocks don't exist.
305  row_count += block_it.data ()->get_rows ()->length ();
306  //count up rows
307  TO_ROW_IT row_it(block_it.data()->get_rows());
308  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
309  blob_count += row_it.data ()->blob_list ()->length ();
310  }
311  if (row_count == 0) {
312  page_m = 0.0f;
313  page_err = 0.0f;
314  return;
315  }
// Both vectors are sized by the total number of blobs, not of rows.
316  // of rows
317  std::vector<float> gradients(blob_count);
318  // of rows
319  std::vector<float> errors(blob_count);
320 
321  row_index = 0;
322  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
323  block_it.forward ()) {
324  POLY_BLOCK* pb = block_it.data()->block->pdblk.poly_block();
325  if (pb != nullptr && !pb->IsText())
326  continue; // Pretend non-text blocks don't exist.
327  TO_ROW_IT row_it(block_it.data ()->get_rows());
328  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
329  row = row_it.data ();
// blob_count is reused from here on as the blob count of the current row.
330  blob_count = row->blob_list ()->length ();
331  row_err = static_cast<int32_t>(ceil (row->line_error ()));
332  if (row_err <= 0)
333  row_err = 1;
334  if (textord_biased_skewcalc) {
// NOTE(review): blob_count is divided by row_err twice (here and in the
// for-initialiser), so the number of copies is blob_count / row_err / row_err
// in integer arithmetic - confirm that the second division is intended.
335  blob_count /= row_err;
336  for (blob_count /= row_err; blob_count > 0; blob_count--) {
337  gradients[row_index] = row->line_m ();
338  errors[row_index] = row->line_error ();
339  row_index++;
340  }
341  }
342  else if (blob_count >= textord_min_blobs_in_row) {
343  //get gradient
344  gradients[row_index] = row->line_m ();
345  errors[row_index] = row->line_error ();
346  row_index++;
347  }
348  }
349  }
350  if (row_index == 0) {
351  //desperate
// NOTE(review): this fallback writes one entry per row into vectors that were
// sized by the total blob count; operator[] is unchecked, so this relies on
// there being at least as many blobs as rows (no empty rows) - confirm.
352  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
353  block_it.forward ()) {
354  POLY_BLOCK* pb = block_it.data()->block->pdblk.poly_block();
355  if (pb != nullptr && !pb->IsText())
356  continue; // Pretend non-text blocks don't exist.
357  TO_ROW_IT row_it(block_it.data()->get_rows());
358  for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
359  row_it.forward ()) {
360  row = row_it.data ();
361  gradients[row_index] = row->line_m ();
362  errors[row_index] = row->line_error ();
363  row_index++;
364  }
365  }
366  }
// row_count now becomes the number of collected entries.
367  row_count = row_index;
368  row_index = choose_nth_item (static_cast<int32_t>(row_count * textord_skew_ile),
369  &gradients[0], row_count);
370  page_m = gradients[row_index];
371  row_index = choose_nth_item (static_cast<int32_t>(row_count * textord_skew_ile),
372  &errors[0], row_count);
373  page_err = errors[row_index];
374 }

◆ compute_row_descdrop()

int32_t compute_row_descdrop ( TO_ROW row,
float  gradient,
int  xheight_blob_count,
STATS asc_heights 
)

Definition at line 1563 of file makerow.cpp.

// compute_row_descdrop: estimates the descender drop of the row and returns
// it as a non-positive value (minus the modal descender height), or 0.
//  1. Counts the potential ascenders: the entries of asc_heights between
//     i_min and i_max, where that bucket range is clipped to
//     [xheight * textord_ascx_ratio_min, xheight * textord_ascx_ratio_max].
//  2. Builds a histogram of descender heights of the blobs that are not
//     joined to the previous blob. The height of a blob is
//     gradient * xcentre + row->parallel_c() - bottom of its bounding box,
//     and only heights within
//     [xheight * textord_descx_ratio_min, xheight * textord_descx_ratio_max]
//     (after rounding of the limits) are added.
//  3. Takes the mode of that histogram. If the count of the mode plus the
//     number of potential ascenders is below xheight_blob_count times
//     (textord_descheight_mode_fraction + textord_ascheight_mode_fraction),
//     the result is 0.
1564  {
1565  // Count how many potential ascenders are in this row.
1566  int i_min = asc_heights->min_bucket();
1567  if ((i_min / row->xheight) < textord_ascx_ratio_min) {
1568  i_min = static_cast<int>(
1569  floor(row->xheight * textord_ascx_ratio_min + 0.5));
1570  }
1571  int i_max = asc_heights->max_bucket();
1572  if ((i_max / row->xheight) > textord_ascx_ratio_max) {
1573  i_max = static_cast<int>(floor(row->xheight * textord_ascx_ratio_max));
1574  }
1575  int num_potential_asc = 0;
1576  for (int i = i_min; i <= i_max; ++i) {
1577  num_potential_asc += asc_heights->pile_count(i);
1578  }
// Accepted range of descender heights, derived from the row xheight.
1579  auto min_height =
1580  static_cast<int32_t>(floor(row->xheight * textord_descx_ratio_min + 0.5));
1581  auto max_height =
1582  static_cast<int32_t>(floor(row->xheight * textord_descx_ratio_max));
1583  float xcentre; // centre of blob
1584  float height; // height of blob
1585  BLOBNBOX_IT blob_it = row->blob_list();
1586  BLOBNBOX *blob; // current blob
1587  STATS heights (min_height, max_height + 1);
1588  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1589  blob = blob_it.data();
1590  if (!blob->joined_to_prev()) {
1591  xcentre = (blob->bounding_box().left() +
1592  blob->bounding_box().right()) / 2.0f;
// Distance from the line gradient * x + parallel_c down to the blob bottom.
1593  height = (gradient * xcentre + row->parallel_c() -
1594  blob->bounding_box().bottom());
1595  if (height >= min_height && height <= max_height)
1596  heights.add(static_cast<int>(floor(height + 0.5)), 1);
1597  }
1598  }
1599  int blob_index = heights.mode(); // find mode
1600  int blob_count = heights.pile_count(blob_index); // get count of mode
1601  float total_fraction =
1602  (textord_descheight_mode_fraction + textord_ascheight_mode_fraction);
// Too few ascenders plus descenders for the row: report no descender drop.
1603  if (static_cast<float>(blob_count + num_potential_asc) <
1604  xheight_blob_count * total_fraction) {
1605  blob_count = 0;
1606  }
1607  int descdrop = blob_count > 0 ? -blob_index : 0;
1608  if (textord_debug_xheights) {
1609  tprintf("Descdrop: %d (potential ascenders %d, descenders %d)\n",
1610  descdrop, num_potential_asc, blob_count);
1611  heights.print();
1612  }
1613  return descdrop;
1614 }

◆ compute_row_stats()

void compute_row_stats ( TO_BLOCK *  block,
bool  testing_on 
)

Definition at line 1143 of file makerow.cpp.

// compute_row_stats: derive per-row spacing and update the block's line
// statistics (line_size, line_spacing, baseline_offset, key_row).
//
// The rows are visited from the last list element backwards. For each pair of
// consecutively visited rows, the earlier-visited row gets
//   spacing = (intercept of the row visited next) - (its own intercept)
// and is recorded in `rows` for the later order-statistic selection.
// When at least one spacing was recorded, choose_nth_item (ordered by
// row_spacing_order) is used at 3/4, 1/4 and 1/2 of rowcount to obtain the
// inter-quartile range and the median-spacing row.
//
// testing_on only enables the tprintf diagnostics.
//
// NOTE(review): this listing is an extract; source lines 1192 and 1213 are
// absent (the embedded numbers skip), so the code below is not complete.
1146  {
1147  int32_t row_index; //of median
1148  TO_ROW *row; //current row
1149  TO_ROW *prev_row; //previous row
1150  float iqr; //inter quartile range
1151  TO_ROW_IT row_it = block->get_rows ();
1152  //number of rows
1153  int16_t rowcount = row_it.length ();
1154  // for choose nth
1155  std::vector<TO_ROW*> rows(rowcount);
// rowcount is reused from here on as the number of entries filled in `rows`.
1156  rowcount = 0;
1157  prev_row = nullptr;
1158  row_it.move_to_last (); //start at bottom
1159  do {
1160  row = row_it.data ();
1161  if (prev_row != nullptr) {
1162  rows[rowcount++] = prev_row;
1163  prev_row->spacing = row->intercept () - prev_row->intercept ();
1164  if (testing_on)
1165  tprintf ("Row at %g yields spacing of %g\n",
1166  row->intercept (), prev_row->spacing);
1167  }
1168  prev_row = row;
1169  row_it.backward ();
1170  }
1171  while (!row_it.at_last ());
// Default key row / offset come from the last row visited by the loop above;
// both are overwritten below when any spacing was recorded.
1172  block->key_row = prev_row;
1173  block->baseline_offset =
1174  fmod (prev_row->parallel_c (), block->line_spacing);
1175  if (testing_on)
1176  tprintf ("Blob based spacing=(%g,%g), offset=%g",
1177  block->line_size, block->line_spacing, block->baseline_offset);
1178  if (rowcount > 0) {
// Upper quartile spacing.
1179  row_index = choose_nth_item(rowcount * 3 / 4, &rows[0], rowcount,
1180  sizeof (TO_ROW *), row_spacing_order);
1181  iqr = rows[row_index]->spacing;
// Minus lower quartile spacing gives the inter-quartile range.
1182  row_index = choose_nth_item(rowcount / 4, &rows[0], rowcount,
1183  sizeof (TO_ROW *), row_spacing_order);
1184  iqr -= rows[row_index]->spacing;
// Median spacing row becomes the key row.
1185  row_index = choose_nth_item(rowcount / 2, &rows[0], rowcount,
1186  sizeof (TO_ROW *), row_spacing_order);
1187  block->key_row = rows[row_index];
1188  if (testing_on)
1189  tprintf (" row based=%g(%g)", rows[row_index]->spacing, iqr);
1190  if (rowcount > 2
1191  && iqr < rows[row_index]->spacing * textord_linespace_iqrlimit) {
// NOTE(review): listing line 1192 is missing from this extract.
1193  if (rows[row_index]->spacing < block->line_spacing
1194  && rows[row_index]->spacing > block->line_size)
1195  //within range
1196  block->line_size = rows[row_index]->spacing;
1197  //spacing=size
1198  else if (rows[row_index]->spacing > block->line_spacing)
1199  block->line_size = block->line_spacing;
1200  //too big so use max
1201  }
1202  else {
1203  if (rows[row_index]->spacing < block->line_spacing)
1204  block->line_size = rows[row_index]->spacing;
1205  else
1206  block->line_size = block->line_spacing;
1207  //too big so use max
1208  }
// Never let the line size drop below the configured minimum x-height.
1209  if (block->line_size < textord_min_xheight)
1210  block->line_size = (float) textord_min_xheight;
1211  block->line_spacing = rows[row_index]->spacing;
1212  block->max_blob_size =
// NOTE(review): listing line 1213 (the right-hand side of the assignment
// above) is missing from this extract.
1214  }
1215  block->baseline_offset = fmod (rows[row_index]->intercept (),
1216  block->line_spacing);
1217  }
1218  if (testing_on)
1219  tprintf ("\nEstimate line size=%g, spacing=%g, offset=%g\n",
1220  block->line_size, block->line_spacing, block->baseline_offset);
1221 }

◆ compute_xheight_from_modes()

int compute_xheight_from_modes ( STATS *  heights,
STATS *  floating_heights,
bool  cap_only,
int  min_height,
int  max_height,
float *  xheight,
float *  ascrise 
)

Definition at line 1467 of file makerow.cpp.

// compute_xheight_from_modes: choose an x-height (and ascender rise) from the
// height histogram `heights`, writing the results to *xheight and *ascrise.
//
// Up to MAX_HEIGHT_MODES modes are obtained from compute_height_modes(). Each
// mode x (except the last) is a candidate x-height; it is accepted when its
// count, after subtracting the matching pile of `floating_heights`, is large
// enough and a later mode `asc` exists whose ratio modes[asc]/modes[x] lies
// strictly between textord_ascx_ratio_min and textord_ascx_ratio_max and
// whose pile is large enough. If *xheight is still 0 after that search, the
// single-mode fallback uses the histogram mode with floating blobs removed.
//
// cap_only limits the search to a single mode, which forces the fallback.
// Returns 0 when the mode pile of `heights` is empty, otherwise best_count.
//
// NOTE(review): the fallback is entered on `*xheight == 0`, so the caller is
// presumably expected to zero *xheight beforehand - confirm at call sites.
// NOTE(review): listing line 1543 is missing from this extract.
1469  {
1470  int blob_index = heights->mode(); // find mode
1471  int blob_count = heights->pile_count(blob_index); // get count of mode
1472  if (textord_debug_xheights) {
1473  tprintf("min_height=%d, max_height=%d, mode=%d, count=%d, total=%d\n",
1474  min_height, max_height, blob_index, blob_count,
1475  heights->get_total());
1476  heights->print();
1477  floating_heights->print();
1478  }
1479  if (blob_count == 0) return 0;
1480  int modes[MAX_HEIGHT_MODES]; // biggest piles
1481  bool in_best_pile = false;
1482  int prev_size = -INT32_MAX;
1483  int best_count = 0;
1484  int mode_count = compute_height_modes(heights, min_height, max_height,
1485  modes, MAX_HEIGHT_MODES);
1486  if (cap_only && mode_count > 1)
1487  mode_count = 1;
1488  int x;
1489  if (textord_debug_xheights) {
1490  tprintf("found %d modes: ", mode_count);
1491  for (x = 0; x < mode_count; x++) tprintf("%d ", modes[x]);
1492  tprintf("\n");
1493  }
1494 
// Try every mode but the last as the x-height; a later mode must act as the
// ascender height.
1495  for (x = 0; x < mode_count - 1; x++) {
1496  if (modes[x] != prev_size + 1)
1497  in_best_pile = false; // had empty height
// Count for this height with the floating blobs excluded.
1498  int modes_x_count = heights->pile_count(modes[x]) -
1499  floating_heights->pile_count(modes[x]);
1500  if ((modes_x_count >= blob_count * textord_xheight_mode_fraction) &&
1501  (in_best_pile || modes_x_count > best_count)) {
1502  for (int asc = x + 1; asc < mode_count; asc++) {
1503  float ratio =
1504  static_cast<float>(modes[asc]) / static_cast<float>(modes[x]);
1505  if (textord_ascx_ratio_min < ratio &&
1506  ratio < textord_ascx_ratio_max &&
1507  (heights->pile_count(modes[asc]) >=
1508  blob_count * textord_ascheight_mode_fraction)) {
1509  if (modes_x_count > best_count) {
1510  in_best_pile = true;
1511  best_count = modes_x_count;
1512  }
1513  if (textord_debug_xheights) {
1514  tprintf("X=%d, asc=%d, count=%d, ratio=%g\n",
1515  modes[x], modes[asc]-modes[x], modes_x_count, ratio);
1516  }
1517  prev_size = modes[x];
1518  *xheight = static_cast<float>(modes[x]);
1519  *ascrise = static_cast<float>(modes[asc] - modes[x]);
1520  }
1521  }
1522  }
1523  }
1524  if (*xheight == 0) { // single mode
1525  // Remove counts of the "floating" blobs (the ones whose height is too
1526  // small in relation to its top end of the bounding box) from heights
1527  // before computing the single-mode xheight.
1528  // Restore the counts in heights after the mode is found, since
1529  // floating blobs might be useful for determining potential ascenders
1530  // in compute_row_descdrop().
1531  if (floating_heights->get_total() > 0) {
1532  for (x = min_height; x < max_height; ++x) {
1533  heights->add(x, -(floating_heights->pile_count(x)));
1534  }
1535  blob_index = heights->mode(); // find the modified mode
1536  for (x = min_height; x < max_height; ++x) {
1537  heights->add(x, floating_heights->pile_count(x));
1538  }
1539  }
1540  *xheight = static_cast<float>(blob_index);
1541  *ascrise = 0.0f;
1542  best_count = heights->pile_count(blob_index);
// NOTE(review): listing line 1543 is missing here (presumably the condition
// guarding the debug print below - confirm against the source file).
1544  tprintf("Single mode xheight set to %g\n", *xheight);
1545  } else if (textord_debug_xheights) {
1546  tprintf("Multi-mode xheight set to %g, asc=%g\n", *xheight, *ascrise);
1547  }
1548  return best_count;
1549 }

◆ correct_row_xheight()

void correct_row_xheight ( TO_ROW *  row,
float  xheight,
float  ascrise,
float  descdrop 
)

Definition at line 1685 of file makerow.cpp.

// correct_row_xheight: adjust row->xheight, row->ascrise and row->descdrop
// using the supplied average values (xheight, ascrise, descdrop) and the
// category returned by get_row_category(row).
//
// Summary of the branches below:
//  - ROW_ASCENDERS_FOUND: only descdrop is rescaled, and only if it is >= 0.
//  - ROW_INVALID, or ROW_DESCENDERS_FOUND close to normal/cap height, or
//    ROW_UNKNOWN close to normal height: the averages are copied in.
//  - remaining ROW_DESCENDERS_FOUND: ascrise is rescaled from the row xheight.
//  - remaining ROW_UNKNOWN: the row is flagged all_caps and its values are
//    either replaced by the averages (cap height match) or rescaled.
//
// NOTE(review): listing lines 1694 and 1697 are missing from this extract, so
// the initialisers of normal_xheight and cap_xheight are incomplete here.
1686  {
1687  ROW_CATEGORY row_category = get_row_category(row);
1688  if (textord_debug_xheights) {
1689  tprintf("correcting row xheight: row->xheight %.4f"
1690  ", row->acrise %.4f row->descdrop %.4f\n",
1691  row->xheight, row->ascrise, row->descdrop);
1692  }
1693  bool normal_xheight =
// NOTE(review): listing line 1694 (initialiser of normal_xheight) is missing.
1695  bool cap_xheight =
1696  within_error_margin(row->xheight, xheight + ascrise,
// NOTE(review): listing line 1697 (remaining argument(s)) is missing.
1698  // Use the average xheight/ascrise for the following cases:
1699  // -- the xheight of the row could not be determined at all
1700  // -- the row has descenders (e.g. "many groups", "ISBN 12345 p.3")
1701  // and its xheight is close to either cap height or average xheight
1702  // -- the row does not have ascenders or descenders, but its xheight
1703  // is close to the average block xheight (e.g. row with "www.mmm.com")
1704  if (row_category == ROW_ASCENDERS_FOUND) {
1705  if (row->descdrop >= 0.0) {
1706  row->descdrop = row->xheight * (descdrop / xheight);
1707  }
1708  } else if (row_category == ROW_INVALID ||
1709  (row_category == ROW_DESCENDERS_FOUND &&
1710  (normal_xheight || cap_xheight)) ||
1711  (row_category == ROW_UNKNOWN && normal_xheight)) {
1712  if (textord_debug_xheights) tprintf("using average xheight\n");
1713  row->xheight = xheight;
1714  row->ascrise = ascrise;
1715  row->descdrop = descdrop;
1716  } else if (row_category == ROW_DESCENDERS_FOUND) {
1717  // Assume this is a row with mostly lowercase letters and its xheight
1718  // is computed correctly (unfortunately there is no way to distinguish
1719  // this from the case when descenders are found, but the most common
1720  // height is capheight).
1721  if (textord_debug_xheights) tprintf("lowercase, corrected ascrise\n");
1722  row->ascrise = row->xheight * (ascrise / xheight);
1723  } else if (row_category == ROW_UNKNOWN) {
1724  // Otherwise assume this row is an all-caps or small-caps row
1725  // and adjust xheight and ascrise of the row.
1726 
1727  row->all_caps = true;
1728  if (cap_xheight) { // regular all caps
1729  if (textord_debug_xheights) tprintf("all caps\n");
1730  row->xheight = xheight;
1731  row->ascrise = ascrise;
1732  row->descdrop = descdrop;
1733  } else { // small caps or caps with an odd xheight
1734  if (textord_debug_xheights) {
1735  if (row->xheight < xheight + ascrise && row->xheight > xheight) {
1736  tprintf("small caps\n");
1737  } else {
1738  tprintf("all caps with irregular xheight\n");
1739  }
1740  }
// Split the measured height into xheight + ascrise using the average
// proportions, then scale descdrop from the reduced xheight.
1741  row->ascrise = row->xheight * (ascrise / (xheight + ascrise));
1742  row->xheight -= row->ascrise;
1743  row->descdrop = row->xheight * (descdrop / xheight);
1744  }
1745  }
1746  if (textord_debug_xheights) {
1747  tprintf("corrected row->xheight = %.4f, row->acrise = %.4f, row->descdrop"
1748  " = %.4f\n", row->xheight, row->ascrise, row->descdrop);
1749  }
1750 }

◆ delete_non_dropout_rows()

void delete_non_dropout_rows ( TO_BLOCK *  block,
float  gradient,
FCOORD  rotation,
int32_t  block_edge,
bool  testing_on 
)

delete_non_dropout_rows

Compute the linespacing and offset.

Definition at line 570 of file makerow.cpp.

// delete_non_dropout_rows: remove rows for which find_best_dropout_row()
// returns true, handing their blob lists to block->blobs.
//
// Steps visible in this listing:
//  1. Get the deskewed block box and widen [min_y, max_y] so that the floor
//     of every row intercept lies strictly inside it.
//  2. Fill `occupation` and `deltas` (one entry per scan line) through
//     compute_line_occupation, compute_occupation_threshold and
//     compute_dropout_distances.
//  3. For each row, look up deltas[line_index - min_y] as its dropout
//     distance and delete the row if find_best_dropout_row says so.
//  4. Pass the blob lists of the surviving rows to blob_it.add_list_after too.
//
// rotation, block_edge and testing_on are only used for debug drawing.
//
// NOTE(review): listing lines 613-614 and 616-617 are missing from this
// extract, so the compute_occupation_threshold call is incomplete here.
576  {
577  TBOX block_box; //deskewed block
578  int32_t max_y; //in block
579  int32_t min_y;
580  int32_t line_index; //of scan line
581  int32_t line_count; //no of scan lines
582  int32_t distance; //to drop-out
583  int32_t xleft; //of block
584  int32_t ybottom; //of block
585  TO_ROW *row; //current row
586  TO_ROW_IT row_it = block->get_rows ();
587  BLOBNBOX_IT blob_it = &block->blobs;
588 
589  if (row_it.length () == 0)
590  return; //empty block
591  block_box = deskew_block_coords (block, gradient);
592  xleft = block->block->pdblk.bounding_box ().left ();
593  ybottom = block->block->pdblk.bounding_box ().bottom ();
594  min_y = block_box.bottom () - 1;
595  max_y = block_box.top () + 1;
// Widen the y range so every row's integer intercept indexes inside it.
596  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
597  line_index = static_cast<int32_t>(floor (row_it.data ()->intercept ()));
598  if (line_index <= min_y)
599  min_y = line_index - 1;
600  if (line_index >= max_y)
601  max_y = line_index + 1;
602  }
603  line_count = max_y - min_y + 1;
604  if (line_count <= 0)
605  return; //empty block
606  // change in occupation
607  std::vector<int32_t> deltas(line_count);
608  // of pixel coords
609  std::vector<int32_t> occupation(line_count);
610 
611  compute_line_occupation(block, gradient, min_y, max_y, &occupation[0], &deltas[0]);
612  compute_occupation_threshold (static_cast<int32_t>(ceil (block->line_spacing *
// NOTE(review): listing lines 613-614 are missing from this extract.
615  static_cast<int32_t>(ceil (block->line_spacing *
// NOTE(review): listing lines 616-617 are missing from this extract.
618  max_y - min_y + 1, &occupation[0], &deltas[0]);
619 #ifndef GRAPHICS_DISABLED
620  if (testing_on) {
621  draw_occupation(xleft, ybottom, min_y, max_y, &occupation[0], &deltas[0]);
622  }
623 #endif
624  compute_dropout_distances(&occupation[0], &deltas[0], line_count);
625  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
626  row = row_it.data ();
627  line_index = static_cast<int32_t>(floor (row->intercept ()));
// deltas is indexed relative to min_y.
628  distance = deltas[line_index - min_y];
629  if (find_best_dropout_row (row, distance, block->line_spacing / 2,
630  line_index, &row_it, testing_on)) {
631 #ifndef GRAPHICS_DISABLED
632  if (testing_on)
633  plot_parallel_row(row, gradient, block_edge,
634  ScrollView::WHITE, rotation);
635 #endif
// Hand the row's blobs to the block's blob list before deleting the row.
636  blob_it.add_list_after (row_it.data ()->blob_list ());
637  delete row_it.extract (); //too far away
638  }
639  }
640  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
641  blob_it.add_list_after (row_it.data ()->blob_list ());
642  }
643 }

◆ deskew_block_coords()

TBOX deskew_block_coords ( TO_BLOCK *  block,
float  gradient 
)

Definition at line 732 of file makerow.cpp.

// deskew_block_coords: return the union of the bounding boxes of all blobs
// in all rows of `block`, after rotating a copy of each box by the deskew
// vector derived from `gradient`. Each blob's bounding box is copied before
// rotation, so the blobs are not modified by this function.
//
// The rotation vector is (1, -gradient) normalised to unit length.
// Only blobs that belong to rows (row->blob_list()) are visited.
735  {
736  TBOX result; //block bounds
737  TBOX blob_box; //of block
738  FCOORD rotation; //deskew vector
739  float length; //of gradient vector
740  TO_ROW_IT row_it = block->get_rows ();
741  TO_ROW *row; //current row
742  BLOBNBOX *blob; //current blob
743  BLOBNBOX_IT blob_it; //iterator
744 
745  length = sqrt (gradient * gradient + 1);
746  rotation = FCOORD (1 / length, -gradient / length);
747  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
748  row = row_it.data ();
749  blob_it.set_to_list (row->blob_list ());
750  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
751  blob_it.forward ()) {
752  blob = blob_it.data ();
// Work on a copy of the box so the blob is left untouched.
753  blob_box = blob->bounding_box ();
754  blob_box.rotate (rotation);//de-skew it
755  result += blob_box;
756  }
757  }
758  return result;
759 }

◆ expand_rows()

void expand_rows ( ICOORD  page_tr,
TO_BLOCK *  block,
float  gradient,
FCOORD  rotation,
int32_t  block_edge,
bool  testing_on 
)

Definition at line 949 of file makerow.cpp.

// expand_rows: widen the [min_y, max_y] limits of each row of `block`, and
// merge ("eat") neighbouring rows that fall wholly inside the widened range.
//
// After adjusting limits, re-assigning blobs and refitting parallel rows, the
// rows are processed from the last list element backwards. For each row an
// allowed range [y_bottom, y_top] around its intercept is computed from
// block->line_size and textord_expansion_factor. While expanding downwards
// (towards the next list element) or upwards (towards the previous one), a
// neighbour is either swallowed completely (its blobs are appended to this
// row and the neighbour is deleted) or it truncates / blocks the expansion.
// Finally row->set_limits(y_min, y_max) stores the new limits.
//
// page_tr, rotation, block_edge and testing_on are used only for the debug
// window, plotting and tprintf output.
//
// NOTE(review): listing lines 974, 988, 996, 998-999, 1021 and 1068 are
// missing from this extract; the code below is therefore incomplete.
956  {
957  bool swallowed_row; //eaten a neighbour
958  float y_max, y_min; //new row limits
959  float y_bottom, y_top; //allowed limits
960  TO_ROW *test_row; //next row
961  TO_ROW *row; //current row
962  //iterators
963  BLOBNBOX_IT blob_it = &block->blobs;
964  TO_ROW_IT row_it = block->get_rows ();
965 
966 #ifndef GRAPHICS_DISABLED
967  if (textord_show_expanded_rows && testing_on) {
968  if (to_win == nullptr)
969  create_to_win(page_tr);
970  }
971 #endif
972 
973  adjust_row_limits(block); //shift min,max.
// NOTE(review): listing line 974 is missing (it opens the scope closed at 978).
975  if (block->get_rows ()->length () == 0)
976  return;
977  compute_row_stats(block, textord_show_expanded_rows && testing_on);
978  }
979  assign_blobs_to_rows (block, &gradient, 4, true, false, false);
980  //get real membership
981  if (block->get_rows ()->length () == 0)
982  return;
983  fit_parallel_rows(block,
984  gradient,
985  rotation,
986  block_edge,
987  textord_show_expanded_rows && testing_on);
// NOTE(review): listing line 988 is missing from this extract.
989  compute_row_stats(block, textord_show_expanded_rows && testing_on);
990  row_it.move_to_last ();
991  do {
992  row = row_it.data ();
993  y_max = row->max_y (); //get current limits
994  y_min = row->min_y ();
995  y_bottom = row->intercept () - block->line_size * textord_expansion_factor *
// NOTE(review): listing line 996 (rest of the expression) is missing.
997  y_top = row->intercept () + block->line_size * textord_expansion_factor *
// NOTE(review): listing lines 998-999 (rest of the expression) are missing.
1000  if (y_min > y_bottom) { //expansion allowed
1001  if (textord_show_expanded_rows && testing_on)
1002  tprintf("Expanding bottom of row at %f from %f to %f\n",
1003  row->intercept(), y_min, y_bottom);
1004  //expandable
1005  swallowed_row = true;
// Keep looking at the following row for as long as one was just swallowed.
1006  while (swallowed_row && !row_it.at_last ()) {
1007  swallowed_row = false;
1008  //get next one
1009  test_row = row_it.data_relative (1);
1010  //overlaps space
1011  if (test_row->max_y () > y_bottom) {
1012  if (test_row->min_y () > y_bottom) {
1013  if (textord_show_expanded_rows && testing_on)
1014  tprintf("Eating row below at %f\n", test_row->intercept());
1015  row_it.forward ();
1016 #ifndef GRAPHICS_DISABLED
1017  if (textord_show_expanded_rows && testing_on)
1018  plot_parallel_row(test_row,
1019  gradient,
1020  block_edge,
// NOTE(review): listing line 1021 (an argument) is missing from this extract.
1022  rotation);
1023 #endif
1024  blob_it.set_to_list (row->blob_list ());
1025  blob_it.add_list_after (test_row->blob_list ());
1026  //swallow complete row
1027  delete row_it.extract ();
// Step back onto the current row after removing the neighbour.
1028  row_it.backward ();
1029  swallowed_row = true;
1030  }
1031  else if (test_row->max_y () < y_min) {
1032  //shorter limit
1033  y_bottom = test_row->max_y ();
1034  if (textord_show_expanded_rows && testing_on)
1035  tprintf("Truncating limit to %f due to touching row at %f\n",
1036  y_bottom, test_row->intercept());
1037  }
1038  else {
1039  y_bottom = y_min; //can't expand it
1040  if (textord_show_expanded_rows && testing_on)
1041  tprintf("Not expanding limit beyond %f due to touching row at %f\n",
1042  y_bottom, test_row->intercept());
1043  }
1044  }
1045  }
1046  y_min = y_bottom; //expand it
1047  }
1048  if (y_max < y_top) { //expansion allowed
1049  if (textord_show_expanded_rows && testing_on)
1050  tprintf("Expanding top of row at %f from %f to %f\n",
1051  row->intercept(), y_max, y_top);
1052  swallowed_row = true;
// Mirror image of the loop above, looking at the preceding row.
1053  while (swallowed_row && !row_it.at_first ()) {
1054  swallowed_row = false;
1055  //get one above
1056  test_row = row_it.data_relative (-1);
1057  if (test_row->min_y () < y_top) {
1058  if (test_row->max_y () < y_top) {
1059  if (textord_show_expanded_rows && testing_on)
1060  tprintf("Eating row above at %f\n", test_row->intercept());
1061  row_it.backward ();
1062  blob_it.set_to_list (row->blob_list ());
1063 #ifndef GRAPHICS_DISABLED
1064  if (textord_show_expanded_rows && testing_on)
1065  plot_parallel_row(test_row,
1066  gradient,
1067  block_edge,
// NOTE(review): listing line 1068 (an argument) is missing from this extract.
1069  rotation);
1070 #endif
1071  blob_it.add_list_after (test_row->blob_list ());
1072  //swallow complete row
1073  delete row_it.extract ();
1074  row_it.forward ();
1075  swallowed_row = true;
1076  }
1077  else if (test_row->min_y () < y_max) {
1078  //shorter limit
1079  y_top = test_row->min_y ();
1080  if (textord_show_expanded_rows && testing_on)
1081  tprintf("Truncating limit to %f due to touching row at %f\n",
1082  y_top, test_row->intercept());
1083  }
1084  else {
1085  y_top = y_max; //can't expand it
1086  if (textord_show_expanded_rows && testing_on)
1087  tprintf("Not expanding limit beyond %f due to touching row at %f\n",
1088  y_top, test_row->intercept());
1089  }
1090  }
1091  }
1092  y_max = y_top;
1093  }
1094  //new limits
1095  row->set_limits (y_min, y_max);
1096  row_it.backward ();
1097  }
1098  while (!row_it.at_last ());
1099 }

◆ fill_heights()

void fill_heights ( TO_ROW *  row,
float  gradient,
int  min_height,
int  max_height,
STATS *  heights,
STATS *  floating_heights 
)

Definition at line 1406 of file makerow.cpp.

// fill_heights: accumulate blob heights above the row's baseline into the
// `heights` histogram, and additionally into `floating_heights` for blobs
// whose own box height is small relative to that value.
//
// For every blob not joined to its predecessor, `top` is the top of its
// bounding box minus the baseline y at the blob's x centre (from either
// row->baseline or the parallel line gradient * x + row->parallel_c()).
// Values within [min_height, max_height] are rounded and added to `heights`;
// if height / top < textord_min_blob_height_fraction the same value is also
// added to `floating_heights`.
// Runs of blobs sharing a non-zero repeated_set() are skipped after their
// first member when the row has marked repeated characters.
//
// NOTE(review): listing lines 1424 and 1442 are missing from this extract.
1407  {
1408  float xcentre; // centre of blob
1409  float top; // top y coord of blob
1410  float height; // height of blob
1411  BLOBNBOX *blob; // current blob
1412  int repeated_set;
1413  BLOBNBOX_IT blob_it = row->blob_list();
1414  if (blob_it.empty()) return; // no blobs in this row
1415  bool has_rep_chars =
1416  row->rep_chars_marked() && row->num_repeated_sets() > 0;
1417  do {
1418  blob = blob_it.data();
1419  if (!blob->joined_to_prev()) {
1420  xcentre = (blob->bounding_box().left() +
1421  blob->bounding_box().right()) / 2.0f;
1422  top = blob->bounding_box().top();
1423  height = blob->bounding_box().height();
// NOTE(review): listing line 1424 (the condition selecting between the two
// baseline forms below) is missing from this extract.
1425  top -= row->baseline.y(xcentre);
1426  else
1427  top -= gradient * xcentre + row->parallel_c();
1428  if (top >= min_height && top <= max_height) {
1429  heights->add(static_cast<int32_t>(floor(top + 0.5)), 1);
1430  if (height / top < textord_min_blob_height_fraction) {
1431  floating_heights->add(static_cast<int32_t>(floor(top + 0.5)), 1);
1432  }
1433  }
1434  }
1435  // Skip repeated chars, since they are likely to skew the height stats.
1436  if (has_rep_chars && blob->repeated_set() != 0) {
1437  repeated_set = blob->repeated_set();
1438  blob_it.forward();
// Advance past the rest of this repeated set, stopping if the list wraps.
1439  while (!blob_it.at_first() &&
1440  blob_it.data()->repeated_set() == repeated_set) {
1441  blob_it.forward();
// NOTE(review): listing line 1442 is missing from this extract.
1443  tprintf("Skipping repeated char when computing xheight\n");
1444  }
1445  } else {
1446  blob_it.forward();
1447  }
1448  } while (!blob_it.at_first());
1449 }

◆ find_best_dropout_row()

bool find_best_dropout_row ( TO_ROW *  row,
int32_t  distance,
float  dist_limit,
int32_t  line_index,
TO_ROW_IT *  row_it,
bool  testing_on 
)

Definition at line 652 of file makerow.cpp.

// find_best_dropout_row: decide whether `row` should be deleted because it is
// too far from a dropout or because a neighbouring row is a better match.
// Returns true if the row should be deleted, false to keep it.
//
//  row         the row being tested
//  distance    signed distance from the row's scan line to the dropout; its
//              sign selects the search direction (negative -> following rows,
//              otherwise preceding rows)
//  dist_limit  rows with |distance| above this are deleted outright
//  line_index  integer scan line of `row`
//  row_it      iterator positioned at `row`; neighbours are read through
//              data_relative() without moving it
//  testing_on  enables tprintf diagnostics
//
// A neighbour whose scan line lies strictly between line_index and
// line_index + 2 * distance is nearer to the dropout, so `row` is deleted.
// A neighbour exactly at either end of that interval is a tie, resolved by
// believability(); the scan continues past tied neighbours.
659  {
660  int32_t next_index; // of neighbouring row
661  int32_t row_offset; //from current row
662  int32_t abs_dist; //absolute distance
663  int8_t row_inc; //increment to row_index
664  TO_ROW *next_row; //next (neighbouring) row
665 
666  if (testing_on)
667  tprintf ("Row at %g(%g), dropout dist=%d,",
668  row->intercept (), row->parallel_c (), distance);
669  if (distance < 0) {
670  row_inc = 1;
671  abs_dist = -distance;
672  }
673  else {
674  row_inc = -1;
675  abs_dist = distance;
676  }
677  if (abs_dist > dist_limit) {
678  if (testing_on) {
679  tprintf (" too far - deleting\n");
680  }
681  return true;
682  }
// Only search if there is at least one row in the chosen direction.
683  if ((distance < 0 && !row_it->at_last ())
684  || (distance >= 0 && !row_it->at_first ())) {
685  row_offset = row_inc;
686  do {
687  next_row = row_it->data_relative (row_offset);
688  next_index = static_cast<int32_t>(floor (next_row->intercept ()));
689  if ((distance < 0
690  && next_index < line_index
691  && next_index > line_index + distance + distance)
692  || (distance >= 0
693  && next_index > line_index
694  && next_index < line_index + distance + distance)) {
695  if (testing_on) {
696  tprintf (" nearer neighbour (%d) at %g\n",
697  line_index + distance - next_index,
698  next_row->intercept ());
699  }
700  return true; //other is nearer
701  }
702  else if (next_index == line_index
703  || next_index == line_index + distance + distance) {
704  if (row->believability () <= next_row->believability ()) {
705  if (testing_on) {
706  tprintf (" equal but more believable at %g (%g/%g)\n",
707  next_row->intercept (),
708  row->believability (),
709  next_row->believability ());
710  }
711  return true; //other is more believable
712  }
713  }
714  row_offset += row_inc;
715  }
// Continue only while the neighbour just examined was a tie.
716  while ((next_index == line_index
717  || next_index == line_index + distance + distance)
718  && row_offset < row_it->length ());
719  if (testing_on)
720  tprintf (" keeping\n");
721  }
722  return false;
723 }

◆ fit_lms_line()

void fit_lms_line ( TO_ROW *  row)

Definition at line 266 of file makerow.cpp.

// fit_lms_line: fit a line to the row's blobs and store it on the row.
// One point per blob is added to the fitter: (horizontal centre of the
// bounding box, bottom of the bounding box). The fitted gradient m, intercept
// c and the value returned by Fit() are passed to row->set_line().
//
// NOTE(review): listing line 268 is missing from this extract; it presumably
// declares the `lms` fitter object used below - confirm against the source.
266  {
267  float m, c; // fitted line
269  BLOBNBOX_IT blob_it = row->blob_list();
270 
271  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
272  const TBOX& box = blob_it.data()->bounding_box();
// Integer midpoint of the box in x, baseline candidate in y.
273  lms.Add(ICOORD((box.left() + box.right()) / 2, box.bottom()));
274  }
275  double error = lms.Fit(&m, &c);
276  row->set_line(m, c, error);
277 }

◆ fit_parallel_lms()

void fit_parallel_lms ( float  gradient,
TO_ROW *  row 
)

Definition at line 1973 of file makerow.cpp.

// fit_parallel_lms: fit a line of fixed `gradient` to the row's blobs and
// store it with row->set_parallel_line(); then also call row->set_line().
// Only blobs not joined to their predecessor contribute a point (box centre
// in x, box bottom in y); blobcount counts those points.
//
// NOTE(review): listing lines 1976 and 1989 are missing from this extract.
// Line 1976 presumably declares `lms`; line 1989 is presumably the condition
// under which the unconstrained Fit() below replaces gradient/c/error before
// set_line() - confirm against the source file.
1973  {
1974  float c; // fitted line
1975  int blobcount; // no of blobs
1977  BLOBNBOX_IT blob_it = row->blob_list();
1978 
1979  blobcount = 0;
1980  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1981  if (!blob_it.data()->joined_to_prev()) {
1982  const TBOX& box = blob_it.data()->bounding_box();
1983  lms.Add(ICOORD((box.left() + box.right()) / 2, box.bottom()));
1984  blobcount++;
1985  }
1986  }
1987  double error = lms.ConstrainedFit(gradient, &c);
1988  row->set_parallel_line(gradient, c, error);
// NOTE(review): listing line 1989 is missing (it opens the scope closed at 1991).
1990  error = lms.Fit(&gradient, &c);
1991  }
1992  //set the other too
1993  row->set_line(gradient, c, error);
1994 }

◆ fit_parallel_rows()

void fit_parallel_rows ( TO_BLOCK *  block,
float  gradient,
FCOORD  rotation,
int32_t  block_edge,
bool  testing_on 
)

Definition at line 1931 of file makerow.cpp.

// fit_parallel_rows: for every row of `block`, delete the row if its blob
// list is empty, otherwise refit it with fit_parallel_lms(gradient, row).
// Afterwards the row list is sorted with row_y_order, since refitting may
// have changed the rows' relative order.
//
// rotation, block_edge and testing_on are only used to plot the rows (cycling
// colours from RED up to MAGENTA) when graphics are enabled.
1937  {
1938 #ifndef GRAPHICS_DISABLED
1939  ScrollView::Color colour; //of row
1940 #endif
1941  TO_ROW_IT row_it = block->get_rows ();
1942 
1943  row_it.move_to_first ();
1944  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1945  if (row_it.data ()->blob_list ()->empty ())
1946  delete row_it.extract (); //nothing in it
1947  else
1948  fit_parallel_lms (gradient, row_it.data ());
1949  }
1950 #ifndef GRAPHICS_DISABLED
1951  if (testing_on) {
1952  colour = ScrollView::RED;
1953  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1954  plot_parallel_row (row_it.data (), gradient,
1955  block_edge, colour, rotation);
// Advance to the next colour, wrapping back to RED after MAGENTA.
1956  colour = static_cast<ScrollView::Color>(colour + 1);
1957  if (colour > ScrollView::MAGENTA)
1958  colour = ScrollView::RED;
1959  }
1960  }
1961 #endif
1962  row_it.sort (row_y_order); //may have gone out of order
1963 }

◆ linear_spline_baseline()

double* linear_spline_baseline ( TO_ROW *  row,
TO_BLOCK *  block,
int32_t &  segments,
int32_t  xstarts[] 
)

Definition at line 2187 of file makerow.cpp.

// linear_spline_baseline: fit a piecewise-linear baseline to the row's blobs.
//
// The blobs (as returned by box_next_pre_chopped) are counted, split into
// `segments` = blobcount / textord_spline_medianwin segments (at least 1),
// and a line is fitted to each segment. Two iterators are used alternately:
// blob_it starts at the first blob, new_it is advanced by half a segment, so
// consecutive fits use interleaved, half-segment-shifted windows.
//
// Outputs:
//  segments   set to the number of segments
//  xstarts[]  xstarts[0] is the left edge of the first box; entries for
//             later segments are written inside the fitting loops
//  return     newly allocated array of segments * 3 doubles holding, per
//             segment, {0, b, c} from the fit; allocated with new[], so the
//             caller must release it with delete[]
//
// `block` is not referenced in the lines shown here.
//
// NOTE(review): listing line 2203 is missing from this extract; it presumably
// declares the `lms` fitter used below - confirm against the source file.
2192  {
2193  int blobcount; //no of blobs
2194  int blobindex; //current blob
2195  int index1, index2; //blob numbers
2196  int blobs_per_segment; //blobs in each
2197  TBOX box; //blob box
2198  TBOX new_box; //new_it box
2199  //blobs
2200  BLOBNBOX_IT blob_it = row->blob_list ();
2201  BLOBNBOX_IT new_it = blob_it; //front end
2202  float b, c; //fitted curve
2204  int32_t segment; //current segment
2205 
2206  box = box_next_pre_chopped (&blob_it);
2207  xstarts[0] = box.left ();
2208  blobcount = 1;
// Count the boxes by walking until the iterator wraps to the first element.
2209  while (!blob_it.at_first ()) {
2210  blobcount++;
2211  box = box_next_pre_chopped (&blob_it);
2212  }
2213  segments = blobcount / textord_spline_medianwin;
2214  if (segments < 1)
2215  segments = 1;
2216  blobs_per_segment = blobcount / segments;
2217  // quadratic coeffs
2218  auto *coeffs = new double[segments * 3];
2219  if (textord_oldbl_debug)
2220  tprintf
2221  ("Linear splining baseline of %d blobs at (%d,%d), into %d segments of %d blobs\n",
2222  blobcount, box.left (), box.bottom (), segments, blobs_per_segment);
2223  segment = 1;
// Offset the second iterator by half a segment.
2224  for (index2 = 0; index2 < blobs_per_segment / 2; index2++)
2225  box_next_pre_chopped(&new_it);
2226  index1 = 0;
2227  blobindex = index2;
2228  do {
2229  blobindex += blobs_per_segment;
2230  lms.Clear();
// On the final segment, keep consuming until all blobs have been used.
2231  while (index1 < blobindex || (segment == segments && index1 < blobcount)) {
2232  box = box_next_pre_chopped (&blob_it);
2233  int middle = (box.left() + box.right()) / 2;
2234  lms.Add(ICOORD(middle, box.bottom()));
2235  index1++;
2236  if (index1 == blobindex - blobs_per_segment / 2
2237  || index1 == blobcount - 1) {
2238  xstarts[segment] = box.left ();
2239  }
2240  }
2241  lms.Fit(&b, &c);
// Quadratic term is always zero: each segment is a straight line.
2242  coeffs[segment * 3 - 3] = 0;
2243  coeffs[segment * 3 - 2] = b;
2244  coeffs[segment * 3 - 1] = c;
2245  segment++;
2246  if (segment > segments)
2247  break;
2248 
2249  blobindex += blobs_per_segment;
2250  lms.Clear();
2251  while (index2 < blobindex || (segment == segments && index2 < blobcount)) {
2252  new_box = box_next_pre_chopped (&new_it);
2253  int middle = (new_box.left() + new_box.right()) / 2;
2254  lms.Add(ICOORD (middle, new_box.bottom()));
2255  index2++;
2256  if (index2 == blobindex - blobs_per_segment / 2
2257  || index2 == blobcount - 1) {
2258  xstarts[segment] = new_box.left ();
2259  }
2260  }
2261  lms.Fit(&b, &c);
2262  coeffs[segment * 3 - 3] = 0;
2263  coeffs[segment * 3 - 2] = b;
2264  coeffs[segment * 3 - 1] = c;
2265  segment++;
2266  }
2267  while (segment <= segments);
2268  return coeffs;
2269 }

◆ make_baseline_spline()

void make_baseline_spline ( TO_ROW *  row,
TO_BLOCK *  block 
)

Definition at line 2056 of file makerow.cpp.

// make_baseline_spline: build row->baseline as a QSPLINE.
//
// xstarts is allocated with one entry more than the number of blobs in the
// row. If the segment_baseline(...) condition holds, the coefficients come
// from linear_spline_baseline(); otherwise a single segment is used whose
// coefficients are {0, row->line_m(), row->line_c()}, i.e. the row's fitted
// straight line. Both temporary arrays are released with delete[] after the
// QSPLINE has been constructed from them.
//
// NOTE(review): listing line 2064 (the rest of the `if` condition) is missing
// from this extract.
2057  {
2058  double *coeffs; // quadratic coeffs
2059  int32_t segments; // no of segments
2060 
2061  // spline boundaries
2062  auto *xstarts = new int32_t[row->blob_list()->length() + 1];
2063  if (segment_baseline(row, block, segments, xstarts)
// NOTE(review): listing line 2064 is missing from this extract.
2065  coeffs = linear_spline_baseline(row, block, segments, xstarts);
2066  } else {
// Collapse to one segment: its end is the end of the last segment found.
2067  xstarts[1] = xstarts[segments];
2068  segments = 1;
2069  coeffs = new double[3];
2070  coeffs[0] = 0;
2071  coeffs[1] = row->line_m ();
2072  coeffs[2] = row->line_c ();
2073  }
2074  row->baseline = QSPLINE (segments, xstarts, coeffs);
2075  delete[] coeffs;
2076  delete[] xstarts;
2077 }

◆ make_initial_textrows()

void make_initial_textrows ( ICOORD  page_tr,
TO_BLOCK *  block,
FCOORD  rotation,
bool  testing_on 
)

Definition at line 226 of file makerow.cpp.

// make_initial_textrows: arrange the good blobs of `block` into rows of text
// and fit an LMS line to every resulting row.
//
// assign_blobs_to_rows() is called with a null gradient pointer (no skew
// estimate is supplied at this stage), then fit_lms_line() is applied to each
// row. page_tr, rotation and testing_on are used only for the optional debug
// window and row plotting (colours cycle from RED up to MAGENTA).
231  {
232  TO_ROW_IT row_it = block->get_rows ();
233 
234 #ifndef GRAPHICS_DISABLED
235  ScrollView::Color colour; //of row
236 
237  if (textord_show_initial_rows && testing_on) {
238  if (to_win == nullptr)
239  create_to_win(page_tr);
240  }
241 #endif
242  //guess skew
243  assign_blobs_to_rows (block, nullptr, 0, true, true, textord_show_initial_rows && testing_on);
244  row_it.move_to_first ();
245  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
246  fit_lms_line (row_it.data ());
247 #ifndef GRAPHICS_DISABLED
248  if (textord_show_initial_rows && testing_on) {
249  colour = ScrollView::RED;
250  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
251  plot_to_row (row_it.data (), colour, rotation);
// Advance to the next colour, wrapping back to RED after MAGENTA.
252  colour = static_cast<ScrollView::Color>(colour + 1);
253  if (colour > ScrollView::MAGENTA)
254  colour = ScrollView::RED;
255  }
256  }
257 #endif
258 }

◆ make_rows()

float make_rows ( ICOORD  page_tr,
TO_BLOCK_LIST *  port_blocks 
)

Definition at line 200 of file makerow.cpp.

// make_rows: arrange the blobs of every block in `port_blocks` into rows.
//
// Pass 1 calls make_initial_textrows() on each block with an identity
// rotation FCOORD(1, 0). compute_page_skew() then produces the global skew
// port_m (and port_err) from all blocks, and pass 2 calls
// cleanup_rows_making() on each block with that skew.
// Returns port_m, the global skew gradient.
//
// NOTE(review): listing lines 209 and 216 (trailing call arguments) are
// missing from this extract.
200  {
201  float port_m; // global skew
202  float port_err; // global noise
203  TO_BLOCK_IT block_it; // iterator
204 
205  block_it.set_to_list(port_blocks);
206  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
207  block_it.forward())
208  make_initial_textrows(page_tr, block_it.data(), FCOORD(1.0f, 0.0f),
// NOTE(review): listing line 209 (remaining argument(s)) is missing.
210  // compute globally
211  compute_page_skew(port_blocks, port_m, port_err);
212  block_it.set_to_list(port_blocks);
213  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
214  cleanup_rows_making(page_tr, block_it.data(), port_m, FCOORD(1.0f, 0.0f),
215  block_it.data()->block->pdblk.bounding_box().left(),
// NOTE(review): listing line 216 (remaining argument(s)) is missing.
217  }
218  return port_m; // global skew
219 }

◆ make_single_row()

float make_single_row ( ICOORD  page_tr,
bool  allow_sub_blobs,
TO_BLOCK *  block,
TO_BLOCK_LIST *  blocks 
)

Definition at line 163 of file makerow.cpp.

// make_single_row: arrange all blobs of `block` into a single row (or, when
// there is exactly one blob and allow_sub_blobs is set, additionally a row
// built from that blob's sub-blobs), fit LMS lines to the rows, and return
// the gradient computed by compute_page_skew() over `blocks`.
//
// The small, noise and large blob lists are first merged into block->blobs.
// If the block then has no blobs at all, a fake blob covering the block's
// bounding box is inserted so that a row can still be made.
// page_tr is not referenced in this function body.
164  {
165  BLOBNBOX_IT blob_it = &block->blobs;
166  TO_ROW_IT row_it = block->get_rows();
167 
168  // Include all the small blobs and large blobs.
169  blob_it.add_list_after(&block->small_blobs);
170  blob_it.add_list_after(&block->noise_blobs);
171  blob_it.add_list_after(&block->large_blobs);
172  if (block->blobs.singleton() && allow_sub_blobs) {
173  blob_it.move_to_first();
174  float size = MakeRowFromSubBlobs(block, blob_it.data()->cblob(), &row_it);
// Only ever grow the block's line size from the sub-blob row.
175  if (size > block->line_size)
176  block->line_size = size;
177  } else if (block->blobs.empty()) {
178  // Make a fake blob.
179  C_BLOB* blob = C_BLOB::FakeBlob(block->block->pdblk.bounding_box());
180  // The blobnbox owns the blob.
181  auto* bblob = new BLOBNBOX(blob);
182  blob_it.add_after_then_move(bblob);
183  }
184  MakeRowFromBlobs(block->line_size, &blob_it, &row_it);
185  // Fit an LMS line to the rows.
186  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward())
187  fit_lms_line(row_it.data());
188  float gradient;
189  float fit_error;
190  // Compute the skew based on the fitted line.
191  compute_page_skew(blocks, gradient, fit_error);
192  return gradient;
193 }

◆ mark_repeated_chars()

void mark_repeated_chars ( TO_ROW *  row)

Definition at line 2639 of file makerow.cpp.

2639  {
2640  BLOBNBOX_IT box_it(row->blob_list()); // Iterator.
2641  int num_repeated_sets = 0;
2642  if (!box_it.empty()) {
2643  do {
2644  BLOBNBOX* bblob = box_it.data();
2645  int repeat_length = 1;
2646  if (bblob->flow() == BTFT_LEADER &&
2647  !bblob->joined_to_prev() && bblob->cblob() != nullptr) {
2648  BLOBNBOX_IT test_it(box_it);
2649  for (test_it.forward(); !test_it.at_first();) {
2650  bblob = test_it.data();
2651  if (bblob->flow() != BTFT_LEADER)
2652  break;
2653  test_it.forward();
2654  bblob = test_it.data();
2655  if (bblob->joined_to_prev() || bblob->cblob() == nullptr) {
2656  repeat_length = 0;
2657  break;
2658  }
2659  ++repeat_length;
2660  }
2661  }
2662  if (repeat_length >= kMinLeaderCount) {
2663  num_repeated_sets++;
2664  for (; repeat_length > 0; box_it.forward(), --repeat_length) {
2665  bblob = box_it.data();
2666  bblob->set_repeated_set(num_repeated_sets);
2667  }
2668  } else {
2669  bblob->set_repeated_set(0);
2670  box_it.forward();
2671  }
2672  } while (!box_it.at_first()); // until all done
2673  }
2674  row->set_num_repeated_sets(num_repeated_sets);
2675 }

◆ most_overlapping_row()

OVERLAP_STATE most_overlapping_row ( TO_ROW_IT *  row_it,
TO_ROW *&  best_row,
float  top,
float  bottom,
float  rowsize,
bool  testing_blob 
)

Definition at line 2478 of file makerow.cpp.

2485  {
2486  OVERLAP_STATE result; //result of tests
2487  float overlap; //of blob & row
2488  float bestover; //nearest row
2489  float merge_top, merge_bottom; //size of merged row
2490  ICOORD testpt; //testing only
2491  TO_ROW *row; //current row
2492  TO_ROW *test_row; //for multiple overlaps
2493  BLOBNBOX_IT blob_it; //for merging rows
2494 
2495  result = ASSIGN;
2496  row = row_it->data ();
2497  bestover = top - bottom;
2498  if (top > row->max_y ())
2499  bestover -= top - row->max_y ();
2500  if (bottom < row->min_y ())
2501  //compute overlap
2502  bestover -= row->min_y () - bottom;
2503  if (testing_blob && textord_debug_blob) {
2504  tprintf("Test blob y=(%g,%g), row=(%f,%f), size=%g, overlap=%f\n",
2505  bottom, top, row->min_y(), row->max_y(), rowsize, bestover);
2506  }
2507  test_row = row;
2508  do {
2509  if (!row_it->at_last ()) {
2510  row_it->forward ();
2511  test_row = row_it->data ();
2512  if (test_row->min_y () <= top && test_row->max_y () >= bottom) {
2513  merge_top =
2514  test_row->max_y () >
2515  row->max_y ()? test_row->max_y () : row->max_y ();
2516  merge_bottom =
2517  test_row->min_y () <
2518  row->min_y ()? test_row->min_y () : row->min_y ();
2519  if (merge_top - merge_bottom <= rowsize) {
2520  if (testing_blob && textord_debug_blob) {
2521  tprintf ("Merging rows at (%g,%g), (%g,%g)\n",
2522  row->min_y (), row->max_y (),
2523  test_row->min_y (), test_row->max_y ());
2524  }
2525  test_row->set_limits (merge_bottom, merge_top);
2526  blob_it.set_to_list (test_row->blob_list ());
2527  blob_it.add_list_after (row->blob_list ());
2528  blob_it.sort (blob_x_order);
2529  row_it->backward ();
2530  delete row_it->extract ();
2531  row_it->forward ();
2532  bestover = -1.0f; //force replacement
2533  }
2534  overlap = top - bottom;
2535  if (top > test_row->max_y ())
2536  overlap -= top - test_row->max_y ();
2537  if (bottom < test_row->min_y ())
2538  overlap -= test_row->min_y () - bottom;
2539  if (bestover >= rowsize - 1 && overlap >= rowsize - 1) {
2540  result = REJECT;
2541  }
2542  if (overlap > bestover) {
2543  bestover = overlap; //find biggest overlap
2544  row = test_row;
2545  }
2546  if (testing_blob && textord_debug_blob) {
2547  tprintf("Test blob y=(%g,%g), row=(%f,%f), size=%g, overlap=%f->%f\n",
2548  bottom, top, test_row->min_y(), test_row->max_y(),
2549  rowsize, overlap, bestover);
2550  }
2551  }
2552  }
2553  }
2554  while (!row_it->at_last ()
2555  && test_row->min_y () <= top && test_row->max_y () >= bottom);
2556  while (row_it->data () != row)
2557  row_it->backward (); //make it point to row
2558  //doesn't overlap much
2559  if (top - bottom - bestover > rowsize * textord_overlap_x &&
2560  (!textord_fix_makerow_bug || bestover < rowsize * textord_overlap_x)
2561  && result == ASSIGN)
2562  result = NEW_ROW; //doesn't overlap enough
2563  best_row = row;
2564  return result;
2565 }

◆ pre_associate_blobs()

void pre_associate_blobs ( ICOORD  page_tr,
TO_BLOCK *  block,
FCOORD  rotation,
bool  testing_on 
)

Definition at line 1845 of file makerow.cpp.

1850  {
1851 #ifndef GRAPHICS_DISABLED
1852  ScrollView::Color colour; //of boxes
1853 #endif
1854  BLOBNBOX *blob; //current blob
1855  BLOBNBOX *nextblob; //next in list
1856  TBOX blob_box;
1857  FCOORD blob_rotation; //inverse of rotation
1858  BLOBNBOX_IT blob_it; //iterator
1859  BLOBNBOX_IT start_it; //iterator
1860  TO_ROW_IT row_it = block->get_rows ();
1861 
1862 #ifndef GRAPHICS_DISABLED
1863  colour = ScrollView::RED;
1864 #endif
1865 
1866  blob_rotation = FCOORD (rotation.x (), -rotation.y ());
1867  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1868  //get blobs
1869  blob_it.set_to_list (row_it.data ()->blob_list ());
1870  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
1871  blob_it.forward ()) {
1872  blob = blob_it.data ();
1873  blob_box = blob->bounding_box ();
1874  start_it = blob_it; //save start point
1875  // if (testing_on && textord_show_final_blobs)
1876  // {
1877  // tprintf("Blob at (%d,%d)->(%d,%d), addr=%x, count=%d\n",
1878  // blob_box.left(),blob_box.bottom(),
1879  // blob_box.right(),blob_box.top(),
1880  // (void*)blob,blob_it.length());
1881  // }
1882  bool overlap;
1883  do {
1884  overlap = false;
1885  if (!blob_it.at_last ()) {
1886  nextblob = blob_it.data_relative(1);
1887  overlap = blob_box.major_x_overlap(nextblob->bounding_box());
1888  if (overlap) {
1889  blob->merge(nextblob); // merge new blob
1890  blob_box = blob->bounding_box(); // get bigger box
1891  blob_it.forward();
1892  }
1893  }
1894  }
1895  while (overlap);
1896  blob->chop (&start_it, &blob_it,
1897  blob_rotation,
1898  block->line_size * tesseract::CCStruct::kXHeightFraction *
1899  textord_chop_width);
1900  //attempt chop
1901  }
1902 #ifndef GRAPHICS_DISABLED
1903  if (testing_on && textord_show_final_blobs) {
1904  if (to_win == nullptr)
1905  create_to_win(page_tr);
1906  to_win->Pen(colour);
1907  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
1908  blob_it.forward ()) {
1909  blob = blob_it.data ();
1910  blob_box = blob->bounding_box ();
1911  blob_box.rotate (rotation);
1912  if (!blob->joined_to_prev ()) {
1913  to_win->Rectangle (blob_box.left (), blob_box.bottom (),
1914  blob_box.right (), blob_box.top ());
1915  }
1916  }
1917  colour = static_cast<ScrollView::Color>(colour + 1);
1918  if (colour > ScrollView::MAGENTA)
1919  colour = ScrollView::RED;
1920  }
1921 #endif
1922  }
1923 }

◆ row_spacing_order()

int row_spacing_order ( const void *  item1,
const void *  item2 
)

Definition at line 2617 of file makerow.cpp.

2619  {
2620  //converted ptr
2621  const TO_ROW *row1 = *reinterpret_cast<const TO_ROW* const*>(item1);
2622  //converted ptr
2623  const TO_ROW *row2 = *reinterpret_cast<const TO_ROW* const*>(item2);
2624 
2625  if (row1->spacing < row2->spacing)
2626  return -1;
2627  else if (row1->spacing > row2->spacing)
2628  return 1;
2629  else
2630  return 0;
2631 }

◆ row_y_order()

int row_y_order ( const void *  item1,
const void *  item2 
)

Definition at line 2595 of file makerow.cpp.

2597  {
2598  //converted ptr
2599  const TO_ROW *row1 = *reinterpret_cast<const TO_ROW* const*>(item1);
2600  //converted ptr
2601  const TO_ROW *row2 = *reinterpret_cast<const TO_ROW* const*>(item2);
2602 
2603  if (row1->parallel_c () > row2->parallel_c ())
2604  return -1;
2605  else if (row1->parallel_c () < row2->parallel_c ())
2606  return 1;
2607  else
2608  return 0;
2609 }

◆ segment_baseline()

bool segment_baseline ( TO_ROW *  row,
TO_BLOCK *  block,
int32_t &  segments,
int32_t *  xstarts 
)

Definition at line 2088 of file makerow.cpp.

2093  {
2094  bool needs_curve; //needs curved line
2095  int blobcount; //no of blobs
2096  int blobindex; //current blob
2097  int last_state; //above, on , below
2098  int state; //of current blob
2099  float yshift; //from baseline
2100  TBOX box; //blob box
2101  TBOX new_box; //new_it box
2102  float middle; //xcentre of blob
2103  //blobs
2104  BLOBNBOX_IT blob_it = row->blob_list ();
2105  BLOBNBOX_IT new_it = blob_it; //front end
2106  SORTED_FLOATS yshifts; //shifts from baseline
2107 
2108  needs_curve = false;
2109  box = box_next_pre_chopped (&blob_it);
2110  xstarts[0] = box.left ();
2111  segments = 1;
2112  blobcount = row->blob_list ()->length ();
2113  if (textord_oldbl_debug)
2114  tprintf ("Segmenting baseline of %d blobs at (%d,%d)\n",
2115  blobcount, box.left (), box.bottom ());
2116  if (blobcount <= textord_spline_medianwin
2117  || blobcount < textord_spline_minblobs) {
2118  blob_it.move_to_last ();
2119  box = blob_it.data ()->bounding_box ();
2120  xstarts[1] = box.right ();
2121  return false;
2122  }
2123  last_state = 0;
2124  new_it.mark_cycle_pt ();
2125  for (blobindex = 0; blobindex < textord_spline_medianwin; blobindex++) {
2126  new_box = box_next_pre_chopped (&new_it);
2127  middle = (new_box.left () + new_box.right ()) / 2.0;
2128  yshift = new_box.bottom () - row->line_m () * middle - row->line_c ();
2129  //record shift
2130  yshifts.add (yshift, blobindex);
2131  if (new_it.cycled_list ()) {
2132  xstarts[1] = new_box.right ();
2133  return false;
2134  }
2135  }
2136  for (blobcount = 0; blobcount < textord_spline_medianwin / 2; blobcount++)
2137  box = box_next_pre_chopped (&blob_it);
2138  do {
2139  new_box = box_next_pre_chopped (&new_it);
2140  //get middle one
2141  yshift = yshifts[textord_spline_medianwin / 2];
2142  if (yshift > textord_spline_shift_fraction * block->line_size)
2143  state = 1;
2144  else if (-yshift > textord_spline_shift_fraction * block->line_size)
2145  state = -1;
2146  else
2147  state = 0;
2148  if (state != 0)
2149  needs_curve = true;
2150  // tprintf("State=%d, prev=%d, shift=%g\n",
2151  // state,last_state,yshift);
2152  if (state != last_state && blobcount > textord_spline_minblobs) {
2153  xstarts[segments++] = box.left ();
2154  blobcount = 0;
2155  }
2156  last_state = state;
2157  yshifts.remove (blobindex - textord_spline_medianwin);
2158  box = box_next_pre_chopped (&blob_it);
2159  middle = (new_box.left () + new_box.right ()) / 2.0;
2160  yshift = new_box.bottom () - row->line_m () * middle - row->line_c ();
2161  yshifts.add (yshift, blobindex);
2162  blobindex++;
2163  blobcount++;
2164  }
2165  while (!new_it.cycled_list ());
2166  if (blobcount > textord_spline_minblobs || segments == 1) {
2167  xstarts[segments] = new_box.right ();
2168  }
2169  else {
2170  xstarts[--segments] = new_box.right ();
2171  }
2172  if (textord_oldbl_debug)
2173  tprintf ("Made %d segments on row at (%d,%d)\n",
2174  segments, box.right (), box.bottom ());
2175  return needs_curve;
2176 }

◆ separate_underlines()

void separate_underlines ( TO_BLOCK *  block,
float  gradient,
FCOORD  rotation,
bool  testing_on 
)

Definition at line 1772 of file makerow.cpp.

1775  { // correct orientation
1776  BLOBNBOX *blob; // current blob
1777  C_BLOB *rotated_blob; // rotated blob
1778  TO_ROW *row; // current row
1779  float length; // of g_vec
1780  TBOX blob_box;
1781  FCOORD blob_rotation; // inverse of rotation
1782  FCOORD g_vec; // skew rotation
1783  BLOBNBOX_IT blob_it; // iterator
1784  // iterator
1785  BLOBNBOX_IT under_it = &block->underlines;
1786  BLOBNBOX_IT large_it = &block->large_blobs;
1787  TO_ROW_IT row_it = block->get_rows();
1788  int min_blob_height = static_cast<int>(textord_min_blob_height_fraction *
1789  block->line_size + 0.5);
1790 
1791  // length of vector
1792  length = sqrt(1 + gradient * gradient);
1793  g_vec = FCOORD(1 / length, -gradient / length);
1794  blob_rotation = FCOORD(rotation.x(), -rotation.y());
1795  blob_rotation.rotate(g_vec); // undoing everything
1796  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1797  row = row_it.data();
1798  // get blobs
1799  blob_it.set_to_list(row->blob_list());
1800  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list();
1801  blob_it.forward()) {
1802  blob = blob_it.data();
1803  blob_box = blob->bounding_box();
1804  if (blob_box.width() > block->line_size * textord_underline_width) {
1805  ASSERT_HOST(blob->cblob() != nullptr);
1806  rotated_blob = crotate_cblob (blob->cblob(),
1807  blob_rotation);
1808  if (test_underline(
1809  testing_on && textord_show_final_rows,
1810  rotated_blob, static_cast<int16_t>(row->intercept()),
1811  static_cast<int16_t>(
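1812  block->line_size *
1813  (tesseract::CCStruct::kXHeightFraction +
1814  tesseract::CCStruct::kAscenderFraction / 2.0f)))) {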
1815  under_it.add_after_then_move(blob_it.extract());
1816  if (testing_on && textord_show_final_rows) {
1817  tprintf("Underlined blob at:");
1818  rotated_blob->bounding_box().print();
1819  tprintf("Was:");
1820  blob_box.print();
1821  }
1822  } else if (CountOverlaps(blob->bounding_box(), min_blob_height,
1823  row->blob_list()) >
1824  textord_max_blob_overlaps) {
1825  large_it.add_after_then_move(blob_it.extract());
1826  if (testing_on && textord_show_final_rows) {
1827  tprintf("Large blob overlaps %d blobs at:",
1828  CountOverlaps(blob_box, min_blob_height,
1829  row->blob_list()));
1830  blob_box.print();
1831  }
1832  }
1833  delete rotated_blob;
1834  }
1835  }
1836  }
1837 }

◆ vigorous_noise_removal()

void vigorous_noise_removal ( TO_BLOCK *  block)

Definition at line 466 of file makerow.cpp.

466  {
467  TO_ROW_IT row_it = block->get_rows ();
468  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
469  TO_ROW* row = row_it.data();
470  BLOBNBOX_IT b_it = row->blob_list();
471  // Estimate the xheight on the row.
472  int max_height = 0;
473  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
474  BLOBNBOX* blob = b_it.data();
475  if (blob->bounding_box().height() > max_height)
476  max_height = blob->bounding_box().height();
477  }
478  STATS hstats(0, max_height + 1);
479  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
480  BLOBNBOX* blob = b_it.data();
481  int height = blob->bounding_box().height();
482  if (height >= kMinSize)
483  hstats.add(blob->bounding_box().height(), 1);
484  }
485  float xheight = hstats.median();
486  // Delete small objects.
487  BLOBNBOX* prev = nullptr;
488  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
489  BLOBNBOX* blob = b_it.data();
490  const TBOX& box = blob->bounding_box();
491  if (box.height() < kNoiseSize * xheight) {
492  // Small so delete unless it looks like an i dot.
493  if (prev != nullptr) {
494  if (dot_of_i(blob, prev, row))
495  continue; // Looks OK.
496  }
497  if (!b_it.at_last()) {
498  BLOBNBOX* next = b_it.data_relative(1);
499  if (dot_of_i(blob, next, row))
500  continue; // Looks OK.
501  }
502  // It might be noise so get rid of it.
503  delete blob->cblob();
504  delete b_it.extract();
505  } else {
506  prev = blob;
507  }
508  }
509  }
510 }

Variable Documentation

◆ kMinLeaderCount

const int kMinLeaderCount = 5

Definition at line 105 of file makerow.cpp.

◆ kMinSize

const int kMinSize = 8

Definition at line 377 of file makerow.cpp.

◆ kNoiseSize

const double kNoiseSize = 0.5

Definition at line 376 of file makerow.cpp.

◆ textord_ascheight_mode_fraction

double textord_ascheight_mode_fraction = 0.08

"Min pile height to make ascheight"

Definition at line 91 of file makerow.cpp.

◆ textord_ascx_ratio_max

double textord_ascx_ratio_max = 1.8

"Max cap/xheight"

Definition at line 95 of file makerow.cpp.

◆ textord_ascx_ratio_min

double textord_ascx_ratio_min = 1.25

"Min cap/xheight"

Definition at line 94 of file makerow.cpp.

◆ textord_chop_width

double textord_chop_width = 1.5

"Max width before chopping"

Definition at line 76 of file makerow.cpp.

◆ textord_debug_blob

bool textord_debug_blob = false

"Print test blob information"

Definition at line 101 of file makerow.cpp.

◆ textord_debug_xheights

bool textord_debug_xheights = false

"Test xheight algorithms"

Definition at line 55 of file makerow.cpp.

◆ textord_descx_ratio_max

double textord_descx_ratio_max = 0.6

"Max desc/xheight"

Definition at line 97 of file makerow.cpp.

◆ textord_descx_ratio_min

double textord_descx_ratio_min = 0.25

"Min desc/xheight"

Definition at line 96 of file makerow.cpp.

◆ textord_excess_blobsize

double textord_excess_blobsize = 1.3

"New row made if blob makes row this big"

Definition at line 83 of file makerow.cpp.

◆ textord_fix_makerow_bug

bool textord_fix_makerow_bug = true

"Prevent multiple baselines"

Definition at line 54 of file makerow.cpp.

◆ textord_fix_xheight_bug

bool textord_fix_xheight_bug = true

"Use spline baseline"

Definition at line 53 of file makerow.cpp.

◆ textord_heavy_nr

bool textord_heavy_nr = false

"Vigorously remove noise"

Definition at line 42 of file makerow.cpp.

◆ textord_linespace_iqrlimit

double textord_linespace_iqrlimit = 0.2

"Max iqr/median for linespace"

Definition at line 74 of file makerow.cpp.

◆ textord_lms_line_trials

int textord_lms_line_trials = 12

"Number of line fits to do"

Definition at line 99 of file makerow.cpp.

◆ textord_min_blob_height_fraction

double textord_min_blob_height_fraction = 0.75

"Min blob height/top to include blob top into xheight stats"

Definition at line 87 of file makerow.cpp.

◆ textord_min_blobs_in_row

int textord_min_blobs_in_row = 4

"Min blobs before gradient counted"

Definition at line 62 of file makerow.cpp.

◆ textord_min_linesize

double textord_min_linesize = 1.25

"* blob height for initial linesize"

Definition at line 81 of file makerow.cpp.

◆ textord_min_xheight

int textord_min_xheight = 10

"Min credible pixel xheight"

Definition at line 67 of file makerow.cpp.

◆ textord_minxh

double textord_minxh = 0.25

"fraction of linesize for min xheight"

Definition at line 80 of file makerow.cpp.

◆ textord_new_initial_xheight

bool textord_new_initial_xheight = true

"Use test xheight mechanism"

Definition at line 100 of file makerow.cpp.

◆ textord_occupancy_threshold

double textord_occupancy_threshold = 0.4

"Fraction of neighbourhood"

Definition at line 84 of file makerow.cpp.

◆ textord_old_baselines

bool textord_old_baselines = true

"Use old baseline algorithm"

Definition at line 51 of file makerow.cpp.

◆ textord_old_xheight

bool textord_old_xheight = false

"Use old xheight algorithm"

Definition at line 52 of file makerow.cpp.

◆ textord_parallel_baselines

bool textord_parallel_baselines = true

"Force parallel baselines"

Definition at line 49 of file makerow.cpp.

◆ textord_show_expanded_rows

bool textord_show_expanded_rows = false

"Display rows after expanding"

Definition at line 45 of file makerow.cpp.

◆ textord_show_final_blobs

bool textord_show_final_blobs = false

"Display blob bounds after pre-ass"

Definition at line 47 of file makerow.cpp.

◆ textord_show_final_rows

bool textord_show_final_rows = false

"Display rows after final fitting"

Definition at line 46 of file makerow.cpp.

◆ textord_show_initial_rows

bool textord_show_initial_rows = false

"Display row accumulation"

Definition at line 43 of file makerow.cpp.

◆ textord_show_parallel_rows

bool textord_show_parallel_rows = false

"Display page correlated rows"

Definition at line 44 of file makerow.cpp.

◆ textord_skew_ile

double textord_skew_ile = 0.5

"Ile of gradients for page skew"

Definition at line 72 of file makerow.cpp.

◆ textord_skew_lag

double textord_skew_lag = 0.02

"Lag for skew on row accumulation"

Definition at line 73 of file makerow.cpp.

◆ textord_spline_medianwin

int textord_spline_medianwin = 6

"Size of window for spline segmentation"

Definition at line 64 of file makerow.cpp.

◆ textord_spline_minblobs

int textord_spline_minblobs = 8

"Min blobs in each spline segment"

Definition at line 63 of file makerow.cpp.

◆ textord_spline_outlier_fraction

double textord_spline_outlier_fraction = 0.1

"Fraction of line spacing for outlier"

Definition at line 71 of file makerow.cpp.

◆ textord_spline_shift_fraction

double textord_spline_shift_fraction = 0.02

"Fraction of line spacing for quad"

Definition at line 69 of file makerow.cpp.

◆ textord_straight_baselines

bool textord_straight_baselines = false

"Force straight baselines"

Definition at line 50 of file makerow.cpp.

◆ textord_test_landscape

bool textord_test_landscape = false

"Tests refer to land/port"

Definition at line 48 of file makerow.cpp.

◆ textord_test_x

int textord_test_x = -INT32_MAX

"coord of test pt"

Definition at line 60 of file makerow.cpp.

◆ textord_test_y

int textord_test_y = -INT32_MAX

"coord of test pt"

Definition at line 61 of file makerow.cpp.

◆ textord_underline_width

double textord_underline_width = 2.0

"Multiple of line_size for underline"

Definition at line 85 of file makerow.cpp.

◆ textord_width_limit

double textord_width_limit = 8

"Max width of blobs to make rows"

Definition at line 75 of file makerow.cpp.

◆ textord_xheight_error_margin

double textord_xheight_error_margin = 0.1

"Accepted variation"

Definition at line 98 of file makerow.cpp.

◆ textord_xheight_mode_fraction

double textord_xheight_mode_fraction = 0.4

"Min pile height to make xheight"

Definition at line 89 of file makerow.cpp.

crotate_cblob
C_BLOB * crotate_cblob(C_BLOB *blob, FCOORD rotation)
Definition: blobbox.cpp:605
textord_ascx_ratio_max
double textord_ascx_ratio_max
Definition: makerow.cpp:95
textord_fix_xheight_bug
bool textord_fix_xheight_bug
Definition: makerow.cpp:53
TO_ROW::set_num_repeated_sets
void set_num_repeated_sets(int num_sets)
Definition: blobbox.h:639
choose_nth_item
int32_t choose_nth_item(int32_t index, float *array, int32_t count)
Definition: statistc.cpp:609
STATS::get_total
int32_t get_total() const
Definition: statistc.h:83
textord_show_parallel_rows
bool textord_show_parallel_rows
Definition: makerow.cpp:44
TO_BLOCK::small_blobs
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:774
textord_linespace_iqrlimit
double textord_linespace_iqrlimit
Definition: makerow.cpp:74
C_BLOB::FakeBlob
static C_BLOB * FakeBlob(const TBOX &box)
Definition: stepblob.cpp:236
STATS::min_bucket
int32_t min_bucket() const
Definition: statistc.cpp:187
TO_BLOCK::baseline_offset
float baseline_offset
Definition: blobbox.h:786
textord_fix_makerow_bug
bool textord_fix_makerow_bug
Definition: makerow.cpp:54
tesseract::CCStruct::kXHeightFraction
static const double kXHeightFraction
Definition: ccstruct.h:34
PDBLK::bounding_box
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:58
create_to_win
ScrollView * create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:42
fit_parallel_lms
void fit_parallel_lms(float gradient, TO_ROW *row)
Definition: makerow.cpp:1973
TO_BLOCK::noise_blobs
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:773
test_underline
bool test_underline(bool testing_on, C_BLOB *blob, int16_t baseline, int16_t xheight)
Definition: blkocc.cpp:48
compute_height_modes
int32_t compute_height_modes(STATS *heights, int32_t min_height, int32_t max_height, int32_t *modes, int32_t maxmodes)
Definition: makerow.cpp:1623
POLY_BLOCK::IsText
bool IsText() const
Definition: polyblk.h:62
BLOBNBOX::set_repeated_set
void set_repeated_set(int set_id)
Definition: blobbox.h:264
textord_chop_width
double textord_chop_width
Definition: makerow.cpp:76
TO_BLOCK::key_row
TO_ROW * key_row
Definition: blobbox.h:797
ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:87
TO_ROW::set_parallel_line
void set_parallel_line(float gradient, float new_c, float new_error)
Definition: blobbox.h:611
TO_ROW::spacing
float spacing
Definition: blobbox.h:655
ROW_DESCENDERS_FOUND
Definition: makerow.h:36
deskew_block_coords
TBOX deskew_block_coords(TO_BLOCK *block, float gradient)
Definition: makerow.cpp:732
most_overlapping_row
OVERLAP_STATE most_overlapping_row(TO_ROW_IT *row_it, TO_ROW *&best_row, float top, float bottom, float rowsize, bool testing_blob)
Definition: makerow.cpp:2478
kMinSize
const int kMinSize
Definition: makerow.cpp:377
FCOORD::y
float y() const
Definition: points.h:209
ICOORD
integer coordinate
Definition: points.h:30
textord_straight_baselines
bool textord_straight_baselines
Definition: makerow.cpp:50
TO_ROW::line_m
float line_m() const
Definition: blobbox.h:570
textord_debug_blob
bool textord_debug_blob
Definition: makerow.cpp:101
TBOX::print
void print() const
Definition: rect.h:277
compute_line_occupation
void compute_line_occupation(TO_BLOCK *block, float gradient, int32_t min_y, int32_t max_y, int32_t *occupation, int32_t *deltas)
Definition: makerow.cpp:768
textord_test_landscape
bool textord_test_landscape
Definition: makerow.cpp:48
textord_spline_shift_fraction
double textord_spline_shift_fraction
Definition: makerow.cpp:69
QSPLINE
Definition: quspline.h:31
FCOORD::x
float x() const
Definition: points.h:206
TO_ROW::max_y
float max_y() const
Definition: blobbox.h:558
TBOX::top
int16_t top() const
Definition: rect.h:57
TBOX::contains
bool contains(const FCOORD pt) const
Definition: rect.h:330
TO_BLOCK::blobs
BLOBNBOX_LIST blobs
Definition: blobbox.h:771
STATS::max_bucket
int32_t max_bucket() const
Definition: statistc.cpp:201
STATS::pile_count
int32_t pile_count(int32_t value) const
Definition: statistc.h:75
textord_occupancy_threshold
double textord_occupancy_threshold
Definition: makerow.cpp:84
ScrollView::Pen
void Pen(Color color)
Definition: scrollview.cpp:717
fit_parallel_rows
void fit_parallel_rows(TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
Definition: makerow.cpp:1931
ScrollView::DrawTo
void DrawTo(int x, int y)
Definition: scrollview.cpp:524
plot_parallel_row
void plot_parallel_row(TO_ROW *row, float gradient, int32_t left, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:117
textord_debug_xheights
bool textord_debug_xheights
Definition: makerow.cpp:55
fit_lms_line
void fit_lms_line(TO_ROW *row)
Definition: makerow.cpp:266
ROW_ASCENDERS_FOUND
Definition: makerow.h:35
textord_descx_ratio_max
double textord_descx_ratio_max
Definition: makerow.cpp:97
ROW_INVALID
Definition: makerow.h:38
FCOORD
Definition: points.h:187
BLOBNBOX
Definition: blobbox.h:142
OVERLAP_STATE
OVERLAP_STATE
Definition: makerow.h:27
MAX_HEIGHT_MODES
#define MAX_HEIGHT_MODES
Definition: makerow.cpp:103
tesseract::DetLineFit
Definition: detlinefit.h:56
BTFT_LEADER
Definition: blobbox.h:120
TO_BLOCK::underlines
BLOBNBOX_LIST underlines
Definition: blobbox.h:772
textord_spline_minblobs
int textord_spline_minblobs
Definition: makerow.cpp:63
C_BLOB
Definition: stepblob.h:36
TBOX::rotate
void rotate(const FCOORD &vec)
Definition: rect.h:196
plot_to_row
void plot_to_row(TO_ROW *row, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:84
assign_blobs_to_rows
void assign_blobs_to_rows(TO_BLOCK *block, float *gradient, int pass, bool reject_misses, bool make_new_rows, bool drawing_skew)
Definition: makerow.cpp:2278
textord_min_xheight
int textord_min_xheight
Definition: makerow.cpp:67
TBOX::height
int16_t height() const
Definition: rect.h:107
textord_underline_width
double textord_underline_width
Definition: makerow.cpp:85
tesseract::DetLineFit::Clear
void Clear()
Definition: detlinefit.cpp:45
textord_excess_blobsize
double textord_excess_blobsize
Definition: makerow.cpp:83
textord_show_final_blobs
bool textord_show_final_blobs
Definition: makerow.cpp:47
blob_x_order
int blob_x_order(const void *item1, const void *item2)
Definition: makerow.cpp:2573
get_row_category
ROW_CATEGORY get_row_category(const TO_ROW *row)
Definition: makerow.h:121
textord_parallel_baselines
bool textord_parallel_baselines
Definition: makerow.cpp:49
textord_xheight_mode_fraction
double textord_xheight_mode_fraction
Definition: makerow.cpp:89
TO_ROW::initial_min_y
float initial_min_y() const
Definition: blobbox.h:567
ROW_CATEGORY
ROW_CATEGORY
Definition: makerow.h:34
TO_ROW::merged
bool merged
Definition: blobbox.h:644
TO_ROW::rep_chars_marked
bool rep_chars_marked() const
Definition: blobbox.h:630
BLOCK::pdblk
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:189
TO_BLOCK::large_blobs
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:775
compute_row_stats
void compute_row_stats(TO_BLOCK *block, bool testing_on)
Definition: makerow.cpp:1143
TBOX::major_x_overlap
bool major_x_overlap(const TBOX &box) const
Definition: rect.h:403
textord_spline_medianwin
int textord_spline_medianwin
Definition: makerow.cpp:64
compute_dropout_distances
void compute_dropout_distances(int32_t *occupation, int32_t *thresholds, int32_t line_count)
Definition: makerow.cpp:902
within_error_margin
bool within_error_margin(float test, float num, float margin)
Definition: makerow.h:127
TO_BLOCK::block
BLOCK * block
Definition: blobbox.h:776
textord_lms_line_trials
int textord_lms_line_trials
Definition: makerow.cpp:99
kMinLeaderCount
const int kMinLeaderCount
Definition: makerow.cpp:105
ScrollView::MAGENTA
Definition: scrollview.h:109
tesseract::DetLineFit::ConstrainedFit
double ConstrainedFit(const FCOORD &direction, double min_dist, double max_dist, bool debug, ICOORD *line_pt)
Definition: detlinefit.cpp:130
NEW_ROW
Definition: makerow.h:46
adjust_row_limits
void adjust_row_limits(TO_BLOCK *block)
Definition: makerow.cpp:1107
textord_oldbl_debug
bool textord_oldbl_debug
Definition: oldbasel.cpp:38
textord_skew_lag
double textord_skew_lag
Definition: makerow.cpp:73
textord_skew_ile
double textord_skew_ile
Definition: makerow.cpp:72
TO_ROW::num_repeated_sets
int num_repeated_sets() const
Definition: blobbox.h:636
BLOBNBOX::joined_to_prev
bool joined_to_prev() const
Definition: blobbox.h:255
textord_test_y
int textord_test_y
Definition: makerow.cpp:61
TO_ROW::parallel_c
float parallel_c() const
Definition: blobbox.h:579
TBOX::width
int16_t width() const
Definition: rect.h:114
textord_show_initial_rows
bool textord_show_initial_rows
Definition: makerow.cpp:43
textord_min_blobs_in_row
int textord_min_blobs_in_row
Definition: makerow.cpp:62
textord_min_blob_height_fraction
double textord_min_blob_height_fraction
Definition: makerow.cpp:87
textord_descx_ratio_min
double textord_descx_ratio_min
Definition: makerow.cpp:96
TBOX::bottom
int16_t bottom() const
Definition: rect.h:64
tesseract::CCStruct::kAscenderFraction
static const double kAscenderFraction
Definition: ccstruct.h:35
kNoiseSize
const double kNoiseSize
Definition: makerow.cpp:376
ScrollView::WHITE
Definition: scrollview.h:103
segment_baseline
bool segment_baseline(TO_ROW *row, TO_BLOCK *block, int32_t &segments, int32_t *xstarts)
Definition: makerow.cpp:2088
tesseract::CCStruct::kDescenderFraction
static const double kDescenderFraction
Definition: ccstruct.h:33
compute_occupation_threshold
void compute_occupation_threshold(int32_t low_window, int32_t high_window, int32_t line_count, int32_t *occupation, int32_t *thresholds)
Definition: makerow.cpp:821
TO_ROW::xheight
float xheight
Definition: blobbox.h:656
BLOBNBOX::repeated_set
int repeated_set() const
Definition: blobbox.h:261
distance
UnicodeText::const_iterator::difference_type distance(const UnicodeText::const_iterator &first, const UnicodeText::const_iterator &last)
Definition: unicodetext.cc:44
FCOORD::rotate
void rotate(const FCOORD vec)
Definition: points.h:736
TO_BLOCK::line_spacing
float line_spacing
Definition: blobbox.h:778
TO_ROW::line_c
float line_c() const
Definition: blobbox.h:573
ScrollView::RED
Definition: scrollview.h:104
TO_ROW::add_blob
void add_blob(BLOBNBOX *blob, float top, float bottom, float row_size)
Definition: blobbox.cpp:723
STATS
Definition: statistc.h:30
BLOBNBOX::bounding_box
const TBOX & bounding_box() const
Definition: blobbox.h:229
textord_show_final_rows
bool textord_show_final_rows
Definition: makerow.cpp:46
ROW_UNKNOWN
Definition: makerow.h:37
STATS::mode
int32_t mode() const
Definition: statistc.cpp:100
TO_BLOCK::get_rows
TO_ROW_LIST * get_rows()
Definition: blobbox.h:703
box_next_pre_chopped
TBOX box_next_pre_chopped(BLOBNBOX_IT *it)
Definition: blobbox.cpp:657
row_y_order
int row_y_order(const void *item1, const void *item2)
Definition: makerow.cpp:2595
textord_show_expanded_rows
bool textord_show_expanded_rows
Definition: makerow.cpp:45
find_best_dropout_row
bool find_best_dropout_row(TO_ROW *row, int32_t distance, float dist_limit, int32_t line_index, TO_ROW_IT *row_it, bool testing_on)
Definition: makerow.cpp:652
QSPLINE::y
double y(double x) const
Definition: quspline.cpp:202
TO_ROW::intercept
float intercept() const
Definition: blobbox.h:588
BLOBNBOX::merge
void merge(BLOBNBOX *nextblob)
Definition: blobbox.cpp:91
BLOBNBOX::flow
BlobTextFlowType flow() const
Definition: blobbox.h:294
TO_ROW::believability
float believability() const
Definition: blobbox.h:585
TBOX::left
int16_t left() const
Definition: rect.h:71
STATS::add
void add(int32_t value, int32_t count)
Definition: statistc.cpp:87
textord_new_initial_xheight
bool textord_new_initial_xheight
Definition: makerow.cpp:100
TO_ROW::min_y
float min_y() const
Definition: blobbox.h:561
tesseract::DetLineFit::Fit
double Fit(ICOORD *pt1, ICOORD *pt2)
Definition: detlinefit.h:75
make_initial_textrows
void make_initial_textrows(ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, bool testing_on)
Definition: makerow.cpp:226
TBOX::right
int16_t right() const
Definition: rect.h:78
delete_non_dropout_rows
void delete_non_dropout_rows(TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
Definition: makerow.cpp:570
tesseract::DetLineFit::Add
void Add(const ICOORD &pt)
Definition: detlinefit.cpp:51
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
SORTED_FLOATS::add
void add(float value, int32_t key)
Definition: sortflts.cpp:26
textord_ascx_ratio_min
double textord_ascx_ratio_min
Definition: makerow.cpp:94
POLY_BLOCK
Definition: polyblk.h:26
SORTED_FLOATS
Definition: sortflts.h:41
TO_ROW
Definition: blobbox.h:543
linear_spline_baseline
double * linear_spline_baseline(TO_ROW *row, TO_BLOCK *block, int32_t &segments, int32_t xstarts[])
Definition: makerow.cpp:2187
TO_ROW::set_line
void set_line(float new_m, float new_c, float new_error)
Definition: blobbox.h:603
ScrollView::SetCursor
void SetCursor(int x, int y)
Definition: scrollview.cpp:518
TO_ROW::ascrise
float ascrise
Definition: blobbox.h:658
TO_ROW::all_caps
bool all_caps
Definition: blobbox.h:645
ScrollView::Color
Color
Definition: scrollview.h:100
BLOBNBOX::cblob
C_BLOB * cblob() const
Definition: blobbox.h:267
textord_test_x
int textord_test_x
Definition: makerow.cpp:60
TO_ROW::baseline
QSPLINE baseline
Definition: blobbox.h:669
ScrollView::Rectangle
void Rectangle(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:599
TO_ROW::descdrop
float descdrop
Definition: blobbox.h:659
TO_BLOCK::max_blob_size
float max_blob_size
Definition: blobbox.h:785
TO_ROW::set_limits
void set_limits(float new_min, float new_max)
Definition: blobbox.h:621
row_spacing_order
int row_spacing_order(const void *item1, const void *item2)
Definition: makerow.cpp:2617
textord_xheight_error_margin
double textord_xheight_error_margin
Definition: makerow.cpp:98
SORTED_FLOATS::remove
void remove(int32_t key)
Definition: sortflts.cpp:51
to_win
ScrollView * to_win
Definition: drawtord.cpp:34
STATS::print
void print() const
Definition: statistc.cpp:509
TO_ROW::line_error
float line_error() const
Definition: blobbox.h:576
BLOBNBOX::chop
void chop(BLOBNBOX_IT *start_it, BLOBNBOX_IT *blob_it, FCOORD rotation, float xheight)
Definition: blobbox.cpp:118
ASSIGN
Definition: makerow.h:44
compute_page_skew
void compute_page_skew(TO_BLOCK_LIST *blocks, float &page_m, float &page_err)
Definition: makerow.cpp:286
REJECT
Definition: makerow.h:45
TO_ROW::blob_list
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:599
expand_rows
void expand_rows(ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
Definition: makerow.cpp:949
draw_occupation
void draw_occupation(int32_t xleft, int32_t ybottom, int32_t min_y, int32_t max_y, int32_t occupation[], int32_t thresholds[])
Definition: drawtord.cpp:157
TBOX
Definition: rect.h:33
TO_BLOCK::line_size
float line_size
Definition: blobbox.h:784
cleanup_rows_making
void cleanup_rows_making(ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
Definition: makerow.cpp:517
textord_ascheight_mode_fraction
double textord_ascheight_mode_fraction
Definition: makerow.cpp:91