All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
makerow.cpp File Reference
#include "stderr.h"
#include "blobbox.h"
#include "ccstruct.h"
#include "detlinefit.h"
#include "statistc.h"
#include "drawtord.h"
#include "blkocc.h"
#include "sortflts.h"
#include "oldbasel.h"
#include "textord.h"
#include "tordmain.h"
#include "underlin.h"
#include "makerow.h"
#include "tprintf.h"
#include "tovars.h"

Go to the source code of this file.

Namespaces

 tesseract
 

Macros

#define MAX_HEIGHT_MODES   12
 

Functions

float MakeRowFromSubBlobs (TO_BLOCK *block, C_BLOB *blob, TO_ROW_IT *row_it)
 
make_single_row

Arrange the blobs into a single row. However, if there is only a single blob, two rows are made, in case the top-level blob is a container of the real blobs to recognize.

float make_single_row (ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks)
 
make_rows

Arrange the blobs into rows.

float make_rows (ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
 
make_initial_textrows

Arrange the good blobs into rows of text.

void make_initial_textrows (ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, BOOL8 testing_on)
 
fit_lms_line

Fit an LMS line to a row.

void fit_lms_line (TO_ROW *row)
 
find_best_dropout_row

Delete this row if it has a neighbour with better dropout characteristics. TRUE is returned if the row should be deleted.

BOOL8 find_best_dropout_row (TO_ROW *row, inT32 distance, float dist_limit, inT32 line_index, TO_ROW_IT *row_it, BOOL8 testing_on)
 
deskew_block_coords

Compute the bounding box of all the blobs in the block if they were deskewed without actually doing it.

TBOX deskew_block_coords (TO_BLOCK *block, float gradient)
 
compute_line_occupation

Compute the pixel projection back on the y axis given the global skew. Also compute the 1st derivative.

void compute_line_occupation (TO_BLOCK *block, float gradient, inT32 min_y, inT32 max_y, inT32 *occupation, inT32 *deltas)
 
void compute_occupation_threshold (inT32 low_window, inT32 high_window, inT32 line_count, inT32 *occupation, inT32 *thresholds)
 
compute_dropout_distances

Compute the distance from each coordinate to the nearest dropout.

void compute_dropout_distances (inT32 *occupation, inT32 *thresholds, inT32 line_count)
 
expand_rows

Expand each row to the least of its allowed size and touching its neighbours. If the expansion would entirely swallow a neighbouring row then do so.

void expand_rows (ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
 
void adjust_row_limits (TO_BLOCK *block)
 
compute_row_stats

Compute the linespacing and offset.

void compute_row_stats (TO_BLOCK *block, BOOL8 testing_on)
 
fill_heights

Fill the given heights with heights of the blobs that are legal candidates for estimating xheight.

void fill_heights (TO_ROW *row, float gradient, int min_height, int max_height, STATS *heights, STATS *floating_heights)
 
compute_xheight_from_modes

Given a STATS object heights, looks for the two most frequently occurring heights that look like xheight and xheight + ascrise. If found, sets the values of *xheight and *ascrise accordingly, otherwise sets *xheight to any most frequently occurring height and sets *ascrise to 0. Returns the number of times xheight occurred in heights. For each mode that is considered for being an xheight, the count of floating blobs (stored in floating_heights) is subtracted from the total count of the blobs of this height. This is done because blobs that sit far above the baseline could represent valid ascenders, but it is highly unlikely that such a character's height will be an xheight (e.g. -, ', =, ^, `, ", ', etc.). If cap_only is set, then force finding of only the top mode.

int compute_xheight_from_modes (STATS *heights, STATS *floating_heights, bool cap_only, int min_height, int max_height, float *xheight, float *ascrise)
 
compute_row_descdrop

Estimates the descdrop of this row. This function looks for "significant" descenders of lowercase letters (those that could not just be the small descenders of upper case letters like Q,J). The function also takes into account how many potential ascenders this row might contain. If the number of potential ascenders along with descenders is close to the expected fraction of the total number of blobs in the row, the function returns the descender height, returns 0 otherwise.

inT32 compute_row_descdrop (TO_ROW *row, float gradient, int xheight_blob_count, STATS *asc_heights)
 
compute_height_modes

Find the top maxmodes values in the input array and put their indices in the output in the order in which they occurred.

inT32 compute_height_modes (STATS *heights, inT32 min_height, inT32 max_height, inT32 *modes, inT32 maxmodes)
 
correct_row_xheight

Adjust the xheight etc of this row if not within reasonable limits of the average for the block.

void correct_row_xheight (TO_ROW *row, float xheight, float ascrise, float descdrop)
 
separate_underlines

Test wide objects for being potential underlines. If they are then put them in a separate list in the block.

void separate_underlines (TO_BLOCK *block, float gradient, FCOORD rotation, BOOL8 testing_on)
 
pre_associate_blobs

Associate overlapping blobs and fake chop wide blobs.

void pre_associate_blobs (ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, BOOL8 testing_on)
 
fit_parallel_rows

Re-fit the rows in the block to the given gradient.

void fit_parallel_rows (TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
 
fit_parallel_lms

Fit an LMS line to a row. Make the fit parallel to the given gradient and set the row accordingly.

void fit_parallel_lms (float gradient, TO_ROW *row)
 
make_baseline_spline

Fit an LMS line to a row. Make the fit parallel to the given gradient and set the row accordingly.

void make_baseline_spline (TO_ROW *row, TO_BLOCK *block)
 
segment_baseline

Divide the baseline up into segments which require a different quadratic fitted to them. Return TRUE if enough blobs were far enough away to need a quadratic.

BOOL8 segment_baseline (TO_ROW *row, TO_BLOCK *block, inT32 &segments, inT32 xstarts[])
 
linear_spline_baseline

Divide the baseline up into segments which require a different quadratic fitted to them.

Returns
TRUE if enough blobs were far enough away to need a quadratic.
double * linear_spline_baseline (TO_ROW *row, TO_BLOCK *block, inT32 &segments, inT32 xstarts[])
 
assign_blobs_to_rows

Make enough rows to allocate all the given blobs to one. If a block skew is given, use that, else attempt to track it.

void assign_blobs_to_rows (TO_BLOCK *block, float *gradient, int pass, BOOL8 reject_misses, BOOL8 make_new_rows, BOOL8 drawing_skew)
 
most_overlapping_row

Return the row which most overlaps the blob.

OVERLAP_STATE most_overlapping_row (TO_ROW_IT *row_it, TO_ROW *&best_row, float top, float bottom, float rowsize, BOOL8 testing_blob)
 
blob_x_order

Sort function to sort blobs in x from page left.

int blob_x_order (const void *item1, const void *item2)
 
row_y_order

Sort function to sort rows in y from page top.

int row_y_order (const void *item1, const void *item2)
 
row_spacing_order

Qsort style function to compare 2 TO_ROWS based on their spacing value.

int row_spacing_order (const void *item1, const void *item2)
 
mark_repeated_chars

Mark blobs marked with BTFT_LEADER in repeated sets using the repeated_set member of BLOBNBOX.

void mark_repeated_chars (TO_ROW *row)
 

Variables

bool textord_heavy_nr = FALSE
 
bool textord_show_initial_rows = FALSE
 
bool textord_show_parallel_rows = FALSE
 
bool textord_show_expanded_rows = FALSE
 
bool textord_show_final_rows = FALSE
 
bool textord_show_final_blobs = FALSE
 
bool textord_test_landscape = FALSE
 
bool textord_parallel_baselines = TRUE
 
bool textord_straight_baselines = FALSE
 
bool textord_old_baselines = TRUE
 
bool textord_old_xheight = FALSE
 
bool textord_fix_xheight_bug = TRUE
 
bool textord_fix_makerow_bug = TRUE
 
bool textord_debug_xheights = FALSE
 
bool textord_biased_skewcalc = TRUE
 
bool textord_interpolating_skew = TRUE
 
int textord_skewsmooth_offset = 4
 
int textord_skewsmooth_offset2 = 1
 
int textord_test_x = -MAX_INT32
 
int textord_test_y = -MAX_INT32
 
int textord_min_blobs_in_row = 4
 
int textord_spline_minblobs = 8
 
int textord_spline_medianwin = 6
 
int textord_max_blob_overlaps = 4
 
int textord_min_xheight = 10
 
double textord_spline_shift_fraction = 0.02
 
double textord_spline_outlier_fraction = 0.1
 
double textord_skew_ile = 0.5
 
double textord_skew_lag = 0.02
 
double textord_linespace_iqrlimit = 0.2
 
double textord_width_limit = 8
 
double textord_chop_width = 1.5
 
double textord_expansion_factor = 1.0
 
double textord_overlap_x = 0.375
 
double textord_minxh = 0.25
 
double textord_min_linesize = 1.25
 
double textord_excess_blobsize = 1.3
 
double textord_occupancy_threshold = 0.4
 
double textord_underline_width = 2.0
 
double textord_min_blob_height_fraction = 0.75
 
double textord_xheight_mode_fraction = 0.4
 
double textord_ascheight_mode_fraction = 0.08
 
double textord_descheight_mode_fraction = 0.08
 
double textord_ascx_ratio_min = 1.25
 
double textord_ascx_ratio_max = 1.8
 
double textord_descx_ratio_min = 0.25
 
double textord_descx_ratio_max = 0.6
 
double textord_xheight_error_margin = 0.1
 
int textord_lms_line_trials = 12
 
bool textord_new_initial_xheight = TRUE
 
bool textord_debug_blob = FALSE
 
const int kMinLeaderCount = 5
 

compute_page_skew

Compute the skew over a full page by averaging the gradients over all the lines. Get the error of the same row.

const double kNoiseSize = 0.5
 
const int kMinSize = 8
 
void compute_page_skew (TO_BLOCK_LIST *blocks, float &page_m, float &page_err)
 
void vigorous_noise_removal (TO_BLOCK *block)
 
void cleanup_rows_making (ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
 
void delete_non_dropout_rows (TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
 

Macro Definition Documentation

#define MAX_HEIGHT_MODES   12

Definition at line 105 of file makerow.cpp.

Function Documentation

void adjust_row_limits ( TO_BLOCK * block)

adjust_row_limits

Change the limits of rows to suit the default fractions.

Definition at line 1134 of file makerow.cpp.

1136  {
1137  TO_ROW *row; //current row
1138  float size; //size of row
1139  float ymax; //top of row
1140  float ymin; //bottom of row
1141  TO_ROW_IT row_it = block->get_rows ();
1142 
1144  tprintf("Adjusting row limits for block(%d,%d)\n",
1145  block->block->bounding_box().left(),
1146  block->block->bounding_box().top());
1147  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1148  row = row_it.data ();
1149  size = row->max_y () - row->min_y ();
1151  tprintf("Row at %f has min %f, max %f, size %f\n",
1152  row->intercept(), row->min_y(), row->max_y(), size);
1156  ymax = size * (tesseract::CCStruct::kXHeightFraction +
1159  row->set_limits (row->intercept () + ymin, row->intercept () + ymax);
1160  row->merged = FALSE;
1161  }
1162 }
#define tprintf(...)
Definition: tprintf.h:31
static const double kDescenderFraction
Definition: ccstruct.h:33
float intercept() const
Definition: blobbox.h:584
bool textord_show_expanded_rows
Definition: makerow.cpp:47
BOOL8 merged
Definition: blobbox.h:641
static const double kAscenderFraction
Definition: ccstruct.h:35
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:67
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
#define FALSE
Definition: capi.h:29
static const double kXHeightFraction
Definition: ccstruct.h:34
float min_y() const
Definition: blobbox.h:557
void set_limits(float new_min, float new_max)
Definition: blobbox.h:618
BLOCK * block
Definition: blobbox.h:773
float max_y() const
Definition: blobbox.h:554
void assign_blobs_to_rows ( TO_BLOCK * block,
float *  gradient,
int  pass,
BOOL8  reject_misses,
BOOL8  make_new_rows,
BOOL8  drawing_skew 
)

Definition at line 2310 of file makerow.cpp.

2317  {
2318  OVERLAP_STATE overlap_result; //what to do with it
2319  float ycoord; //current y
2320  float top, bottom; //of blob
2321  float g_length = 1.0f; //from gradient
2322  inT16 row_count; //no of rows
2323  inT16 left_x; //left edge
2324  inT16 last_x; //previous edge
2325  float block_skew; //y delta
2326  float smooth_factor; //for new coords
2327  float near_dist; //dist to nearest row
2328  ICOORD testpt; //testing only
2329  BLOBNBOX *blob; //current blob
2330  TO_ROW *row; //current row
2331  TO_ROW *dest_row = NULL; //row to put blob in
2332  //iterators
2333  BLOBNBOX_IT blob_it = &block->blobs;
2334  TO_ROW_IT row_it = block->get_rows ();
2335 
2336  ycoord =
2337  (block->block->bounding_box ().bottom () +
2338  block->block->bounding_box ().top ()) / 2.0f;
2339  if (gradient != NULL)
2340  g_length = sqrt (1 + *gradient * *gradient);
2341 #ifndef GRAPHICS_DISABLED
2342  if (drawing_skew)
2343  to_win->SetCursor(block->block->bounding_box ().left (), ycoord);
2344 #endif
2345  testpt = ICOORD (textord_test_x, textord_test_y);
2346  blob_it.sort (blob_x_order);
2347  smooth_factor = 1.0;
2348  block_skew = 0.0f;
2349  row_count = row_it.length (); //might have rows
2350  if (!blob_it.empty ()) {
2351  left_x = blob_it.data ()->bounding_box ().left ();
2352  }
2353  else {
2354  left_x = block->block->bounding_box ().left ();
2355  }
2356  last_x = left_x;
2357  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
2358  blob = blob_it.data ();
2359  if (gradient != NULL) {
2360  block_skew = (1 - 1 / g_length) * blob->bounding_box ().bottom ()
2361  + *gradient / g_length * blob->bounding_box ().left ();
2362  }
2363  else if (blob->bounding_box ().left () - last_x > block->line_size / 2
2364  && last_x - left_x > block->line_size * 2
2366  // tprintf("Interpolating skew from %g",block_skew);
2367  block_skew *= (float) (blob->bounding_box ().left () - left_x)
2368  / (last_x - left_x);
2369  // tprintf("to %g\n",block_skew);
2370  }
2371  last_x = blob->bounding_box ().left ();
2372  top = blob->bounding_box ().top () - block_skew;
2373  bottom = blob->bounding_box ().bottom () - block_skew;
2374 #ifndef GRAPHICS_DISABLED
2375  if (drawing_skew)
2376  to_win->DrawTo(blob->bounding_box ().left (), ycoord + block_skew);
2377 #endif
2378  if (!row_it.empty ()) {
2379  for (row_it.move_to_first ();
2380  !row_it.at_last () && row_it.data ()->min_y () > top;
2381  row_it.forward ());
2382  row = row_it.data ();
2383  if (row->min_y () <= top && row->max_y () >= bottom) {
2384  //any overlap
2385  dest_row = row;
2386  overlap_result = most_overlapping_row (&row_it, dest_row,
2387  top, bottom,
2388  block->line_size,
2389  blob->bounding_box ().
2390  contains (testpt));
2391  if (overlap_result == NEW_ROW && !reject_misses)
2392  overlap_result = ASSIGN;
2393  }
2394  else {
2395  overlap_result = NEW_ROW;
2396  if (!make_new_rows) {
2397  near_dist = row_it.data_relative (-1)->min_y () - top;
2398  //below bottom
2399  if (bottom < row->min_y ()) {
2400  if (row->min_y () - bottom <=
2401  (block->line_spacing -
2403  //done it
2404  overlap_result = ASSIGN;
2405  dest_row = row;
2406  }
2407  }
2408  else if (near_dist > 0
2409  && near_dist < bottom - row->max_y ()) {
2410  row_it.backward ();
2411  dest_row = row_it.data ();
2412  if (dest_row->min_y () - bottom <=
2413  (block->line_spacing -
2415  //done it
2416  overlap_result = ASSIGN;
2417  }
2418  }
2419  else {
2420  if (top - row->max_y () <=
2421  (block->line_spacing -
2422  block->line_size) * (textord_overlap_x +
2424  //done it
2425  overlap_result = ASSIGN;
2426  dest_row = row;
2427  }
2428  }
2429  }
2430  }
2431  if (overlap_result == ASSIGN)
2432  dest_row->add_blob (blob_it.extract (), top, bottom,
2433  block->line_size);
2434  if (overlap_result == NEW_ROW) {
2435  if (make_new_rows && top - bottom < block->max_blob_size) {
2436  dest_row =
2437  new TO_ROW (blob_it.extract (), top, bottom,
2438  block->line_size);
2439  row_count++;
2440  if (bottom > row_it.data ()->min_y ())
2441  row_it.add_before_then_move (dest_row);
2442  //insert in right place
2443  else
2444  row_it.add_after_then_move (dest_row);
2445  smooth_factor =
2446  1.0 / (row_count * textord_skew_lag +
2448  }
2449  else
2450  overlap_result = REJECT;
2451  }
2452  }
2453  else if (make_new_rows && top - bottom < block->max_blob_size) {
2454  overlap_result = NEW_ROW;
2455  dest_row =
2456  new TO_ROW(blob_it.extract(), top, bottom, block->line_size);
2457  row_count++;
2458  row_it.add_after_then_move(dest_row);
2459  smooth_factor = 1.0 / (row_count * textord_skew_lag +
2461  }
2462  else
2463  overlap_result = REJECT;
2464  if (blob->bounding_box ().contains(testpt) && textord_debug_blob) {
2465  if (overlap_result != REJECT) {
2466  tprintf("Test blob assigned to row at (%g,%g) on pass %d\n",
2467  dest_row->min_y(), dest_row->max_y(), pass);
2468  }
2469  else {
2470  tprintf("Test blob assigned to no row on pass %d\n", pass);
2471  }
2472  }
2473  if (overlap_result != REJECT) {
2474  while (!row_it.at_first() &&
2475  row_it.data()->min_y() > row_it.data_relative(-1)->min_y()) {
2476  row = row_it.extract();
2477  row_it.backward();
2478  row_it.add_before_then_move(row);
2479  }
2480  while (!row_it.at_last() &&
2481  row_it.data ()->min_y() < row_it.data_relative (1)->min_y()) {
2482  row = row_it.extract();
2483  row_it.forward();
2484  // Keep rows in order.
2485  row_it.add_after_then_move(row);
2486  }
2487  BLOBNBOX_IT added_blob_it(dest_row->blob_list());
2488  added_blob_it.move_to_last();
2489  TBOX prev_box = added_blob_it.data_relative(-1)->bounding_box();
2490  if (dest_row->blob_list()->singleton() ||
2491  !prev_box.major_x_overlap(blob->bounding_box())) {
2492  block_skew = (1 - smooth_factor) * block_skew
2493  + smooth_factor * (blob->bounding_box().bottom() -
2494  dest_row->initial_min_y());
2495  }
2496  }
2497  }
2498  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
2499  if (row_it.data()->blob_list()->empty())
2500  delete row_it.extract(); // Discard empty rows.
2501  }
2502 }
double textord_overlap_x
Definition: makerow.cpp:81
double textord_skew_lag
Definition: makerow.cpp:75
#define tprintf(...)
Definition: tprintf.h:31
void DrawTo(int x, int y)
Definition: scrollview.cpp:531
int blob_x_order(const void *item1, const void *item2)
Definition: makerow.cpp:2605
static const double kDescenderFraction
Definition: ccstruct.h:33
bool textord_interpolating_skew
Definition: makerow.cpp:59
Definition: makerow.h:31
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
Definition: makerow.h:32
void add_blob(BLOBNBOX *blob, float top, float bottom, float row_size)
Definition: blobbox.cpp:726
static const double kAscenderFraction
Definition: ccstruct.h:35
EXTERN ScrollView * to_win
Definition: drawtord.cpp:38
inT16 left() const
Definition: rect.h:68
void SetCursor(int x, int y)
Definition: scrollview.cpp:525
int textord_skewsmooth_offset2
Definition: makerow.cpp:61
int textord_test_x
Definition: makerow.cpp:62
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:67
OVERLAP_STATE
Definition: makerow.h:29
integer coordinate
Definition: points.h:30
int textord_skewsmooth_offset
Definition: makerow.cpp:60
bool major_x_overlap(const TBOX &box) const
Definition: rect.h:402
inT16 bottom() const
Definition: rect.h:61
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
Definition: rect.h:30
float line_spacing
Definition: blobbox.h:775
bool contains(const FCOORD pt) const
Definition: rect.h:323
#define NULL
Definition: host.h:144
const TBOX & bounding_box() const
Definition: blobbox.h:215
OVERLAP_STATE most_overlapping_row(TO_ROW_IT *row_it, TO_ROW *&best_row, float top, float bottom, float rowsize, BOOL8 testing_blob)
Definition: makerow.cpp:2510
inT16 top() const
Definition: rect.h:54
bool textord_debug_blob
Definition: makerow.cpp:103
float min_y() const
Definition: blobbox.h:557
int textord_test_y
Definition: makerow.cpp:63
BLOCK * block
Definition: blobbox.h:773
float max_y() const
Definition: blobbox.h:554
float line_size
Definition: blobbox.h:781
short inT16
Definition: host.h:100
BLOBNBOX_LIST blobs
Definition: blobbox.h:768
float initial_min_y() const
Definition: blobbox.h:563
int blob_x_order ( const void *  item1,
const void *  item2 
)

Definition at line 2605 of file makerow.cpp.

2607  {
2608  //converted ptr
2609  BLOBNBOX *blob1 = *(BLOBNBOX **) item1;
2610  //converted ptr
2611  BLOBNBOX *blob2 = *(BLOBNBOX **) item2;
2612 
2613  if (blob1->bounding_box ().left () < blob2->bounding_box ().left ())
2614  return -1;
2615  else if (blob1->bounding_box ().left () > blob2->bounding_box ().left ())
2616  return 1;
2617  else
2618  return 0;
2619 }
inT16 left() const
Definition: rect.h:68
const TBOX & bounding_box() const
Definition: blobbox.h:215
void cleanup_rows_making ( ICOORD  page_tr,
TO_BLOCK * block,
float  gradient,
FCOORD  rotation,
inT32  block_edge,
BOOL8  testing_on 
)

cleanup_rows_making

Remove overlapping rows and fit all the blobs to what's left.

Definition at line 525 of file makerow.cpp.

532  {
533  //iterators
534  BLOBNBOX_IT blob_it = &block->blobs;
535  TO_ROW_IT row_it = block->get_rows ();
536 
537 #ifndef GRAPHICS_DISABLED
538  if (textord_show_parallel_rows && testing_on) {
539  if (to_win == NULL)
540  create_to_win(page_tr);
541  }
542 #endif
543  //get row coords
544  fit_parallel_rows(block,
545  gradient,
546  rotation,
547  block_edge,
548  textord_show_parallel_rows &&testing_on);
550  gradient,
551  rotation,
552  block_edge,
553  textord_show_parallel_rows &&testing_on);
554  expand_rows(page_tr, block, gradient, rotation, block_edge, testing_on);
555  blob_it.set_to_list (&block->blobs);
556  row_it.set_to_list (block->get_rows ());
557  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
558  blob_it.add_list_after (row_it.data ()->blob_list ());
559  //give blobs back
560  assign_blobs_to_rows (block, &gradient, 1, FALSE, FALSE, FALSE);
561  //now new rows must be genuine
562  blob_it.set_to_list (&block->blobs);
563  blob_it.add_list_after (&block->large_blobs);
564  assign_blobs_to_rows (block, &gradient, 2, TRUE, TRUE, FALSE);
565  //safe to use big ones now
566  blob_it.set_to_list (&block->blobs);
567  //throw all blobs in
568  blob_it.add_list_after (&block->noise_blobs);
569  blob_it.add_list_after (&block->small_blobs);
570  assign_blobs_to_rows (block, &gradient, 3, FALSE, FALSE, FALSE);
571 }
void assign_blobs_to_rows(TO_BLOCK *block, float *gradient, int pass, BOOL8 reject_misses, BOOL8 make_new_rows, BOOL8 drawing_skew)
Definition: makerow.cpp:2310
ScrollView * create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:47
void expand_rows(ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
Definition: makerow.cpp:976
void delete_non_dropout_rows(TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
Definition: makerow.cpp:578
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:771
EXTERN ScrollView * to_win
Definition: drawtord.cpp:38
bool textord_show_parallel_rows
Definition: makerow.cpp:46
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:770
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
void fit_parallel_rows(TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
Definition: makerow.cpp:1962
#define FALSE
Definition: capi.h:29
#define TRUE
Definition: capi.h:28
#define NULL
Definition: host.h:144
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:772
BLOBNBOX_LIST blobs
Definition: blobbox.h:768
void compute_dropout_distances ( inT32 * occupation,
inT32 * thresholds,
inT32  line_count 
)

Definition at line 929 of file makerow.cpp.

933  {
934  inT32 line_index; //of thresholds line
935  inT32 distance; //from prev dropout
936  inT32 next_dist; //to next dropout
937  inT32 back_index; //for back filling
938  inT32 prev_threshold; //before overwrite
939 
940  distance = -line_count;
941  line_index = 0;
942  do {
943  do {
944  distance--;
945  prev_threshold = thresholds[line_index];
946  //distance from prev
947  thresholds[line_index] = distance;
948  line_index++;
949  }
950  while (line_index < line_count
951  && (occupation[line_index] < thresholds[line_index]
952  || occupation[line_index - 1] >= prev_threshold));
953  if (line_index < line_count) {
954  back_index = line_index - 1;
955  next_dist = 1;
956  while (next_dist < -distance && back_index >= 0) {
957  thresholds[back_index] = next_dist;
958  back_index--;
959  next_dist++;
960  distance++;
961  }
962  distance = 1;
963  }
964  }
965  while (line_index < line_count);
966 }
int inT32
Definition: host.h:102
inT32 compute_height_modes ( STATS * heights,
inT32  min_height,
inT32  max_height,
inT32 * modes,
inT32  maxmodes 
)

Definition at line 1654 of file makerow.cpp.

1658  { // size of modes
1659  inT32 pile_count; // no in source pile
1660  inT32 src_count; // no of source entries
1661  inT32 src_index; // current entry
1662  inT32 least_count; // height of smallest
1663  inT32 least_index; // index of least
1664  inT32 dest_count; // index in modes
1665 
1666  src_count = max_height + 1 - min_height;
1667  dest_count = 0;
1668  least_count = MAX_INT32;
1669  least_index = -1;
1670  for (src_index = 0; src_index < src_count; src_index++) {
1671  pile_count = heights->pile_count(min_height + src_index);
1672  if (pile_count > 0) {
1673  if (dest_count < maxmodes) {
1674  if (pile_count < least_count) {
1675  // find smallest in array
1676  least_count = pile_count;
1677  least_index = dest_count;
1678  }
1679  modes[dest_count++] = min_height + src_index;
1680  } else if (pile_count >= least_count) {
1681  while (least_index < maxmodes - 1) {
1682  modes[least_index] = modes[least_index + 1];
1683  // shuffle up
1684  least_index++;
1685  }
1686  // new one on end
1687  modes[maxmodes - 1] = min_height + src_index;
1688  if (pile_count == least_count) {
1689  // new smallest
1690  least_index = maxmodes - 1;
1691  } else {
1692  least_count = heights->pile_count(modes[0]);
1693  least_index = 0;
1694  for (dest_count = 1; dest_count < maxmodes; dest_count++) {
1695  pile_count = heights->pile_count(modes[dest_count]);
1696  if (pile_count < least_count) {
1697  // find smallest
1698  least_count = pile_count;
1699  least_index = dest_count;
1700  }
1701  }
1702  }
1703  }
1704  }
1705  }
1706  return dest_count;
1707 }
#define MAX_INT32
Definition: host.h:120
inT32 pile_count(inT32 value) const
Definition: statistc.h:78
int inT32
Definition: host.h:102
void compute_line_occupation ( TO_BLOCK * block,
float  gradient,
inT32  min_y,
inT32  max_y,
inT32 * occupation,
inT32 * deltas 
)

Definition at line 782 of file makerow.cpp.

789  {
790  inT32 line_count; //maxy-miny+1
791  inT32 line_index; //of scan line
792  int index; //array index for daft compilers
793  float top, bottom; //coords of blob
794  inT32 width; //of blob
795  TO_ROW *row; //current row
796  TO_ROW_IT row_it = block->get_rows ();
797  BLOBNBOX *blob; //current blob
798  BLOBNBOX_IT blob_it; //iterator
799  float length; //of skew vector
800  TBOX blob_box; //bounding box
801  FCOORD rotation; //inverse of skew
802 
803  line_count = max_y - min_y + 1;
804  length = sqrt (gradient * gradient + 1);
805  rotation = FCOORD (1 / length, -gradient / length);
806  for (line_index = 0; line_index < line_count; line_index++)
807  deltas[line_index] = 0;
808  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
809  row = row_it.data ();
810  blob_it.set_to_list (row->blob_list ());
811  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
812  blob_it.forward ()) {
813  blob = blob_it.data ();
814  blob_box = blob->bounding_box ();
815  blob_box.rotate (rotation);//de-skew it
816  top = blob_box.top ();
817  bottom = blob_box.bottom ();
818  width =
819  (inT32) floor ((FLOAT32) (blob_box.right () - blob_box.left ()));
820  if ((inT32) floor (bottom) < min_y
821  || (inT32) floor (bottom) - min_y >= line_count)
822  fprintf (stderr,
823  "Bad y coord of bottom, " INT32FORMAT "(" INT32FORMAT ","
824  INT32FORMAT ")\n", (inT32) floor (bottom), min_y, max_y);
825  //count transitions
826  index = (inT32) floor (bottom) - min_y;
827  deltas[index] += width;
828  if ((inT32) floor (top) < min_y
829  || (inT32) floor (top) - min_y >= line_count)
830  fprintf (stderr,
831  "Bad y coord of top, " INT32FORMAT "(" INT32FORMAT ","
832  INT32FORMAT ")\n", (inT32) floor (top), min_y, max_y);
833  index = (inT32) floor (top) - min_y;
834  deltas[index] -= width;
835  }
836  }
837  occupation[0] = deltas[0];
838  for (line_index = 1; line_index < line_count; line_index++)
839  occupation[line_index] = occupation[line_index - 1] + deltas[line_index];
840 }
float FLOAT32
Definition: host.h:111
#define INT32FORMAT
Definition: host.h:115
inT16 right() const
Definition: rect.h:75
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
inT16 left() const
Definition: rect.h:68
inT16 bottom() const
Definition: rect.h:61
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
Definition: rect.h:30
const TBOX & bounding_box() const
Definition: blobbox.h:215
inT16 top() const
Definition: rect.h:54
Definition: points.h:189
int inT32
Definition: host.h:102
void rotate(const FCOORD &vec)
Definition: rect.h:189
void compute_occupation_threshold ( inT32  low_window,
inT32  high_window,
inT32  line_count,
inT32 * occupation,
inT32 * thresholds 
)

compute_occupation_threshold

Compute thresholds for textline or not for the occupation array.

Definition at line 848 of file makerow.cpp.

854  {
855  inT32 line_index; //of thresholds line
856  inT32 low_index; //in occupation
857  inT32 high_index; //in occupation
858  inT32 sum; //current average
859  inT32 divisor; //to get thresholds
860  inT32 min_index; //of min occ
861  inT32 min_occ; //min in locality
862  inT32 test_index; //for finding min
863 
864  divisor =
865  (inT32) ceil ((low_window + high_window) / textord_occupancy_threshold);
866  if (low_window + high_window < line_count) {
867  for (sum = 0, high_index = 0; high_index < low_window; high_index++)
868  sum += occupation[high_index];
869  for (low_index = 0; low_index < high_window; low_index++, high_index++)
870  sum += occupation[high_index];
871  min_occ = occupation[0];
872  min_index = 0;
873  for (test_index = 1; test_index < high_index; test_index++) {
874  if (occupation[test_index] <= min_occ) {
875  min_occ = occupation[test_index];
876  min_index = test_index; //find min in region
877  }
878  }
879  for (line_index = 0; line_index < low_window; line_index++)
880  thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
881  //same out to end
882  for (low_index = 0; high_index < line_count; low_index++, high_index++) {
883  sum -= occupation[low_index];
884  sum += occupation[high_index];
885  if (occupation[high_index] <= min_occ) {
886  //find min in region
887  min_occ = occupation[high_index];
888  min_index = high_index;
889  }
890  //lost min from region
891  if (min_index <= low_index) {
892  min_occ = occupation[low_index + 1];
893  min_index = low_index + 1;
894  for (test_index = low_index + 2; test_index <= high_index;
895  test_index++) {
896  if (occupation[test_index] <= min_occ) {
897  min_occ = occupation[test_index];
898  //find min in region
899  min_index = test_index;
900  }
901  }
902  }
903  thresholds[line_index++] = (sum - min_occ) / divisor + min_occ;
904  }
905  }
906  else {
907  min_occ = occupation[0];
908  min_index = 0;
909  for (sum = 0, low_index = 0; low_index < line_count; low_index++) {
910  if (occupation[low_index] < min_occ) {
911  min_occ = occupation[low_index];
912  min_index = low_index;
913  }
914  sum += occupation[low_index];
915  }
916  line_index = 0;
917  }
918  for (; line_index < line_count; line_index++)
919  thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
920  //same out to end
921 }
double textord_occupancy_threshold
Definition: makerow.cpp:86
int inT32
Definition: host.h:102
void compute_page_skew ( TO_BLOCK_LIST *  blocks,
float &  page_m,
float &  page_err 
)

Definition at line 287 of file makerow.cpp.

291  {
292  inT32 row_count; //total rows
293  inT32 blob_count; //total_blobs
294  inT32 row_err; //integer error
295  float *gradients; //of rows
296  float *errors; //of rows
297  inT32 row_index; //of total
298  TO_ROW *row; //current row
299  TO_BLOCK_IT block_it = blocks; //iterator
300  TO_ROW_IT row_it;
301 
302  row_count = 0;
303  blob_count = 0;
304  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
305  block_it.forward ()) {
306  POLY_BLOCK* pb = block_it.data()->block->poly_block();
307  if (pb != NULL && !pb->IsText())
308  continue; // Pretend non-text blocks don't exist.
309  row_count += block_it.data ()->get_rows ()->length ();
310  //count up rows
311  row_it.set_to_list (block_it.data ()->get_rows ());
312  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
313  blob_count += row_it.data ()->blob_list ()->length ();
314  }
315  if (row_count == 0) {
316  page_m = 0.0f;
317  page_err = 0.0f;
318  return;
319  }
320  gradients = (float *) alloc_mem (blob_count * sizeof (float));
321  //get mem
322  errors = (float *) alloc_mem (blob_count * sizeof (float));
323  if (gradients == NULL || errors == NULL)
324  MEMORY_OUT.error ("compute_page_skew", ABORT, NULL);
325 
326  row_index = 0;
327  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
328  block_it.forward ()) {
329  POLY_BLOCK* pb = block_it.data()->block->poly_block();
330  if (pb != NULL && !pb->IsText())
331  continue; // Pretend non-text blocks don't exist.
332  row_it.set_to_list (block_it.data ()->get_rows ());
333  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
334  row = row_it.data ();
335  blob_count = row->blob_list ()->length ();
336  row_err = (inT32) ceil (row->line_error ());
337  if (row_err <= 0)
338  row_err = 1;
340  blob_count /= row_err;
341  for (blob_count /= row_err; blob_count > 0; blob_count--) {
342  gradients[row_index] = row->line_m ();
343  errors[row_index] = row->line_error ();
344  row_index++;
345  }
346  }
347  else if (blob_count >= textord_min_blobs_in_row) {
348  //get gradient
349  gradients[row_index] = row->line_m ();
350  errors[row_index] = row->line_error ();
351  row_index++;
352  }
353  }
354  }
355  if (row_index == 0) {
356  //desperate
357  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
358  block_it.forward ()) {
359  POLY_BLOCK* pb = block_it.data()->block->poly_block();
360  if (pb != NULL && !pb->IsText())
361  continue; // Pretend non-text blocks don't exist.
362  row_it.set_to_list (block_it.data ()->get_rows ());
363  for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
364  row_it.forward ()) {
365  row = row_it.data ();
366  gradients[row_index] = row->line_m ();
367  errors[row_index] = row->line_error ();
368  row_index++;
369  }
370  }
371  }
372  row_count = row_index;
373  row_index = choose_nth_item ((inT32) (row_count * textord_skew_ile),
374  gradients, row_count);
375  page_m = gradients[row_index];
376  row_index = choose_nth_item ((inT32) (row_count * textord_skew_ile),
377  errors, row_count);
378  page_err = errors[row_index];
379  free_mem(gradients);
380  free_mem(errors);
381 }
void free_mem(void *oldchunk)
Definition: memry.cpp:55
double textord_skew_ile
Definition: makerow.cpp:74
int textord_min_blobs_in_row
Definition: makerow.cpp:64
bool IsText() const
Definition: polyblk.h:52
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
Definition: errcode.h:30
inT32 choose_nth_item(inT32 index, float *array, inT32 count)
Definition: statistc.cpp:642
float line_m() const
Definition: blobbox.h:566
bool textord_biased_skewcalc
Definition: makerow.cpp:58
void error(const char *caller, TessErrorLogCode action, const char *format,...) const
Definition: errcode.cpp:40
void * alloc_mem(inT32 count)
Definition: memry.cpp:47
#define NULL
Definition: host.h:144
float line_error() const
Definition: blobbox.h:572
const ERRCODE MEMORY_OUT
Definition: stderr.h:25
int inT32
Definition: host.h:102
/**
 * compute_row_descdrop
 *
 * Estimate the descender drop of a row from how far blob bottoms sit below
 * the parallel baseline.  (Definition at line 1594 of file makerow.cpp.)
 *
 * @param row                 row whose blobs are examined; row->xheight is
 *                            used to scale the accepted height ranges
 * @param gradient            gradient used with row->parallel_c() to locate
 *                            the baseline under each blob
 * @param xheight_blob_count  count the mode must be compared against
 * @param asc_heights         height histogram used to count potential
 *                            ascenders
 * @return minus the modal drop, or 0 when descenders plus potential
 *         ascenders fall below the required fraction of xheight_blob_count
 */
inT32 compute_row_descdrop(TO_ROW *row,
                           float gradient,
                           int xheight_blob_count,
                           STATS *asc_heights) {
  // Count how many potential ascenders are in this row.
  int i_min = asc_heights->min_bucket();
  if ((i_min / row->xheight) < textord_ascx_ratio_min) {
    i_min = static_cast<int>(
        floor(row->xheight * textord_ascx_ratio_min + 0.5));
  }
  int i_max = asc_heights->max_bucket();
  if ((i_max / row->xheight) > textord_ascx_ratio_max) {
    i_max = static_cast<int>(floor(row->xheight * textord_ascx_ratio_max));
  }
  int num_potential_asc = 0;
  for (int i = i_min; i <= i_max; ++i) {
    num_potential_asc += asc_heights->pile_count(i);
  }

  // Histogram the drops that lie in the plausible descender range.
  inT32 min_height =
      static_cast<inT32>(floor(row->xheight * textord_descx_ratio_min + 0.5));
  inT32 max_height =
      static_cast<inT32>(floor(row->xheight * textord_descx_ratio_max));
  float xcentre;                 // centre of blob
  float height;                  // drop of blob bottom below the baseline
  BLOBNBOX_IT blob_it = row->blob_list();
  BLOBNBOX *blob;                // current blob
  STATS heights(min_height, max_height + 1);
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    blob = blob_it.data();
    if (!blob->joined_to_prev()) {
      xcentre = (blob->bounding_box().left() +
                 blob->bounding_box().right()) / 2.0f;
      height = (gradient * xcentre + row->parallel_c() -
                blob->bounding_box().bottom());
      if (height >= min_height && height <= max_height)
        heights.add(static_cast<int>(floor(height + 0.5)), 1);
    }
  }
  int blob_index = heights.mode();  // find mode
  int blob_count = heights.pile_count(blob_index);  // get count of mode
  float total_fraction =
      (textord_descheight_mode_fraction + textord_ascheight_mode_fraction);
  if (static_cast<float>(blob_count + num_potential_asc) <
      xheight_blob_count * total_fraction) {
    blob_count = 0;  // too little evidence: report no descenders
  }
  int descdrop = blob_count > 0 ? -blob_index : 0;
  if (textord_debug_xheights) {
    tprintf("Descdrop: %d (potential ascenders %d, descenders %d)\n",
            descdrop, num_potential_asc, blob_count);
    heights.print();
  }
  return descdrop;
}
inT32 min_bucket() const
Definition: statistc.cpp:209
double textord_descx_ratio_max
Definition: makerow.cpp:99
bool joined_to_prev() const
Definition: blobbox.h:241
float parallel_c() const
Definition: blobbox.h:575
#define tprintf(...)
Definition: tprintf.h:31
Definition: statistc.h:33
inT32 max_bucket() const
Definition: statistc.cpp:225
double textord_ascx_ratio_min
Definition: makerow.cpp:96
inT16 right() const
Definition: rect.h:75
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
bool textord_debug_xheights
Definition: makerow.cpp:57
double textord_ascx_ratio_max
Definition: makerow.cpp:97
double textord_descx_ratio_min
Definition: makerow.cpp:98
inT16 left() const
Definition: rect.h:68
inT16 bottom() const
Definition: rect.h:61
double textord_ascheight_mode_fraction
Definition: makerow.cpp:93
inT32 pile_count(inT32 value) const
Definition: statistc.h:78
const TBOX & bounding_box() const
Definition: blobbox.h:215
float xheight
Definition: blobbox.h:653
double textord_descheight_mode_fraction
Definition: makerow.cpp:95
int inT32
Definition: host.h:102
void compute_row_stats ( TO_BLOCK block,
BOOL8  testing_on 
)

Definition at line 1170 of file makerow.cpp.

1173  {
1174  inT32 row_index; //of median
1175  TO_ROW *row; //current row
1176  TO_ROW *prev_row; //previous row
1177  float iqr; //inter quartile range
1178  TO_ROW_IT row_it = block->get_rows ();
1179  //number of rows
1180  inT16 rowcount = row_it.length ();
1181  TO_ROW **rows; //for choose nth
1182 
1183  rows = (TO_ROW **) alloc_mem (rowcount * sizeof (TO_ROW *));
1184  if (rows == NULL)
1185  MEMORY_OUT.error ("compute_row_stats", ABORT, NULL);
1186  rowcount = 0;
1187  prev_row = NULL;
1188  row_it.move_to_last (); //start at bottom
1189  do {
1190  row = row_it.data ();
1191  if (prev_row != NULL) {
1192  rows[rowcount++] = prev_row;
1193  prev_row->spacing = row->intercept () - prev_row->intercept ();
1194  if (testing_on)
1195  tprintf ("Row at %g yields spacing of %g\n",
1196  row->intercept (), prev_row->spacing);
1197  }
1198  prev_row = row;
1199  row_it.backward ();
1200  }
1201  while (!row_it.at_last ());
1202  block->key_row = prev_row;
1203  block->baseline_offset =
1204  fmod (prev_row->parallel_c (), block->line_spacing);
1205  if (testing_on)
1206  tprintf ("Blob based spacing=(%g,%g), offset=%g",
1207  block->line_size, block->line_spacing, block->baseline_offset);
1208  if (rowcount > 0) {
1209  row_index = choose_nth_item (rowcount * 3 / 4, rows, rowcount,
1210  sizeof (TO_ROW *), row_spacing_order);
1211  iqr = rows[row_index]->spacing;
1212  row_index = choose_nth_item (rowcount / 4, rows, rowcount,
1213  sizeof (TO_ROW *), row_spacing_order);
1214  iqr -= rows[row_index]->spacing;
1215  row_index = choose_nth_item (rowcount / 2, rows, rowcount,
1216  sizeof (TO_ROW *), row_spacing_order);
1217  block->key_row = rows[row_index];
1218  if (testing_on)
1219  tprintf (" row based=%g(%g)", rows[row_index]->spacing, iqr);
1220  if (rowcount > 2
1221  && iqr < rows[row_index]->spacing * textord_linespace_iqrlimit) {
1223  if (rows[row_index]->spacing < block->line_spacing
1224  && rows[row_index]->spacing > block->line_size)
1225  //within range
1226  block->line_size = rows[row_index]->spacing;
1227  //spacing=size
1228  else if (rows[row_index]->spacing > block->line_spacing)
1229  block->line_size = block->line_spacing;
1230  //too big so use max
1231  }
1232  else {
1233  if (rows[row_index]->spacing < block->line_spacing)
1234  block->line_size = rows[row_index]->spacing;
1235  else
1236  block->line_size = block->line_spacing;
1237  //too big so use max
1238  }
1239  if (block->line_size < textord_min_xheight)
1240  block->line_size = (float) textord_min_xheight;
1241  block->line_spacing = rows[row_index]->spacing;
1242  block->max_blob_size =
1244  }
1245  block->baseline_offset = fmod (rows[row_index]->intercept (),
1246  block->line_spacing);
1247  }
1248  if (testing_on)
1249  tprintf ("\nEstimate line size=%g, spacing=%g, offset=%g\n",
1250  block->line_size, block->line_spacing, block->baseline_offset);
1251  free_mem(rows);
1252 }
void free_mem(void *oldchunk)
Definition: memry.cpp:55
bool textord_new_initial_xheight
Definition: makerow.cpp:102
TO_ROW * key_row
Definition: blobbox.h:794
float parallel_c() const
Definition: blobbox.h:575
#define tprintf(...)
Definition: tprintf.h:31
double textord_linespace_iqrlimit
Definition: makerow.cpp:76
float intercept() const
Definition: blobbox.h:584
Definition: errcode.h:30
inT32 choose_nth_item(inT32 index, float *array, inT32 count)
Definition: statistc.cpp:642
double textord_excess_blobsize
Definition: makerow.cpp:85
int row_spacing_order(const void *item1, const void *item2)
Definition: makerow.cpp:2649
void error(const char *caller, TessErrorLogCode action, const char *format,...) const
Definition: errcode.cpp:40
float spacing
Definition: blobbox.h:652
int textord_min_xheight
Definition: makerow.cpp:69
float baseline_offset
Definition: blobbox.h:783
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
float line_spacing
Definition: blobbox.h:775
void * alloc_mem(inT32 count)
Definition: memry.cpp:47
#define NULL
Definition: host.h:144
const ERRCODE MEMORY_OUT
Definition: stderr.h:25
float max_blob_size
Definition: blobbox.h:782
float line_size
Definition: blobbox.h:781
short inT16
Definition: host.h:100
int inT32
Definition: host.h:102
int compute_xheight_from_modes ( STATS heights,
STATS floating_heights,
bool  cap_only,
int  min_height,
int  max_height,
float *  xheight,
float *  ascrise 
)

Definition at line 1498 of file makerow.cpp.

1500  {
1501  int blob_index = heights->mode(); // find mode
1502  int blob_count = heights->pile_count(blob_index); // get count of mode
1503  if (textord_debug_xheights) {
1504  tprintf("min_height=%d, max_height=%d, mode=%d, count=%d, total=%d\n",
1505  min_height, max_height, blob_index, blob_count,
1506  heights->get_total());
1507  heights->print();
1508  floating_heights->print();
1509  }
1510  if (blob_count == 0) return 0;
1511  int modes[MAX_HEIGHT_MODES]; // biggest piles
1512  bool in_best_pile = FALSE;
1513  int prev_size = -MAX_INT32;
1514  int best_count = 0;
1515  int mode_count = compute_height_modes(heights, min_height, max_height,
1516  modes, MAX_HEIGHT_MODES);
1517  if (cap_only && mode_count > 1)
1518  mode_count = 1;
1519  int x;
1520  if (textord_debug_xheights) {
1521  tprintf("found %d modes: ", mode_count);
1522  for (x = 0; x < mode_count; x++) tprintf("%d ", modes[x]);
1523  tprintf("\n");
1524  }
1525 
1526  for (x = 0; x < mode_count - 1; x++) {
1527  if (modes[x] != prev_size + 1)
1528  in_best_pile = FALSE; // had empty height
1529  int modes_x_count = heights->pile_count(modes[x]) -
1530  floating_heights->pile_count(modes[x]);
1531  if ((modes_x_count >= blob_count * textord_xheight_mode_fraction) &&
1532  (in_best_pile || modes_x_count > best_count)) {
1533  for (int asc = x + 1; asc < mode_count; asc++) {
1534  float ratio =
1535  static_cast<float>(modes[asc]) / static_cast<float>(modes[x]);
1536  if (textord_ascx_ratio_min < ratio &&
1537  ratio < textord_ascx_ratio_max &&
1538  (heights->pile_count(modes[asc]) >=
1539  blob_count * textord_ascheight_mode_fraction)) {
1540  if (modes_x_count > best_count) {
1541  in_best_pile = true;
1542  best_count = modes_x_count;
1543  }
1544  if (textord_debug_xheights) {
1545  tprintf("X=%d, asc=%d, count=%d, ratio=%g\n",
1546  modes[x], modes[asc]-modes[x], modes_x_count, ratio);
1547  }
1548  prev_size = modes[x];
1549  *xheight = static_cast<float>(modes[x]);
1550  *ascrise = static_cast<float>(modes[asc] - modes[x]);
1551  }
1552  }
1553  }
1554  }
1555  if (*xheight == 0) { // single mode
1556  // Remove counts of the "floating" blobs (the one whose height is too
1557  // small in relation to it's top end of the bounding box) from heights
1558  // before computing the single-mode xheight.
1559  // Restore the counts in heights after the mode is found, since
1560  // floating blobs might be useful for determining potential ascenders
1561  // in compute_row_descdrop().
1562  if (floating_heights->get_total() > 0) {
1563  for (x = min_height; x < max_height; ++x) {
1564  heights->add(x, -(floating_heights->pile_count(x)));
1565  }
1566  blob_index = heights->mode(); // find the modified mode
1567  for (x = min_height; x < max_height; ++x) {
1568  heights->add(x, floating_heights->pile_count(x));
1569  }
1570  }
1571  *xheight = static_cast<float>(blob_index);
1572  *ascrise = 0.0f;
1573  best_count = heights->pile_count(blob_index);
1575  tprintf("Single mode xheight set to %g\n", *xheight);
1576  } else if (textord_debug_xheights) {
1577  tprintf("Multi-mode xheight set to %g, asc=%g\n", *xheight, *ascrise);
1578  }
1579  return best_count;
1580 }
inT32 get_total() const
Definition: statistc.h:86
#define tprintf(...)
Definition: tprintf.h:31
inT32 mode() const
Definition: statistc.cpp:118
void add(inT32 value, inT32 count)
Definition: statistc.cpp:104
double textord_ascx_ratio_min
Definition: makerow.cpp:96
bool textord_debug_xheights
Definition: makerow.cpp:57
double textord_ascx_ratio_max
Definition: makerow.cpp:97
double textord_xheight_mode_fraction
Definition: makerow.cpp:91
#define MAX_INT32
Definition: host.h:120
#define FALSE
Definition: capi.h:29
double textord_ascheight_mode_fraction
Definition: makerow.cpp:93
inT32 pile_count(inT32 value) const
Definition: statistc.h:78
void print() const
Definition: statistc.cpp:538
inT32 compute_height_modes(STATS *heights, inT32 min_height, inT32 max_height, inT32 *modes, inT32 maxmodes)
Definition: makerow.cpp:1654
#define MAX_HEIGHT_MODES
Definition: makerow.cpp:105
void correct_row_xheight ( TO_ROW row,
float  xheight,
float  ascrise,
float  descdrop 
)

Definition at line 1716 of file makerow.cpp.

1717  {
1718  ROW_CATEGORY row_category = get_row_category(row);
1719  if (textord_debug_xheights) {
1720  tprintf("correcting row xheight: row->xheight %.4f"
1721  ", row->acrise %.4f row->descdrop %.4f\n",
1722  row->xheight, row->ascrise, row->descdrop);
1723  }
1724  bool normal_xheight =
1726  bool cap_xheight =
1727  within_error_margin(row->xheight, xheight + ascrise,
1729  // Use the average xheight/ascrise for the following cases:
1730  // -- the xheight of the row could not be determined at all
1731  // -- the row has descenders (e.g. "many groups", "ISBN 12345 p.3")
1732  // and its xheight is close to either cap height or average xheight
1733  // -- the row does not have ascenders or descenders, but its xheight
1734  // is close to the average block xheight (e.g. row with "www.mmm.com")
1735  if (row_category == ROW_ASCENDERS_FOUND) {
1736  if (row->descdrop >= 0.0) {
1737  row->descdrop = row->xheight * (descdrop / xheight);
1738  }
1739  } else if (row_category == ROW_INVALID ||
1740  (row_category == ROW_DESCENDERS_FOUND &&
1741  (normal_xheight || cap_xheight)) ||
1742  (row_category == ROW_UNKNOWN && normal_xheight)) {
1743  if (textord_debug_xheights) tprintf("using average xheight\n");
1744  row->xheight = xheight;
1745  row->ascrise = ascrise;
1746  row->descdrop = descdrop;
1747  } else if (row_category == ROW_DESCENDERS_FOUND) {
1748  // Assume this is a row with mostly lowercase letters and it's xheight
1749  // is computed correctly (unfortunately there is no way to distinguish
1750  // this from the case when descenders are found, but the most common
1751  // height is capheight).
1752  if (textord_debug_xheights) tprintf("lowercase, corrected ascrise\n");
1753  row->ascrise = row->xheight * (ascrise / xheight);
1754  } else if (row_category == ROW_UNKNOWN) {
1755  // Otherwise assume this row is an all-caps or small-caps row
1756  // and adjust xheight and ascrise of the row.
1757 
1758  row->all_caps = true;
1759  if (cap_xheight) { // regular all caps
1760  if (textord_debug_xheights) tprintf("all caps\n");
1761  row->xheight = xheight;
1762  row->ascrise = ascrise;
1763  row->descdrop = descdrop;
1764  } else { // small caps or caps with an odd xheight
1765  if (textord_debug_xheights) {
1766  if (row->xheight < xheight + ascrise && row->xheight > xheight) {
1767  tprintf("small caps\n");
1768  } else {
1769  tprintf("all caps with irregular xheight\n");
1770  }
1771  }
1772  row->ascrise = row->xheight * (ascrise / (xheight + ascrise));
1773  row->xheight -= row->ascrise;
1774  row->descdrop = row->xheight * (descdrop / xheight);
1775  }
1776  }
1777  if (textord_debug_xheights) {
1778  tprintf("corrected row->xheight = %.4f, row->acrise = %.4f, row->descdrop"
1779  " = %.4f\n", row->xheight, row->ascrise, row->descdrop);
1780  }
1781 }
bool within_error_margin(float test, float num, float margin)
Definition: makerow.h:129
#define tprintf(...)
Definition: tprintf.h:31
double textord_xheight_error_margin
Definition: makerow.cpp:100
bool textord_debug_xheights
Definition: makerow.cpp:57
BOOL8 all_caps
Definition: blobbox.h:642
ROW_CATEGORY
Definition: makerow.h:36
float ascrise
Definition: blobbox.h:655
ROW_CATEGORY get_row_category(const TO_ROW *row)
Definition: makerow.h:123
float xheight
Definition: blobbox.h:653
float descdrop
Definition: blobbox.h:656
void delete_non_dropout_rows ( TO_BLOCK block,
float  gradient,
FCOORD  rotation,
inT32  block_edge,
BOOL8  testing_on 
)

delete_non_dropout_rows

Compute the linespacing and offset.

Definition at line 578 of file makerow.cpp.

584  {
585  TBOX block_box; //deskewed block
586  inT32 *deltas; //change in occupation
587  inT32 *occupation; //of pixel coords
588  inT32 max_y; //in block
589  inT32 min_y;
590  inT32 line_index; //of scan line
591  inT32 line_count; //no of scan lines
592  inT32 distance; //to drop-out
593  inT32 xleft; //of block
594  inT32 ybottom; //of block
595  TO_ROW *row; //current row
596  TO_ROW_IT row_it = block->get_rows ();
597  BLOBNBOX_IT blob_it = &block->blobs;
598 
599  if (row_it.length () == 0)
600  return; //empty block
601  block_box = deskew_block_coords (block, gradient);
602  xleft = block->block->bounding_box ().left ();
603  ybottom = block->block->bounding_box ().bottom ();
604  min_y = block_box.bottom () - 1;
605  max_y = block_box.top () + 1;
606  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
607  line_index = (inT32) floor (row_it.data ()->intercept ());
608  if (line_index <= min_y)
609  min_y = line_index - 1;
610  if (line_index >= max_y)
611  max_y = line_index + 1;
612  }
613  line_count = max_y - min_y + 1;
614  if (line_count <= 0)
615  return; //empty block
616  deltas = (inT32 *) alloc_mem (line_count * sizeof (inT32));
617  occupation = (inT32 *) alloc_mem (line_count * sizeof (inT32));
618  if (deltas == NULL || occupation == NULL)
619  MEMORY_OUT.error ("compute_line_spacing", ABORT, NULL);
620 
621  compute_line_occupation(block, gradient, min_y, max_y, occupation, deltas);
623  ceil (block->line_spacing *
626  (inT32) ceil (block->line_spacing *
629  max_y - min_y + 1, occupation, deltas);
630 #ifndef GRAPHICS_DISABLED
631  if (testing_on) {
632  draw_occupation(xleft, ybottom, min_y, max_y, occupation, deltas);
633  }
634 #endif
635  compute_dropout_distances(occupation, deltas, line_count);
636  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
637  row = row_it.data ();
638  line_index = (inT32) floor (row->intercept ());
639  distance = deltas[line_index - min_y];
640  if (find_best_dropout_row (row, distance, block->line_spacing / 2,
641  line_index, &row_it, testing_on)) {
642 #ifndef GRAPHICS_DISABLED
643  if (testing_on)
644  plot_parallel_row(row, gradient, block_edge,
645  ScrollView::WHITE, rotation);
646 #endif
647  blob_it.add_list_after (row_it.data ()->blob_list ());
648  delete row_it.extract (); //too far away
649  }
650  }
651  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
652  blob_it.add_list_after (row_it.data ()->blob_list ());
653  }
654 
655  free_mem(deltas);
656  free_mem(occupation);
657 }
BOOL8 find_best_dropout_row(TO_ROW *row, inT32 distance, float dist_limit, inT32 line_index, TO_ROW_IT *row_it, BOOL8 testing_on)
Definition: makerow.cpp:666
void compute_dropout_distances(inT32 *occupation, inT32 *thresholds, inT32 line_count)
Definition: makerow.cpp:929
void free_mem(void *oldchunk)
Definition: memry.cpp:55
static const double kDescenderFraction
Definition: ccstruct.h:33
void plot_parallel_row(TO_ROW *row, float gradient, inT32 left, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:125
float intercept() const
Definition: blobbox.h:584
Definition: errcode.h:30
TBOX deskew_block_coords(TO_BLOCK *block, float gradient)
Definition: makerow.cpp:746
static const double kAscenderFraction
Definition: ccstruct.h:35
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:67
void error(const char *caller, TessErrorLogCode action, const char *format,...) const
Definition: errcode.cpp:40
inT16 bottom() const
Definition: rect.h:61
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
void draw_occupation(inT32 xleft, inT32 ybottom, inT32 min_y, inT32 max_y, inT32 occupation[], inT32 thresholds[])
Definition: drawtord.cpp:166
Definition: rect.h:30
float line_spacing
Definition: blobbox.h:775
static const double kXHeightFraction
Definition: ccstruct.h:34
void * alloc_mem(inT32 count)
Definition: memry.cpp:47
#define NULL
Definition: host.h:144
inT16 top() const
Definition: rect.h:54
void compute_occupation_threshold(inT32 low_window, inT32 high_window, inT32 line_count, inT32 *occupation, inT32 *thresholds)
Definition: makerow.cpp:848
const ERRCODE MEMORY_OUT
Definition: stderr.h:25
BLOCK * block
Definition: blobbox.h:773
int inT32
Definition: host.h:102
void compute_line_occupation(TO_BLOCK *block, float gradient, inT32 min_y, inT32 max_y, inT32 *occupation, inT32 *deltas)
Definition: makerow.cpp:782
BLOBNBOX_LIST blobs
Definition: blobbox.h:768
/**
 * deskew_block_coords
 *
 * Compute the bounding box of all the blobs in the block's rows as if they
 * were deskewed, without modifying the blobs themselves.
 * (Definition at line 746 of file makerow.cpp.)
 *
 * @param block     block whose rows' blobs are measured
 * @param gradient  global skew gradient to undo
 * @return union of the rotated copies of every blob bounding box
 */
TBOX deskew_block_coords(TO_BLOCK *block,
                         float gradient) {
  TBOX bounds;                   // accumulated block bounds

  // Unit vector that rotates by the negative of the skew angle.
  const float norm = sqrt(gradient * gradient + 1);
  const FCOORD deskew = FCOORD(1 / norm, -gradient / norm);

  TO_ROW_IT row_it = block->get_rows();
  BLOBNBOX_IT blob_it;
  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
    blob_it.set_to_list(row_it.data()->blob_list());
    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list();
         blob_it.forward()) {
      // Rotate a copy of the box so the blob itself is untouched.
      TBOX blob_box = blob_it.data()->bounding_box();
      blob_box.rotate(deskew);
      bounds += blob_box;
    }
  }
  return bounds;
}
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
Definition: rect.h:30
const TBOX & bounding_box() const
Definition: blobbox.h:215
Definition: points.h:189
void rotate(const FCOORD &vec)
Definition: rect.h:189
void expand_rows ( ICOORD  page_tr,
TO_BLOCK block,
float  gradient,
FCOORD  rotation,
inT32  block_edge,
BOOL8  testing_on 
)

Definition at line 976 of file makerow.cpp.

983  {
984  BOOL8 swallowed_row; //eaten a neighbour
985  float y_max, y_min; //new row limits
986  float y_bottom, y_top; //allowed limits
987  TO_ROW *test_row; //next row
988  TO_ROW *row; //current row
989  //iterators
990  BLOBNBOX_IT blob_it = &block->blobs;
991  TO_ROW_IT row_it = block->get_rows ();
992 
993 #ifndef GRAPHICS_DISABLED
994  if (textord_show_expanded_rows && testing_on) {
995  if (to_win == NULL)
996  create_to_win(page_tr);
997  }
998 #endif
999 
1000  adjust_row_limits(block); //shift min,max.
1002  if (block->get_rows ()->length () == 0)
1003  return;
1004  compute_row_stats(block, textord_show_expanded_rows &&testing_on);
1005  }
1006  assign_blobs_to_rows (block, &gradient, 4, TRUE, FALSE, FALSE);
1007  //get real membership
1008  if (block->get_rows ()->length () == 0)
1009  return;
1010  fit_parallel_rows(block,
1011  gradient,
1012  rotation,
1013  block_edge,
1014  textord_show_expanded_rows &&testing_on);
1016  compute_row_stats(block, textord_show_expanded_rows &&testing_on);
1017  row_it.move_to_last ();
1018  do {
1019  row = row_it.data ();
1020  y_max = row->max_y (); //get current limits
1021  y_min = row->min_y ();
1022  y_bottom = row->intercept () - block->line_size * textord_expansion_factor *
1024  y_top = row->intercept () + block->line_size * textord_expansion_factor *
1027  if (y_min > y_bottom) { //expansion allowed
1028  if (textord_show_expanded_rows && testing_on)
1029  tprintf("Expanding bottom of row at %f from %f to %f\n",
1030  row->intercept(), y_min, y_bottom);
1031  //expandable
1032  swallowed_row = TRUE;
1033  while (swallowed_row && !row_it.at_last ()) {
1034  swallowed_row = FALSE;
1035  //get next one
1036  test_row = row_it.data_relative (1);
1037  //overlaps space
1038  if (test_row->max_y () > y_bottom) {
1039  if (test_row->min_y () > y_bottom) {
1040  if (textord_show_expanded_rows && testing_on)
1041  tprintf("Eating row below at %f\n", test_row->intercept());
1042  row_it.forward ();
1043 #ifndef GRAPHICS_DISABLED
1044  if (textord_show_expanded_rows && testing_on)
1045  plot_parallel_row(test_row,
1046  gradient,
1047  block_edge,
1049  rotation);
1050 #endif
1051  blob_it.set_to_list (row->blob_list ());
1052  blob_it.add_list_after (test_row->blob_list ());
1053  //swallow complete row
1054  delete row_it.extract ();
1055  row_it.backward ();
1056  swallowed_row = TRUE;
1057  }
1058  else if (test_row->max_y () < y_min) {
1059  //shorter limit
1060  y_bottom = test_row->max_y ();
1061  if (textord_show_expanded_rows && testing_on)
1062  tprintf("Truncating limit to %f due to touching row at %f\n",
1063  y_bottom, test_row->intercept());
1064  }
1065  else {
1066  y_bottom = y_min; //can't expand it
1067  if (textord_show_expanded_rows && testing_on)
1068  tprintf("Not expanding limit beyond %f due to touching row at %f\n",
1069  y_bottom, test_row->intercept());
1070  }
1071  }
1072  }
1073  y_min = y_bottom; //expand it
1074  }
1075  if (y_max < y_top) { //expansion allowed
1076  if (textord_show_expanded_rows && testing_on)
1077  tprintf("Expanding top of row at %f from %f to %f\n",
1078  row->intercept(), y_max, y_top);
1079  swallowed_row = TRUE;
1080  while (swallowed_row && !row_it.at_first ()) {
1081  swallowed_row = FALSE;
1082  //get one above
1083  test_row = row_it.data_relative (-1);
1084  if (test_row->min_y () < y_top) {
1085  if (test_row->max_y () < y_top) {
1086  if (textord_show_expanded_rows && testing_on)
1087  tprintf("Eating row above at %f\n", test_row->intercept());
1088  row_it.backward ();
1089  blob_it.set_to_list (row->blob_list ());
1090 #ifndef GRAPHICS_DISABLED
1091  if (textord_show_expanded_rows && testing_on)
1092  plot_parallel_row(test_row,
1093  gradient,
1094  block_edge,
1096  rotation);
1097 #endif
1098  blob_it.add_list_after (test_row->blob_list ());
1099  //swallow complete row
1100  delete row_it.extract ();
1101  row_it.forward ();
1102  swallowed_row = TRUE;
1103  }
1104  else if (test_row->min_y () < y_max) {
1105  //shorter limit
1106  y_top = test_row->min_y ();
1107  if (textord_show_expanded_rows && testing_on)
1108  tprintf("Truncating limit to %f due to touching row at %f\n",
1109  y_top, test_row->intercept());
1110  }
1111  else {
1112  y_top = y_max; //can't expand it
1113  if (textord_show_expanded_rows && testing_on)
1114  tprintf("Not expanding limit beyond %f due to touching row at %f\n",
1115  y_top, test_row->intercept());
1116  }
1117  }
1118  }
1119  y_max = y_top;
1120  }
1121  //new limits
1122  row->set_limits (y_min, y_max);
1123  row_it.backward ();
1124  }
1125  while (!row_it.at_last ());
1126 }
void assign_blobs_to_rows(TO_BLOCK *block, float *gradient, int pass, BOOL8 reject_misses, BOOL8 make_new_rows, BOOL8 drawing_skew)
Definition: makerow.cpp:2310
ScrollView * create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:47
bool textord_new_initial_xheight
Definition: makerow.cpp:102
#define tprintf(...)
Definition: tprintf.h:31
static const double kDescenderFraction
Definition: ccstruct.h:33
unsigned char BOOL8
Definition: host.h:113
void plot_parallel_row(TO_ROW *row, float gradient, inT32 left, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:125
float intercept() const
Definition: blobbox.h:584
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
bool textord_show_expanded_rows
Definition: makerow.cpp:47
static const double kAscenderFraction
Definition: ccstruct.h:35
EXTERN ScrollView * to_win
Definition: drawtord.cpp:38
double textord_expansion_factor
Definition: makerow.cpp:80
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
void fit_parallel_rows(TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
Definition: makerow.cpp:1962
#define FALSE
Definition: capi.h:29
void compute_row_stats(TO_BLOCK *block, BOOL8 testing_on)
Definition: makerow.cpp:1170
#define TRUE
Definition: capi.h:28
static const double kXHeightFraction
Definition: ccstruct.h:34
#define NULL
Definition: host.h:144
float min_y() const
Definition: blobbox.h:557
void adjust_row_limits(TO_BLOCK *block)
Definition: makerow.cpp:1134
void set_limits(float new_min, float new_max)
Definition: blobbox.h:618
float max_y() const
Definition: blobbox.h:554
float line_size
Definition: blobbox.h:781
BLOBNBOX_LIST blobs
Definition: blobbox.h:768
void fill_heights ( TO_ROW row,
float  gradient,
int  min_height,
int  max_height,
STATS heights,
STATS floating_heights 
)

Definition at line 1437 of file makerow.cpp.

1438  {
1439  float xcentre; // centre of blob
1440  float top; // top y coord of blob
1441  float height; // height of blob
1442  BLOBNBOX *blob; // current blob
1443  int repeated_set;
1444  BLOBNBOX_IT blob_it = row->blob_list();
1445  if (blob_it.empty()) return; // no blobs in this row
1446  bool has_rep_chars =
1447  row->rep_chars_marked() && row->num_repeated_sets() > 0;
1448  do {
1449  blob = blob_it.data();
1450  if (!blob->joined_to_prev()) {
1451  xcentre = (blob->bounding_box().left() +
1452  blob->bounding_box().right()) / 2.0f;
1453  top = blob->bounding_box().top();
1454  height = blob->bounding_box().height();
1456  top -= row->baseline.y(xcentre);
1457  else
1458  top -= gradient * xcentre + row->parallel_c();
1459  if (top >= min_height && top <= max_height) {
1460  heights->add(static_cast<inT32>(floor(top + 0.5)), 1);
1461  if (height / top < textord_min_blob_height_fraction) {
1462  floating_heights->add(static_cast<inT32>(floor(top + 0.5)), 1);
1463  }
1464  }
1465  }
1466  // Skip repeated chars, since they are likely to skew the height stats.
1467  if (has_rep_chars && blob->repeated_set() != 0) {
1468  repeated_set = blob->repeated_set();
1469  blob_it.forward();
1470  while (!blob_it.at_first() &&
1471  blob_it.data()->repeated_set() == repeated_set) {
1472  blob_it.forward();
1474  tprintf("Skipping repeated char when computing xheight\n");
1475  }
1476  } else {
1477  blob_it.forward();
1478  }
1479  } while (!blob_it.at_first());
1480 }
int repeated_set() const
Definition: blobbox.h:247
bool joined_to_prev() const
Definition: blobbox.h:241
float parallel_c() const
Definition: blobbox.h:575
#define tprintf(...)
Definition: tprintf.h:31
int num_repeated_sets() const
Definition: blobbox.h:633
void add(inT32 value, inT32 count)
Definition: statistc.cpp:104
QSPLINE baseline
Definition: blobbox.h:666
inT16 right() const
Definition: rect.h:75
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
bool textord_debug_xheights
Definition: makerow.cpp:57
bool textord_fix_xheight_bug
Definition: makerow.cpp:55
inT16 left() const
Definition: rect.h:68
bool rep_chars_marked() const
Definition: blobbox.h:627
double y(double x) const
Definition: quspline.cpp:217
inT16 height() const
Definition: rect.h:104
const TBOX & bounding_box() const
Definition: blobbox.h:215
inT16 top() const
Definition: rect.h:54
double textord_min_blob_height_fraction
Definition: makerow.cpp:89
BOOL8 find_best_dropout_row ( TO_ROW row,
inT32  distance,
float  dist_limit,
inT32  line_index,
TO_ROW_IT *  row_it,
BOOL8  testing_on 
)

Definition at line 666 of file makerow.cpp.

// find_best_dropout_row
//
// Decide whether `row` should be deleted because a neighbouring row has
// better dropout characteristics. Returns TRUE when the row should be
// deleted and FALSE when it should be kept.
//
// The sign of `distance` selects the scan direction: a negative distance
// scans forwards through row_it (row_inc = 1), otherwise backwards
// (row_inc = -1). `line_index` and `line_index + 2 * distance` bound the
// range of floor(intercept) values that count as a competing neighbour.
// With testing_on set, the decision is traced through tprintf.
673  {
674  inT32 next_index; //floored intercept of neighbouring row
675  inT32 row_offset; //from current row
676  inT32 abs_dist; //absolute distance
677  inT8 row_inc; //increment to row_index
678  TO_ROW *next_row; //neighbouring row being examined
679 
680  if (testing_on)
681  tprintf ("Row at %g(%g), dropout dist=%d,",
682  row->intercept (), row->parallel_c (), distance);
683  if (distance < 0) {
684  row_inc = 1;
685  abs_dist = -distance;
686  }
687  else {
688  row_inc = -1;
689  abs_dist = distance;
690  }
// A dropout further away than dist_limit condemns the row outright.
691  if (abs_dist > dist_limit) {
692  if (testing_on) {
693  tprintf (" too far - deleting\n");
694  }
695  return TRUE;
696  }
// Only scan when there is at least one row in the chosen direction.
// Otherwise control falls through to the final return FALSE without
// printing " keeping".
697  if ((distance < 0 && !row_it->at_last ())
698  || (distance >= 0 && !row_it->at_first ())) {
699  row_offset = row_inc;
700  do {
701  next_row = row_it->data_relative (row_offset);
702  next_index = (inT32) floor (next_row->intercept ());
// Neighbour lies strictly between line_index and line_index + 2*distance.
703  if ((distance < 0
704  && next_index < line_index
705  && next_index > line_index + distance + distance)
706  || (distance >= 0
707  && next_index > line_index
708  && next_index < line_index + distance + distance)) {
709  if (testing_on) {
710  tprintf (" nearer neighbour (%d) at %g\n",
711  line_index + distance - next_index,
712  next_row->intercept ());
713  }
714  return TRUE; //other is nearer
715  }
// Neighbour sits exactly on one end of the range: believability decides,
// and a tie goes against `row`.
716  else if (next_index == line_index
717  || next_index == line_index + distance + distance) {
718  if (row->believability () <= next_row->believability ()) {
719  if (testing_on) {
720  tprintf (" equal but more believable at %g (%g/%g)\n",
721  next_row->intercept (),
722  row->believability (),
723  next_row->believability ());
724  }
725  return TRUE; //other is more believable
726  }
727  }
728  row_offset += row_inc;
729  }
// Keep scanning only while neighbours sit exactly on an end of the range.
// NOTE(review): for backward scans row_offset is negative, so the
// `row_offset < length()` bound can only stop forward scans - confirm
// that is intended.
730  while ((next_index == line_index
731  || next_index == line_index + distance + distance)
732  && row_offset < row_it->length ());
733  if (testing_on)
734  tprintf (" keeping\n");
735  }
736  return FALSE;
737 }
float parallel_c() const
Definition: blobbox.h:575
#define tprintf(...)
Definition: tprintf.h:31
float intercept() const
Definition: blobbox.h:584
float believability() const
Definition: blobbox.h:581
#define FALSE
Definition: capi.h:29
#define TRUE
Definition: capi.h:28
SIGNED char inT8
Definition: host.h:98
int inT32
Definition: host.h:102
void fit_lms_line ( TO_ROW row)

Definition at line 267 of file makerow.cpp.

267  {
268  float m, c; // fitted line
270  BLOBNBOX_IT blob_it = row->blob_list();
271 
272  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
273  const TBOX& box = blob_it.data()->bounding_box();
274  lms.Add(ICOORD((box.left() + box.right()) / 2, box.bottom()));
275  }
276  double error = lms.Fit(&m, &c);
277  row->set_line(m, c, error);
278 }
inT16 right() const
Definition: rect.h:75
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
void set_line(float new_m, float new_c, float new_error)
Definition: blobbox.h:599
inT16 left() const
Definition: rect.h:68
integer coordinate
Definition: points.h:30
inT16 bottom() const
Definition: rect.h:61
void Add(const ICOORD &pt)
Definition: detlinefit.cpp:52
double Fit(ICOORD *pt1, ICOORD *pt2)
Definition: detlinefit.h:75
Definition: rect.h:30
void fit_parallel_lms ( float  gradient,
TO_ROW row 
)

Definition at line 2004 of file makerow.cpp.

2004  {
2005  float c; // fitted line
2006  int blobcount; // no of blobs
2008  BLOBNBOX_IT blob_it = row->blob_list();
2009 
2010  blobcount = 0;
2011  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
2012  if (!blob_it.data()->joined_to_prev()) {
2013  const TBOX& box = blob_it.data()->bounding_box();
2014  lms.Add(ICOORD((box.left() + box.right()) / 2, box.bottom()));
2015  blobcount++;
2016  }
2017  }
2018  double error = lms.ConstrainedFit(gradient, &c);
2019  row->set_parallel_line(gradient, c, error);
2021  error = lms.Fit(&gradient, &c);
2022  }
2023  //set the other too
2024  row->set_line(gradient, c, error);
2025 }
void set_parallel_line(float gradient, float new_c, float new_error)
Definition: blobbox.h:607
inT16 right() const
Definition: rect.h:75
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
void set_line(float new_m, float new_c, float new_error)
Definition: blobbox.h:599
inT16 left() const
Definition: rect.h:68
int textord_lms_line_trials
Definition: makerow.cpp:101
integer coordinate
Definition: points.h:30
inT16 bottom() const
Definition: rect.h:61
void Add(const ICOORD &pt)
Definition: detlinefit.cpp:52
double Fit(ICOORD *pt1, ICOORD *pt2)
Definition: detlinefit.h:75
Definition: rect.h:30
bool textord_straight_baselines
Definition: makerow.cpp:52
double ConstrainedFit(const FCOORD &direction, double min_dist, double max_dist, bool debug, ICOORD *line_pt)
Definition: detlinefit.cpp:131
void fit_parallel_rows ( TO_BLOCK block,
float  gradient,
FCOORD  rotation,
inT32  block_edge,
BOOL8  testing_on 
)

Definition at line 1962 of file makerow.cpp.

1968  {
1969 #ifndef GRAPHICS_DISABLED
1970  ScrollView::Color colour; //of row
1971 #endif
1972  TO_ROW_IT row_it = block->get_rows ();
1973 
1974  row_it.move_to_first ();
1975  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1976  if (row_it.data ()->blob_list ()->empty ())
1977  delete row_it.extract (); //nothing in it
1978  else
1979  fit_parallel_lms (gradient, row_it.data ());
1980  }
1981 #ifndef GRAPHICS_DISABLED
1982  if (testing_on) {
1983  colour = ScrollView::RED;
1984  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1985  plot_parallel_row (row_it.data (), gradient,
1986  block_edge, colour, rotation);
1987  colour = (ScrollView::Color) (colour + 1);
1988  if (colour > ScrollView::MAGENTA)
1989  colour = ScrollView::RED;
1990  }
1991  }
1992 #endif
1993  row_it.sort (row_y_order); //may have gone out of order
1994 }
int row_y_order(const void *item1, const void *item2)
Definition: makerow.cpp:2627
void plot_parallel_row(TO_ROW *row, float gradient, inT32 left, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:125
void fit_parallel_lms(float gradient, TO_ROW *row)
Definition: makerow.cpp:2004
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
double* linear_spline_baseline ( TO_ROW row,
TO_BLOCK block,
inT32 segments,
inT32  xstarts[] 
)

Definition at line 2219 of file makerow.cpp.

2224  {
2225  int blobcount; //no of blobs
2226  int blobindex; //current blob
2227  int index1, index2; //blob numbers
2228  int blobs_per_segment; //blobs in each
2229  TBOX box; //blob box
2230  TBOX new_box; //new_it box
2231  //blobs
2232  BLOBNBOX_IT blob_it = row->blob_list ();
2233  BLOBNBOX_IT new_it = blob_it; //front end
2234  float b, c; //fitted curve
2236  double *coeffs; //quadratic coeffs
2237  inT32 segment; //current segment
2238 
2239  box = box_next_pre_chopped (&blob_it);
2240  xstarts[0] = box.left ();
2241  blobcount = 1;
2242  while (!blob_it.at_first ()) {
2243  blobcount++;
2244  box = box_next_pre_chopped (&blob_it);
2245  }
2246  segments = blobcount / textord_spline_medianwin;
2247  if (segments < 1)
2248  segments = 1;
2249  blobs_per_segment = blobcount / segments;
2250  coeffs = (double *) alloc_mem (segments * 3 * sizeof (double));
2251  if (textord_oldbl_debug)
2252  tprintf
2253  ("Linear splining baseline of %d blobs at (%d,%d), into %d segments of %d blobs\n",
2254  blobcount, box.left (), box.bottom (), segments, blobs_per_segment);
2255  segment = 1;
2256  for (index2 = 0; index2 < blobs_per_segment / 2; index2++)
2257  box_next_pre_chopped(&new_it);
2258  index1 = 0;
2259  blobindex = index2;
2260  do {
2261  blobindex += blobs_per_segment;
2262  lms.Clear();
2263  while (index1 < blobindex || (segment == segments && index1 < blobcount)) {
2264  box = box_next_pre_chopped (&blob_it);
2265  int middle = (box.left() + box.right()) / 2;
2266  lms.Add(ICOORD(middle, box.bottom()));
2267  index1++;
2268  if (index1 == blobindex - blobs_per_segment / 2
2269  || index1 == blobcount - 1) {
2270  xstarts[segment] = box.left ();
2271  }
2272  }
2273  lms.Fit(&b, &c);
2274  coeffs[segment * 3 - 3] = 0;
2275  coeffs[segment * 3 - 2] = b;
2276  coeffs[segment * 3 - 1] = c;
2277  segment++;
2278  if (segment > segments)
2279  break;
2280 
2281  blobindex += blobs_per_segment;
2282  lms.Clear();
2283  while (index2 < blobindex || (segment == segments && index2 < blobcount)) {
2284  new_box = box_next_pre_chopped (&new_it);
2285  int middle = (new_box.left() + new_box.right()) / 2;
2286  lms.Add(ICOORD (middle, new_box.bottom()));
2287  index2++;
2288  if (index2 == blobindex - blobs_per_segment / 2
2289  || index2 == blobcount - 1) {
2290  xstarts[segment] = new_box.left ();
2291  }
2292  }
2293  lms.Fit(&b, &c);
2294  coeffs[segment * 3 - 3] = 0;
2295  coeffs[segment * 3 - 2] = b;
2296  coeffs[segment * 3 - 1] = c;
2297  segment++;
2298  }
2299  while (segment <= segments);
2300  return coeffs;
2301 }
int textord_spline_medianwin
Definition: makerow.cpp:66
#define tprintf(...)
Definition: tprintf.h:31
EXTERN bool textord_oldbl_debug
Definition: oldbasel.cpp:39
inT16 right() const
Definition: rect.h:75
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
inT16 left() const
Definition: rect.h:68
integer coordinate
Definition: points.h:30
inT16 bottom() const
Definition: rect.h:61
void Add(const ICOORD &pt)
Definition: detlinefit.cpp:52
TBOX box_next_pre_chopped(BLOBNBOX_IT *it)
Definition: blobbox.cpp:658
double Fit(ICOORD *pt1, ICOORD *pt2)
Definition: detlinefit.h:75
Definition: rect.h:30
void * alloc_mem(inT32 count)
Definition: memry.cpp:47
int inT32
Definition: host.h:102
void make_baseline_spline ( TO_ROW row,
TO_BLOCK block 
)

Definition at line 2087 of file makerow.cpp.

2088  {
2089  inT32 *xstarts; // spline boundaries
2090  double *coeffs; // quadratic coeffs
2091  inT32 segments; // no of segments
2092 
2093  xstarts =
2094  (inT32 *) alloc_mem((row->blob_list()->length() + 1) * sizeof(inT32));
2095  if (segment_baseline(row, block, segments, xstarts)
2097  coeffs = linear_spline_baseline(row, block, segments, xstarts);
2098  } else {
2099  xstarts[1] = xstarts[segments];
2100  segments = 1;
2101  coeffs = (double *) alloc_mem (3 * sizeof (double));
2102  coeffs[0] = 0;
2103  coeffs[1] = row->line_m ();
2104  coeffs[2] = row->line_c ();
2105  }
2106  row->baseline = QSPLINE (segments, xstarts, coeffs);
2107  free_mem(coeffs);
2108  free_mem(xstarts);
2109 }
void free_mem(void *oldchunk)
Definition: memry.cpp:55
float line_c() const
Definition: blobbox.h:569
QSPLINE baseline
Definition: blobbox.h:666
bool textord_parallel_baselines
Definition: makerow.cpp:51
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
float line_m() const
Definition: blobbox.h:566
BOOL8 segment_baseline(TO_ROW *row, TO_BLOCK *block, inT32 &segments, inT32 xstarts[])
Definition: makerow.cpp:2120
bool textord_straight_baselines
Definition: makerow.cpp:52
void * alloc_mem(inT32 count)
Definition: memry.cpp:47
double * linear_spline_baseline(TO_ROW *row, TO_BLOCK *block, inT32 &segments, inT32 xstarts[])
Definition: makerow.cpp:2219
int inT32
Definition: host.h:102
void make_initial_textrows ( ICOORD  page_tr,
TO_BLOCK block,
FCOORD  rotation,
BOOL8  testing_on 
)

Definition at line 227 of file makerow.cpp.

232  {
233  TO_ROW_IT row_it = block->get_rows ();
234 
235 #ifndef GRAPHICS_DISABLED
236  ScrollView::Color colour; //of row
237 
238  if (textord_show_initial_rows && testing_on) {
239  if (to_win == NULL)
240  create_to_win(page_tr);
241  }
242 #endif
243  //guess skew
244  assign_blobs_to_rows (block, NULL, 0, TRUE, TRUE, textord_show_initial_rows && testing_on);
245  row_it.move_to_first ();
246  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
247  fit_lms_line (row_it.data ());
248 #ifndef GRAPHICS_DISABLED
249  if (textord_show_initial_rows && testing_on) {
250  colour = ScrollView::RED;
251  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
252  plot_to_row (row_it.data (), colour, rotation);
253  colour = (ScrollView::Color) (colour + 1);
254  if (colour > ScrollView::MAGENTA)
255  colour = ScrollView::RED;
256  }
257  }
258 #endif
259 }
void plot_to_row(TO_ROW *row, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:91
void assign_blobs_to_rows(TO_BLOCK *block, float *gradient, int pass, BOOL8 reject_misses, BOOL8 make_new_rows, BOOL8 drawing_skew)
Definition: makerow.cpp:2310
ScrollView * create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:47
bool textord_show_initial_rows
Definition: makerow.cpp:45
void fit_lms_line(TO_ROW *row)
Definition: makerow.cpp:267
EXTERN ScrollView * to_win
Definition: drawtord.cpp:38
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
#define TRUE
Definition: capi.h:28
#define NULL
Definition: host.h:144
float make_rows ( ICOORD  page_tr,
TO_BLOCK_LIST *  port_blocks 
)

Definition at line 201 of file makerow.cpp.

201  {
202  float port_m; // global skew
203  float port_err; // global noise
204  TO_BLOCK_IT block_it; // iterator
205 
206  block_it.set_to_list(port_blocks);
207  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
208  block_it.forward())
209  make_initial_textrows(page_tr, block_it.data(), FCOORD(1.0f, 0.0f),
211  // compute globally
212  compute_page_skew(port_blocks, port_m, port_err);
213  block_it.set_to_list(port_blocks);
214  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
215  cleanup_rows_making(page_tr, block_it.data(), port_m, FCOORD(1.0f, 0.0f),
216  block_it.data()->block->bounding_box().left(),
218  }
219  return port_m; // global skew
220 }
unsigned char BOOL8
Definition: host.h:113
void make_initial_textrows(ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, BOOL8 testing_on)
Definition: makerow.cpp:227
bool textord_test_landscape
Definition: makerow.cpp:50
void compute_page_skew(TO_BLOCK_LIST *blocks, float &page_m, float &page_err)
Definition: makerow.cpp:287
void cleanup_rows_making(ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
Definition: makerow.cpp:525
Definition: points.h:189
float make_single_row ( ICOORD  page_tr,
bool  allow_sub_blobs,
TO_BLOCK block,
TO_BLOCK_LIST *  blocks 
)

Definition at line 164 of file makerow.cpp.

165  {
166  BLOBNBOX_IT blob_it = &block->blobs;
167  TO_ROW_IT row_it = block->get_rows();
168 
169  // Include all the small blobs and large blobs.
170  blob_it.add_list_after(&block->small_blobs);
171  blob_it.add_list_after(&block->noise_blobs);
172  blob_it.add_list_after(&block->large_blobs);
173  if (block->blobs.singleton() && allow_sub_blobs) {
174  blob_it.move_to_first();
175  float size = MakeRowFromSubBlobs(block, blob_it.data()->cblob(), &row_it);
176  if (size > block->line_size)
177  block->line_size = size;
178  } else if (block->blobs.empty()) {
179  // Make a fake blob.
180  C_BLOB* blob = C_BLOB::FakeBlob(block->block->bounding_box());
181  // The blobnbox owns the blob.
182  BLOBNBOX* bblob = new BLOBNBOX(blob);
183  blob_it.add_after_then_move(bblob);
184  }
185  MakeRowFromBlobs(block->line_size, &blob_it, &row_it);
186  // Fit an LMS line to the rows.
187  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward())
188  fit_lms_line(row_it.data());
189  float gradient;
190  float fit_error;
191  // Compute the skew based on the fitted line.
192  compute_page_skew(blocks, gradient, fit_error);
193  return gradient;
194 }
void fit_lms_line(TO_ROW *row)
Definition: makerow.cpp:267
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:771
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:67
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:770
float MakeRowFromSubBlobs(TO_BLOCK *block, C_BLOB *blob, TO_ROW_IT *row_it)
Definition: makerow.cpp:137
void compute_page_skew(TO_BLOCK_LIST *blocks, float &page_m, float &page_err)
Definition: makerow.cpp:287
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
static C_BLOB * FakeBlob(const TBOX &box)
Definition: stepblob.cpp:238
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:772
BLOCK * block
Definition: blobbox.h:773
float line_size
Definition: blobbox.h:781
BLOBNBOX_LIST blobs
Definition: blobbox.h:768
float MakeRowFromSubBlobs ( TO_BLOCK block,
C_BLOB blob,
TO_ROW_IT *  row_it 
)

Definition at line 137 of file makerow.cpp.

137  {
138  // The blobs made from the children will go in the small_blobs list.
139  BLOBNBOX_IT bb_it(&block->small_blobs);
140  C_OUTLINE_IT ol_it(blob->out_list());
141  // Get the children.
142  ol_it.set_to_list(ol_it.data()->child());
143  if (ol_it.empty())
144  return 0.0f;
145  for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) {
146  // Deep copy the child outline and use that to make a blob.
147  C_BLOB* blob = new C_BLOB(C_OUTLINE::deep_copy(ol_it.data()));
148  // Correct direction as needed.
150  BLOBNBOX* bbox = new BLOBNBOX(blob);
151  bb_it.add_after_then_move(bbox);
152  }
153  // Now we can make a row from the blobs.
154  return MakeRowFromBlobs(block->line_size, &bb_it, row_it);
155 }
void CheckInverseFlagAndDirection()
Definition: stepblob.cpp:221
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:771
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:64
float line_size
Definition: blobbox.h:781
static C_OUTLINE * deep_copy(const C_OUTLINE *src)
Definition: coutln.h:259
void mark_repeated_chars ( TO_ROW row)

Definition at line 2671 of file makerow.cpp.

// mark_repeated_chars
//
// Walk the row's blobs looking for runs of BTFT_LEADER blobs. A run of at
// least kMinLeaderCount blobs is given a fresh set number (1, 2, ...) via
// set_repeated_set(); otherwise a blob's set number is cleared to 0. The
// number of sets found is stored on the row with set_num_repeated_sets().
2671  {
2672  BLOBNBOX_IT box_it(row->blob_list()); // Iterator.
2673  int num_repeated_sets = 0;
2674  if (!box_it.empty()) {
2675  do {
2676  BLOBNBOX* bblob = box_it.data();
2677  int repeat_length = 1;
// A run can only start at a leader blob that is not joined to its
// predecessor and has a cblob.
2678  if (bblob->flow() == BTFT_LEADER &&
2679  !bblob->joined_to_prev() && bblob->cblob() != NULL) {
// Scan ahead on a copy of the iterator so box_it stays at the run start.
2680  BLOBNBOX_IT test_it(box_it);
2681  for (test_it.forward(); !test_it.at_first();) {
2682  bblob = test_it.data();
2683  if (bblob->flow() != BTFT_LEADER)
2684  break;
2685  test_it.forward();
2686  bblob = test_it.data();
// A joined blob or one without a cblob after a leader cancels the run.
2687  if (bblob->joined_to_prev() || bblob->cblob() == NULL) {
2688  repeat_length = 0;
2689  break;
2690  }
2691  ++repeat_length;
2692  }
2693  }
2694  if (repeat_length >= kMinLeaderCount) {
2695  num_repeated_sets++;
// Tag every blob of the run; box_it ends up just past the run.
2696  for (; repeat_length > 0; box_it.forward(), --repeat_length) {
2697  bblob = box_it.data();
2698  bblob->set_repeated_set(num_repeated_sets);
2699  }
2700  } else {
// NOTE(review): bblob may have been reassigned by the scan above, so this
// can clear a blob other than box_it.data() - confirm that is intended.
2701  bblob->set_repeated_set(0);
2702  box_it.forward();
2703  }
2704  } while (!box_it.at_first()); // until all done
2705  }
2706  row->set_num_repeated_sets(num_repeated_sets);
2707 }
const int kMinLeaderCount
Definition: makerow.cpp:107
bool joined_to_prev() const
Definition: blobbox.h:241
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
void set_num_repeated_sets(int num_sets)
Definition: blobbox.h:636
void set_repeated_set(int set_id)
Definition: blobbox.h:250
C_BLOB * cblob() const
Definition: blobbox.h:253
#define NULL
Definition: host.h:144
BlobTextFlowType flow() const
Definition: blobbox.h:280
OVERLAP_STATE most_overlapping_row ( TO_ROW_IT *  row_it,
TO_ROW *&  best_row,
float  top,
float  bottom,
float  rowsize,
BOOL8  testing_blob 
)

Definition at line 2510 of file makerow.cpp.

// most_overlapping_row
//
// Starting at the row row_it points to, examine the following rows that
// vertically overlap the span [bottom, top] and pick the one with the
// greatest overlap. On return best_row is that row and row_it points to it.
//
// Returns ASSIGN by default, REJECT when two rows both overlap the span by
// at least rowsize - 1, and NEW_ROW when the result is still ASSIGN but the
// best overlap leaves too much of the span uncovered (thresholds based on
// rowsize * textord_overlap_x).
//
// Side effect: when the current best row and an overlapping row together
// span no more than rowsize, they are merged into the overlapping row and
// the former best row is deleted from the list.
2517  {
2518  OVERLAP_STATE result; //result of tests
2519  float overlap; //of blob & row
2520  float bestover; //overlap with best row so far
2521  float merge_top, merge_bottom; //size of merged row
2522  ICOORD testpt; //testing only
2523  TO_ROW *row; //best row so far
2524  TO_ROW *test_row; //for multiple overlaps
2525  BLOBNBOX_IT blob_it; //for merging rows
2526 
2527  result = ASSIGN;
2528  row = row_it->data ();
// Overlap of [bottom, top] with the first row: the full span minus
// whatever sticks out above max_y or below min_y.
2529  bestover = top - bottom;
2530  if (top > row->max_y ())
2531  bestover -= top - row->max_y ();
2532  if (bottom < row->min_y ())
2533  //compute overlap
2534  bestover -= row->min_y () - bottom;
2535  if (testing_blob && textord_debug_blob) {
2536  tprintf("Test blob y=(%g,%g), row=(%f,%f), size=%g, overlap=%f\n",
2537  bottom, top, row->min_y(), row->max_y(), rowsize, bestover);
2538  }
2539  test_row = row;
2540  do {
2541  if (!row_it->at_last ()) {
2542  row_it->forward ();
2543  test_row = row_it->data ();
// Only rows whose limits intersect the span are considered.
2544  if (test_row->min_y () <= top && test_row->max_y () >= bottom) {
2545  merge_top =
2546  test_row->max_y () >
2547  row->max_y ()? test_row->max_y () : row->max_y ();
2548  merge_bottom =
2549  test_row->min_y () <
2550  row->min_y ()? test_row->min_y () : row->min_y ();
// The union of the two rows still fits in one row height: merge `row`
// into test_row and delete `row`.
2551  if (merge_top - merge_bottom <= rowsize) {
// NOTE(review): unlike the other traces here, this one is not gated on
// textord_debug_blob - confirm that is intended.
2552  if (testing_blob) {
2553  tprintf ("Merging rows at (%g,%g), (%g,%g)\n",
2554  row->min_y (), row->max_y (),
2555  test_row->min_y (), test_row->max_y ());
2556  }
2557  test_row->set_limits (merge_bottom, merge_top);
2558  blob_it.set_to_list (test_row->blob_list ());
2559  blob_it.add_list_after (row->blob_list ());
2560  blob_it.sort (blob_x_order);
// Step back to the old row, remove it, and return to test_row.
2561  row_it->backward ();
2562  delete row_it->extract ();
2563  row_it->forward ();
2564  bestover = -1.0f; //force replacement
2565  }
2566  overlap = top - bottom;
2567  if (top > test_row->max_y ())
2568  overlap -= top - test_row->max_y ();
2569  if (bottom < test_row->min_y ())
2570  overlap -= test_row->min_y () - bottom;
// Two different rows both cover nearly a full row height of the span.
2571  if (bestover >= rowsize - 1 && overlap >= rowsize - 1) {
2572  result = REJECT;
2573  }
2574  if (overlap > bestover) {
2575  bestover = overlap; //find biggest overlap
2576  row = test_row;
2577  }
2578  if (testing_blob && textord_debug_blob) {
2579  tprintf("Test blob y=(%g,%g), row=(%f,%f), size=%g, overlap=%f->%f\n",
2580  bottom, top, test_row->min_y(), test_row->max_y(),
2581  rowsize, overlap, bestover);
2582  }
2583  }
2584  }
2585  }
// Continue while there are more rows and the last one examined still
// intersected the span.
2586  while (!row_it->at_last ()
2587  && test_row->min_y () <= top && test_row->max_y () >= bottom);
2588  while (row_it->data () != row)
2589  row_it->backward (); //make it point to row
2590  //doesn't overlap much
2591  if (top - bottom - bestover > rowsize * textord_overlap_x &&
2592  (!textord_fix_makerow_bug || bestover < rowsize * textord_overlap_x)
2593  && result == ASSIGN)
2594  result = NEW_ROW; //doesn't overlap enough
2595  best_row = row;
2596  return result;
2597 }
double textord_overlap_x
Definition: makerow.cpp:81
#define tprintf(...)
Definition: tprintf.h:31
int blob_x_order(const void *item1, const void *item2)
Definition: makerow.cpp:2605
bool textord_fix_makerow_bug
Definition: makerow.cpp:56
Definition: makerow.h:31
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
Definition: makerow.h:32
OVERLAP_STATE
Definition: makerow.h:29
integer coordinate
Definition: points.h:30
bool textord_debug_blob
Definition: makerow.cpp:103
float min_y() const
Definition: blobbox.h:557
void set_limits(float new_min, float new_max)
Definition: blobbox.h:618
float max_y() const
Definition: blobbox.h:554
void pre_associate_blobs ( ICOORD  page_tr,
TO_BLOCK block,
FCOORD  rotation,
BOOL8  testing_on 
)

Definition at line 1876 of file makerow.cpp.

1881  {
1882 #ifndef GRAPHICS_DISABLED
1883  ScrollView::Color colour; //of boxes
1884 #endif
1885  BLOBNBOX *blob; //current blob
1886  BLOBNBOX *nextblob; //next in list
1887  TBOX blob_box;
1888  FCOORD blob_rotation; //inverse of rotation
1889  BLOBNBOX_IT blob_it; //iterator
1890  BLOBNBOX_IT start_it; //iterator
1891  TO_ROW_IT row_it = block->get_rows ();
1892 
1893 #ifndef GRAPHICS_DISABLED
1894  colour = ScrollView::RED;
1895 #endif
1896 
1897  blob_rotation = FCOORD (rotation.x (), -rotation.y ());
1898  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1899  //get blobs
1900  blob_it.set_to_list (row_it.data ()->blob_list ());
1901  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
1902  blob_it.forward ()) {
1903  blob = blob_it.data ();
1904  blob_box = blob->bounding_box ();
1905  start_it = blob_it; //save start point
1906  // if (testing_on && textord_show_final_blobs)
1907  // {
1908  // tprintf("Blob at (%d,%d)->(%d,%d), addr=%x, count=%d\n",
1909  // blob_box.left(),blob_box.bottom(),
1910  // blob_box.right(),blob_box.top(),
1911  // (void*)blob,blob_it.length());
1912  // }
1913  bool overlap;
1914  do {
1915  overlap = false;
1916  if (!blob_it.at_last ()) {
1917  nextblob = blob_it.data_relative(1);
1918  overlap = blob_box.major_x_overlap(nextblob->bounding_box());
1919  if (overlap) {
1920  blob->merge(nextblob); // merge new blob
1921  blob_box = blob->bounding_box(); // get bigger box
1922  blob_it.forward();
1923  }
1924  }
1925  }
1926  while (overlap);
1927  blob->chop (&start_it, &blob_it,
1928  blob_rotation,
1931  //attempt chop
1932  }
1933 #ifndef GRAPHICS_DISABLED
1934  if (testing_on && textord_show_final_blobs) {
1935  if (to_win == NULL)
1936  create_to_win(page_tr);
1937  to_win->Pen(colour);
1938  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
1939  blob_it.forward ()) {
1940  blob = blob_it.data ();
1941  blob_box = blob->bounding_box ();
1942  blob_box.rotate (rotation);
1943  if (!blob->joined_to_prev ()) {
1944  to_win->Rectangle (blob_box.left (), blob_box.bottom (),
1945  blob_box.right (), blob_box.top ());
1946  }
1947  }
1948  colour = (ScrollView::Color) (colour + 1);
1949  if (colour > ScrollView::MAGENTA)
1950  colour = ScrollView::RED;
1951  }
1952 #endif
1953  }
1954 }
void Pen(Color color)
Definition: scrollview.cpp:726
ScrollView * create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:47
bool textord_show_final_blobs
Definition: makerow.cpp:49
float x() const
Definition: points.h:209
bool joined_to_prev() const
Definition: blobbox.h:241
double textord_chop_width
Definition: makerow.cpp:78
inT16 right() const
Definition: rect.h:75
EXTERN ScrollView * to_win
Definition: drawtord.cpp:38
inT16 left() const
Definition: rect.h:68
void chop(BLOBNBOX_IT *start_it, BLOBNBOX_IT *blob_it, FCOORD rotation, float xheight)
Definition: blobbox.cpp:113
bool major_x_overlap(const TBOX &box) const
Definition: rect.h:402
inT16 bottom() const
Definition: rect.h:61
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
Definition: rect.h:30
float y() const
Definition: points.h:212
void Rectangle(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:606
static const double kXHeightFraction
Definition: ccstruct.h:34
#define NULL
Definition: host.h:144
const TBOX & bounding_box() const
Definition: blobbox.h:215
inT16 top() const
Definition: rect.h:54
Definition: points.h:189
void merge(BLOBNBOX *nextblob)
Definition: blobbox.cpp:85
float line_size
Definition: blobbox.h:781
void rotate(const FCOORD &vec)
Definition: rect.h:189
int row_spacing_order ( const void *  item1,
const void *  item2 
)

Definition at line 2649 of file makerow.cpp.

2651  {
2652  //converted ptr
2653  TO_ROW *row1 = *(TO_ROW **) item1;
2654  //converted ptr
2655  TO_ROW *row2 = *(TO_ROW **) item2;
2656 
2657  if (row1->spacing < row2->spacing)
2658  return -1;
2659  else if (row1->spacing > row2->spacing)
2660  return 1;
2661  else
2662  return 0;
2663 }
float spacing
Definition: blobbox.h:652
int row_y_order ( const void *  item1,
const void *  item2 
)

Definition at line 2627 of file makerow.cpp.

2629  {
2630  //converted ptr
2631  TO_ROW *row1 = *(TO_ROW **) item1;
2632  //converted ptr
2633  TO_ROW *row2 = *(TO_ROW **) item2;
2634 
2635  if (row1->parallel_c () > row2->parallel_c ())
2636  return -1;
2637  else if (row1->parallel_c () < row2->parallel_c ())
2638  return 1;
2639  else
2640  return 0;
2641 }
float parallel_c() const
Definition: blobbox.h:575
BOOL8 segment_baseline ( TO_ROW row,
TO_BLOCK block,
inT32 segments,
inT32  xstarts[] 
)

Definition at line 2120 of file makerow.cpp.

// segment_baseline
//
// Split the row into baseline segments by tracking, over a sliding window
// of textord_spline_medianwin blobs, whether the median offset of blob
// bottoms from the row's fitted line (line_m, line_c) is above, on or
// below the line by more than textord_spline_shift_fraction *
// block->line_size.
//
// Outputs: `segments` receives the number of segments and `xstarts` the
// x coordinates of the segment boundaries (xstarts[0] is the left edge of
// the first blob). Returns TRUE if any window was off the line, i.e. a
// curved baseline is needed. Rows with too few blobs return FALSE early
// with a single segment.
2125  {
2126  BOOL8 needs_curve; //needs curved line
2127  int blobcount; //no of blobs
2128  int blobindex; //current blob
2129  int last_state; //above, on , below
2130  int state; //of current blob
2131  float yshift; //from baseline
2132  TBOX box; //blob box
2133  TBOX new_box; //new_it box
2134  float middle; //xcentre of blob
2135  //blobs
2136  BLOBNBOX_IT blob_it = row->blob_list ();
2137  BLOBNBOX_IT new_it = blob_it; //front end
2138  SORTED_FLOATS yshifts; //shifts from baseline
2139 
2140  needs_curve = FALSE;
2141  box = box_next_pre_chopped (&blob_it);
2142  xstarts[0] = box.left ();
2143  segments = 1;
2144  blobcount = row->blob_list ()->length ();
2145  if (textord_oldbl_debug)
2146  tprintf ("Segmenting baseline of %d blobs at (%d,%d)\n",
2147  blobcount, box.left (), box.bottom ());
// Too few blobs to fill a window: one segment ending at the last blob.
2148  if (blobcount <= textord_spline_medianwin
2149  || blobcount < textord_spline_minblobs) {
2150  blob_it.move_to_last ();
2151  box = blob_it.data ()->bounding_box ();
2152  xstarts[1] = box.right ();
2153  return FALSE;
2154  }
2155  last_state = 0;
2156  new_it.mark_cycle_pt ();
// Prime the window with the first textord_spline_medianwin shifts, keyed
// by blob index so they can be removed again later.
2157  for (blobindex = 0; blobindex < textord_spline_medianwin; blobindex++) {
2158  new_box = box_next_pre_chopped (&new_it);
2159  middle = (new_box.left () + new_box.right ()) / 2.0;
2160  yshift = new_box.bottom () - row->line_m () * middle - row->line_c ();
2161  //record shift
2162  yshifts.add (yshift, blobindex);
// The front iterator wrapped while priming: treat as a single segment.
2163  if (new_it.cycled_list ()) {
2164  xstarts[1] = new_box.right ();
2165  return FALSE;
2166  }
2167  }
// Move the trailing iterator on by half a window. blobcount is reused
// from here on as the number of blobs in the current segment.
2168  for (blobcount = 0; blobcount < textord_spline_medianwin / 2; blobcount++)
2169  box = box_next_pre_chopped (&blob_it);
2170  do {
2171  new_box = box_next_pre_chopped (&new_it);
2172  //get middle one
2173  yshift = yshifts[textord_spline_medianwin / 2];
2174  if (yshift > textord_spline_shift_fraction * block->line_size)
2175  state = 1;
2176  else if (-yshift > textord_spline_shift_fraction * block->line_size)
2177  state = -1;
2178  else
2179  state = 0;
2180  if (state != 0)
2181  needs_curve = TRUE;
2182  // tprintf("State=%d, prev=%d, shift=%g\n",
2183  // state,last_state,yshift);
// Start a new segment on a state change, provided the current segment
// already holds more than textord_spline_minblobs blobs.
2184  if (state != last_state && blobcount > textord_spline_minblobs) {
2185  xstarts[segments++] = box.left ();
2186  blobcount = 0;
2187  }
2188  last_state = state;
// Slide the window: drop the oldest shift and add the newest.
2189  yshifts.remove (blobindex - textord_spline_medianwin);
2190  box = box_next_pre_chopped (&blob_it);
2191  middle = (new_box.left () + new_box.right ()) / 2.0;
2192  yshift = new_box.bottom () - row->line_m () * middle - row->line_c ();
2193  yshifts.add (yshift, blobindex);
2194  blobindex++;
2195  blobcount++;
2196  }
2197  while (!new_it.cycled_list ());
// Close the final segment. If it is too short (and not the only one), the
// previous boundary is overwritten so the short tail joins the segment
// before it.
2198  if (blobcount > textord_spline_minblobs || segments == 1) {
2199  xstarts[segments] = new_box.right ();
2200  }
2201  else {
2202  xstarts[--segments] = new_box.right ();
2203  }
2204  if (textord_oldbl_debug)
2205  tprintf ("Made %d segments on row at (%d,%d)\n",
2206  segments, box.right (), box.bottom ());
2207  return needs_curve;
2208 }
int textord_spline_medianwin
Definition: makerow.cpp:66
#define tprintf(...)
Definition: tprintf.h:31
EXTERN bool textord_oldbl_debug
Definition: oldbasel.cpp:39
float line_c() const
Definition: blobbox.h:569
unsigned char BOOL8
Definition: host.h:113
inT16 right() const
Definition: rect.h:75
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
double textord_spline_shift_fraction
Definition: makerow.cpp:71
float line_m() const
Definition: blobbox.h:566
inT16 left() const
Definition: rect.h:68
void add(float value, inT32 key)
Definition: sortflts.cpp:28
inT16 bottom() const
Definition: rect.h:61
TBOX box_next_pre_chopped(BLOBNBOX_IT *it)
Definition: blobbox.cpp:658
#define FALSE
Definition: capi.h:29
int textord_spline_minblobs
Definition: makerow.cpp:65
Definition: rect.h:30
#define TRUE
Definition: capi.h:28
void remove(inT32 key)
Definition: sortflts.cpp:53
float line_size
Definition: blobbox.h:781
void separate_underlines ( TO_BLOCK *block,
float  gradient,
FCOORD  rotation,
BOOL8  testing_on 
)

Definition at line 1803 of file makerow.cpp.

// separate_underlines: scan every blob of every row in the block and move
// unusually wide blobs out of the row, either onto block->underlines (when
// test_underline() accepts the de-rotated blob) or onto block->large_blobs
// (when the blob overlaps more than textord_max_blob_overlaps row blobs).
//   block      - block whose rows are scanned; its underlines/large_blobs
//                lists receive the extracted blobs.
//   gradient   - skew gradient used to build the unit vector g_vec.
//   rotation   - rotation whose conjugate (x, -y) is combined with g_vec.
//   testing_on - together with textord_show_final_rows enables debug output.
1806  { // correct orientation
1807  BLOBNBOX *blob; // current blob
1808  C_BLOB *rotated_blob; // rotated copy of the blob; deleted after testing
1809  TO_ROW *row; // current row
1810  float length; // of g_vec
1811  TBOX blob_box; // bounding box of the current blob
1812  FCOORD blob_rotation; // inverse of rotation
1813  FCOORD g_vec; // skew rotation
1814  BLOBNBOX_IT blob_it; // iterator over the blobs of the current row
1815  // iterators over the block's underline and large-blob lists
1816  BLOBNBOX_IT under_it = &block->underlines;
1817  BLOBNBOX_IT large_it = &block->large_blobs;
1818  TO_ROW_IT row_it = block->get_rows();
1819  int min_blob_height = static_cast<int>(textord_min_blob_height_fraction *
1820  block->line_size + 0.5); // rounded; passed on to CountOverlaps()
1821 
1822  // length of vector (1, -gradient), used to normalise g_vec to unit length
1823  length = sqrt(1 + gradient * gradient);
1824  g_vec = FCOORD(1 / length, -gradient / length);
1825  blob_rotation = FCOORD(rotation.x(), -rotation.y());
1826  blob_rotation.rotate(g_vec); // undoing everything
1827  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1828  row = row_it.data();
1829  // get blobs
1830  blob_it.set_to_list(row->blob_list());
1831  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list();
1832  blob_it.forward()) {
1833  blob = blob_it.data();
1834  blob_box = blob->bounding_box();
      // Only blobs wider than textord_underline_width line sizes are candidates.
1835  if (blob_box.width() > block->line_size * textord_underline_width) {
1836  ASSERT_HOST(blob->cblob() != NULL);
1837  rotated_blob = crotate_cblob (blob->cblob(),
1838  blob_rotation);
      // test_underline(testing_on, blob, baseline, xheight): the baseline is the
      // row intercept and the xheight argument is line_size scaled by
      // kXHeightFraction plus half of kAscenderFraction.
1839  if (test_underline(
1840  testing_on && textord_show_final_rows,
1841  rotated_blob, static_cast<inT16>(row->intercept()),
1842  static_cast<inT16>(
1843  block->line_size *
1844  (tesseract::CCStruct::kXHeightFraction +
1845  tesseract::CCStruct::kAscenderFraction / 2.0f)))) {
1846  under_it.add_after_then_move(blob_it.extract());
1847  if (testing_on && textord_show_final_rows) {
1848  tprintf("Underlined blob at:");
1849  rotated_blob->bounding_box().print();
1850  tprintf("Was:");
1851  blob_box.print();
1852  }
      // Not an underline: treat it as a large blob if it overlaps too many
      // blobs of its own row (CountOverlaps semantics not visible here).
1853  } else if (CountOverlaps(blob->bounding_box(), min_blob_height,
1854  row->blob_list()) >
1855  textord_max_blob_overlaps) {
1856  large_it.add_after_then_move(blob_it.extract());
1857  if (testing_on && textord_show_final_rows) {
1858  tprintf("Large blob overlaps %d blobs at:",
1859  CountOverlaps(blob_box, min_blob_height,
1860  row->blob_list()));
1861  blob_box.print();
1862  }
1863  }
      // The rotated copy was only needed for the tests above.
1864  delete rotated_blob;
1865  }
1866  }
1867  }
1868 }
float x() const
Definition: points.h:209
#define tprintf(...)
Definition: tprintf.h:31
C_BLOB * crotate_cblob(C_BLOB *blob, FCOORD rotation)
Definition: blobbox.cpp:604
BLOBNBOX_LIST underlines
Definition: blobbox.h:769
void rotate(const FCOORD vec)
Definition: ipoints.h:471
void print() const
Definition: rect.h:270
int textord_max_blob_overlaps
Definition: makerow.cpp:68
float intercept() const
Definition: blobbox.h:584
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
#define ASSERT_HOST(x)
Definition: errcode.h:84
static const double kAscenderFraction
Definition: ccstruct.h:35
C_BLOB * cblob() const
Definition: blobbox.h:253
double textord_underline_width
Definition: makerow.cpp:87
bool textord_show_final_rows
Definition: makerow.cpp:48
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
inT16 width() const
Definition: rect.h:111
Definition: rect.h:30
float y() const
Definition: points.h:212
static const double kXHeightFraction
Definition: ccstruct.h:34
#define NULL
Definition: host.h:144
const TBOX & bounding_box() const
Definition: blobbox.h:215
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:772
Definition: points.h:189
BOOL8 test_underline(BOOL8 testing_on, C_BLOB *blob, inT16 baseline, inT16 xheight)
Definition: blkocc.cpp:53
float line_size
Definition: blobbox.h:781
double textord_min_blob_height_fraction
Definition: makerow.cpp:89
void vigorous_noise_removal ( TO_BLOCK *block)

Definition at line 473 of file makerow.cpp.

// vigorous_noise_removal: for each row of the block, estimate an x-height as
// the median height of the blobs that are at least kMinSize tall, then delete
// every blob shorter than kNoiseSize times that estimate unless dot_of_i()
// accepts it against a neighbouring blob.
//   block - block whose rows (block->get_rows()) are cleaned in place.
473  {
474  TO_ROW_IT row_it = block->get_rows ();
475  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
476  TO_ROW* row = row_it.data();
477  BLOBNBOX_IT b_it = row->blob_list();
478  // Estimate the xheight on the row.
     // Pass 1: find the tallest blob so the STATS range can be sized to fit.
479  int max_height = 0;
480  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
481  BLOBNBOX* blob = b_it.data();
482  if (blob->bounding_box().height() > max_height)
483  max_height = blob->bounding_box().height();
484  }
     // Pass 2: accumulate heights, ignoring blobs shorter than kMinSize.
485  STATS hstats(0, max_height + 1);
486  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
487  BLOBNBOX* blob = b_it.data();
488  int height = blob->bounding_box().height();
489  if (height >= kMinSize)
490  hstats.add(blob->bounding_box().height(), 1);
491  }
     // The median of the accepted heights serves as the x-height estimate.
492  float xheight = hstats.median();
493  // Delete small objects.
     // prev tracks the most recent blob that was NOT classed as small; it is
     // not updated when a small blob is kept via dot_of_i().
494  BLOBNBOX* prev = NULL;
495  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
496  BLOBNBOX* blob = b_it.data();
497  const TBOX& box = blob->bounding_box();
498  if (box.height() < kNoiseSize * xheight) {
499  // Small so delete unless it looks like an i dot.
500  if (prev != NULL) {
501  if (dot_of_i(blob, prev, row))
502  continue; // Looks OK.
503  }
     // Also try the following blob, if there is one.
504  if (!b_it.at_last()) {
505  BLOBNBOX* next = b_it.data_relative(1);
506  if (dot_of_i(blob, next, row))
507  continue; // Looks OK.
508  }
509  // It might be noise so get rid of it.
     // The C_BLOB is deleted explicitly before the BLOBNBOX itself is
     // extracted from the row list and deleted.
510  if (blob->cblob() != NULL)
511  delete blob->cblob();
512  delete b_it.extract();
513  } else {
514  prev = blob;
515  }
516  }
517  }
518 }
Definition: statistc.h:33
const int kMinSize
Definition: makerow.cpp:384
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
C_BLOB * cblob() const
Definition: blobbox.h:253
inT16 height() const
Definition: rect.h:104
const double kNoiseSize
Definition: makerow.cpp:383
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
Definition: rect.h:30
#define NULL
Definition: host.h:144
const TBOX & bounding_box() const
Definition: blobbox.h:215

Variable Documentation

const int kMinLeaderCount = 5

Definition at line 107 of file makerow.cpp.

const int kMinSize = 8

Definition at line 384 of file makerow.cpp.

const double kNoiseSize = 0.5

Definition at line 383 of file makerow.cpp.

double textord_ascheight_mode_fraction = 0.08

"Min pile height to make ascheight"

Definition at line 93 of file makerow.cpp.

double textord_ascx_ratio_max = 1.8

"Max cap/xheight"

Definition at line 97 of file makerow.cpp.

double textord_ascx_ratio_min = 1.25

"Min cap/xheight"

Definition at line 96 of file makerow.cpp.

bool textord_biased_skewcalc = TRUE

"Bias skew estimates with line length"

Definition at line 58 of file makerow.cpp.

double textord_chop_width = 1.5

"Max width before chopping"

Definition at line 78 of file makerow.cpp.

bool textord_debug_blob = FALSE

"Print test blob information"

Definition at line 103 of file makerow.cpp.

bool textord_debug_xheights = FALSE

"Test xheight algorithms"

Definition at line 57 of file makerow.cpp.

double textord_descheight_mode_fraction = 0.08

"Min pile height to make descheight"

Definition at line 95 of file makerow.cpp.

double textord_descx_ratio_max = 0.6

"Max desc/xheight"

Definition at line 99 of file makerow.cpp.

double textord_descx_ratio_min = 0.25

"Min desc/xheight"

Definition at line 98 of file makerow.cpp.

double textord_excess_blobsize = 1.3

"New row made if blob makes row this big"

Definition at line 85 of file makerow.cpp.

double textord_expansion_factor = 1.0

"Factor to expand rows by in expand_rows"

Definition at line 80 of file makerow.cpp.

bool textord_fix_makerow_bug = TRUE

"Prevent multiple baselines"

Definition at line 56 of file makerow.cpp.

bool textord_fix_xheight_bug = TRUE

"Use spline baseline"

Definition at line 55 of file makerow.cpp.

bool textord_heavy_nr = FALSE

"Vigorously remove noise"

Definition at line 44 of file makerow.cpp.

bool textord_interpolating_skew = TRUE

"Interpolate across gaps"

Definition at line 59 of file makerow.cpp.

double textord_linespace_iqrlimit = 0.2

"Max iqr/median for linespace"

Definition at line 76 of file makerow.cpp.

int textord_lms_line_trials = 12

"Number of line fits to do"

Definition at line 101 of file makerow.cpp.

int textord_max_blob_overlaps = 4

"Max number of blobs a big blob can overlap"

Definition at line 68 of file makerow.cpp.

double textord_min_blob_height_fraction = 0.75

"Min blob height/top to include blob top into xheight stats"

Definition at line 89 of file makerow.cpp.

int textord_min_blobs_in_row = 4

"Min blobs before gradient counted"

Definition at line 64 of file makerow.cpp.

double textord_min_linesize = 1.25

"* blob height for initial linesize"

Definition at line 83 of file makerow.cpp.

int textord_min_xheight = 10

"Min credible pixel xheight"

Definition at line 69 of file makerow.cpp.

double textord_minxh = 0.25

"fraction of linesize for min xheight"

Definition at line 82 of file makerow.cpp.

bool textord_new_initial_xheight = TRUE

"Use test xheight mechanism"

Definition at line 102 of file makerow.cpp.

double textord_occupancy_threshold = 0.4

"Fraction of neighbourhood"

Definition at line 86 of file makerow.cpp.

bool textord_old_baselines = TRUE

"Use old baseline algorithm"

Definition at line 53 of file makerow.cpp.

bool textord_old_xheight = FALSE

"Use old xheight algorithm"

Definition at line 54 of file makerow.cpp.

double textord_overlap_x = 0.375

"Fraction of linespace for good overlap"

Definition at line 81 of file makerow.cpp.

bool textord_parallel_baselines = TRUE

"Force parallel baselines"

Definition at line 51 of file makerow.cpp.

bool textord_show_expanded_rows = FALSE

"Display rows after expanding"

Definition at line 47 of file makerow.cpp.

bool textord_show_final_blobs = FALSE

"Display blob bounds after pre-ass"

Definition at line 49 of file makerow.cpp.

bool textord_show_final_rows = FALSE

"Display rows after final fitting"

Definition at line 48 of file makerow.cpp.

bool textord_show_initial_rows = FALSE

"Display row accumulation"

Definition at line 45 of file makerow.cpp.

bool textord_show_parallel_rows = FALSE

"Display page correlated rows"

Definition at line 46 of file makerow.cpp.

double textord_skew_ile = 0.5

"Ile of gradients for page skew"

Definition at line 74 of file makerow.cpp.

double textord_skew_lag = 0.02

"Lag for skew on row accumulation"

Definition at line 75 of file makerow.cpp.

int textord_skewsmooth_offset = 4

"For smooth factor"

Definition at line 60 of file makerow.cpp.

int textord_skewsmooth_offset2 = 1

"For smooth factor"

Definition at line 61 of file makerow.cpp.

int textord_spline_medianwin = 6

"Size of window for spline segmentation"

Definition at line 66 of file makerow.cpp.

int textord_spline_minblobs = 8

"Min blobs in each spline segment"

Definition at line 65 of file makerow.cpp.

double textord_spline_outlier_fraction = 0.1

"Fraction of line spacing for outlier"

Definition at line 73 of file makerow.cpp.

double textord_spline_shift_fraction = 0.02

"Fraction of line spacing for quad"

Definition at line 71 of file makerow.cpp.

bool textord_straight_baselines = FALSE

"Force straight baselines"

Definition at line 52 of file makerow.cpp.

bool textord_test_landscape = FALSE

"Tests refer to land/port"

Definition at line 50 of file makerow.cpp.

int textord_test_x = -MAX_INT32

"coord of test pt"

Definition at line 62 of file makerow.cpp.

int textord_test_y = -MAX_INT32

"coord of test pt"

Definition at line 63 of file makerow.cpp.

double textord_underline_width = 2.0

"Multiple of line_size for underline"

Definition at line 87 of file makerow.cpp.

double textord_width_limit = 8

"Max width of blobs to make rows"

Definition at line 77 of file makerow.cpp.

double textord_xheight_error_margin = 0.1

"Accepted variation"

Definition at line 100 of file makerow.cpp.

double textord_xheight_mode_fraction = 0.4

"Min pile height to make xheight"

Definition at line 91 of file makerow.cpp.