All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
makerow.h
Go to the documentation of this file.
1 /**********************************************************************
2  * File: makerow.h (Formerly makerows.h)
3  * Description: Code to arrange blobs into rows of text.
4  * Author: Ray Smith
5  * Created: Mon Sep 21 14:34:48 BST 1992
6  *
7  * (C) Copyright 1992, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #ifndef MAKEROW_H
21 #define MAKEROW_H
22 
23 #include "params.h"
24 #include "ocrblock.h"
25 #include "blobs.h"
26 #include "blobbox.h"
27 #include "statistc.h"
28 
30 {
31  ASSIGN, //assign it to row
32  REJECT, //reject it - dual overlap
34 };
35 
41 };
42 
43 extern BOOL_VAR_H(textord_heavy_nr, FALSE, "Vigorously remove noise");
45 "Display row accumulation");
47 "Display page correlated rows");
49 "Display rows after expanding");
51 "Display rows after final fitting");
53 "Display blob bounds after pre-ass");
54 extern BOOL_VAR_H (textord_test_landscape, FALSE, "Tests refer to land/port");
56 "Force parallel baselines");
58 "Force straight baselines");
60 "Use quadratic splines");
61 extern BOOL_VAR_H (textord_old_baselines, TRUE, "Use old baseline algorithm");
62 extern BOOL_VAR_H (textord_old_xheight, TRUE, "Use old xheight algorithm");
63 extern BOOL_VAR_H (textord_fix_xheight_bug, TRUE, "Use spline baseline");
65 "Prevent multiple baselines");
67 "Use new projection for underlines");
68 extern BOOL_VAR_H (textord_debug_xheights, FALSE, "Test xheight algorithms");
69 extern INT_VAR_H (textord_test_x, -MAX_INT32, "coord of test pt");
70 extern INT_VAR_H (textord_test_y, -MAX_INT32, "coord of test pt");
72 "Min blobs before gradient counted");
74 "Min blobs in each spline segment");
76 "Size of window for spline segmentation");
77 extern INT_VAR_H (textord_min_xheight, 10, "Min credible pixel xheight");
79 "Fraction of line spacing for quad");
81 "Fraction of line spacing for outlier");
82 extern double_VAR_H (textord_skew_ile, 0.5, "Ile of gradients for page skew");
83 extern double_VAR_H (textord_skew_lag, 0.75,
84 "Lag for skew on row accumulation");
86 "Max iqr/median for linespace");
88 "Max width of blobs to make rows");
89 extern double_VAR_H (textord_chop_width, 1.5, "Max width before chopping");
90 extern double_VAR_H (textord_minxh, 0.25,
91 "fraction of linesize for min xheight");
93 "* blob height for initial linesize");
95 "New row made if blob makes row this big");
97 "Fraction of neighbourhood");
99 "Multiple of line_size for underline");
101 "Min blob height/top to include blob top into xheight stats");
103 "Min pile height to make xheight");
105 "Min pile height to make ascheight");
106 extern double_VAR_H (textord_ascx_ratio_min, 1.2, "Min cap/xheight");
107 extern double_VAR_H (textord_ascx_ratio_max, 1.7, "Max cap/xheight");
108 extern double_VAR_H (textord_descx_ratio_min, 0.15, "Min desc/xheight");
109 extern double_VAR_H (textord_descx_ratio_max, 0.6, "Max desc/xheight");
110 extern double_VAR_H (textord_xheight_error_margin, 0.1, "Accepted variation");
111 extern INT_VAR_H (textord_lms_line_trials, 12, "Number of linew fits to do");
113 "Use test xheight mechanism");
114 extern BOOL_VAR_H(textord_debug_blob, FALSE, "Print test blob information");
115 
// Computes the range of credible x-heights for a block from its line size.
//   block_linesize: line size of the block, used as the scale for both limits.
//   min_height (out): floor(block_linesize * textord_minxh), raised to
//                     textord_min_xheight if it would otherwise be smaller.
//   max_height (out): ceil(block_linesize * 3.0).
// Both outputs are written unconditionally; neither pointer is checked for NULL.
116 inline void get_min_max_xheight(int block_linesize,
117  int *min_height, int *max_height) {
118  *min_height = static_cast<inT32>(floor(block_linesize * textord_minxh));  // fraction of the line size, rounded down
119  if (*min_height < textord_min_xheight) *min_height = textord_min_xheight;  // never go below the configured minimum
120  *max_height = static_cast<inT32>(ceil(block_linesize * 3.0));  // fixed multiple of the line size, rounded up
121 }
122 
// Body of get_row_category(). Its signature line (listing line 123) is absent
// from this listing; the cross-reference index for this file gives it as
// `ROW_CATEGORY get_row_category(const TO_ROW *row)`.
// Classifies a row from its stored metrics, tested in this order:
//   row->xheight <= 0   -> ROW_INVALID
//   row->ascrise > 0    -> ROW_ASCENDERS_FOUND
//   row->descdrop != 0  -> ROW_DESCENDERS_FOUND
//   otherwise           -> ROW_UNKNOWN
124  if (row->xheight <= 0) return ROW_INVALID;
125  return (row->ascrise > 0) ? ROW_ASCENDERS_FOUND :
126  (row->descdrop != 0) ? ROW_DESCENDERS_FOUND : ROW_UNKNOWN;  // any non-zero descdrop counts, whatever its sign
127 }
128 
// Returns true when `test` lies within a relative tolerance of `num`, i.e.
//   num * (1 - margin) <= test <= num * (1 + margin)
// with both bounds inclusive.
//   test:   value being checked.
//   num:    reference value the tolerance band is built around.
//   margin: relative tolerance as a fraction (0.1 means +/-10% of num).
// NOTE: the band is built by scaling `num`, so for a negative `num` and a
// positive `margin` the lower bound exceeds the upper bound and the result is
// always false. If any operand is NaN the comparisons fail and the result is
// false.
129 inline bool within_error_margin(float test, float num, float margin) {
130  return (test >= num * (1 - margin) && test <= num * (1 + margin));
131 }
132 
133 void fill_heights(TO_ROW *row, float gradient, int min_height,
134  int max_height, STATS *heights, STATS *floating_heights);
135 
136 float make_single_row(ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK* block,
137  TO_BLOCK_LIST* blocks);
138 float make_rows(ICOORD page_tr, // top right
139  TO_BLOCK_LIST *port_blocks);
140 void make_initial_textrows(ICOORD page_tr,
141  TO_BLOCK *block, // block to do
142  FCOORD rotation, // for drawing
143  BOOL8 testing_on); // correct orientation
144 void fit_lms_line(TO_ROW *row);
145 void compute_page_skew(TO_BLOCK_LIST *blocks, // list of blocks
146  float &page_m, // average gradient
147  float &page_err); // average error
148 void vigorous_noise_removal(TO_BLOCK* block);
149 void cleanup_rows_making(ICOORD page_tr, // top right
150  TO_BLOCK *block, // block to do
151  float gradient, // gradient to fit
152  FCOORD rotation, // for drawing
153  inT32 block_edge, // edge of block
154  BOOL8 testing_on); // correct orientation
155 void delete_non_dropout_rows( //find lines
156  TO_BLOCK *block, //block to do
157  float gradient, //global skew
158  FCOORD rotation, //deskew vector
159  inT32 block_edge, //left edge
160  BOOL8 testing_on //correct orientation
161  );
162 BOOL8 find_best_dropout_row( //find neighbours
163  TO_ROW *row, //row to test
164  inT32 distance, //dropout dist
165  float dist_limit, //threshold distance
166  inT32 line_index, //index of row
167  TO_ROW_IT *row_it, //current position
168  BOOL8 testing_on //correct orientation
169  );
170 TBOX deskew_block_coords( //block box
171  TO_BLOCK *block, //block to do
172  float gradient //global skew
173  );
174 void compute_line_occupation( //project blobs
175  TO_BLOCK *block, //block to do
176  float gradient, //global skew
177  inT32 min_y, //min coord in block
178  inT32 max_y, //in block
179  inT32 *occupation, //output projection
180  inT32 *deltas //derivative
181  );
182 void compute_occupation_threshold( //project blobs
183  inT32 low_window, //below result point
184  inT32 high_window, //above result point
185  inT32 line_count, //array sizes
186  inT32 *occupation, //input projection
187  inT32 *thresholds //output thresholds
188  );
189 void compute_dropout_distances( //project blobs
190  inT32 *occupation, //input projection
191  inT32 *thresholds, //output thresholds
192  inT32 line_count //array sizes
193  );
194 void expand_rows( //find lines
195  ICOORD page_tr, //top right
196  TO_BLOCK *block, //block to do
197  float gradient, //gradient to fit
198  FCOORD rotation, //for drawing
199  inT32 block_edge, //edge of block
200  BOOL8 testing_on //correct orientation
201  );
202 void adjust_row_limits( //tidy limits
203  TO_BLOCK *block //block to do
204  );
205 void compute_row_stats( //find lines
206  TO_BLOCK *block, //block to do
207  BOOL8 testing_on //correct orientation
208  );
209 float median_block_xheight( //find lines
210  TO_BLOCK *block, //block to do
211  float gradient //global skew
212  );
213 
215  STATS *heights, STATS *floating_heights, bool cap_only, int min_height,
216  int max_height, float *xheight, float *ascrise);
217 
218 inT32 compute_row_descdrop(TO_ROW *row, // row to do
219  float gradient, // global skew
220  int xheight_blob_count,
221  STATS *heights);
222 inT32 compute_height_modes(STATS *heights, // stats to search
223  inT32 min_height, // bottom of range
224  inT32 max_height, // top of range
225  inT32 *modes, // output array
226  inT32 maxmodes); // size of modes
227 void correct_row_xheight(TO_ROW *row, // row to fix
228  float xheight, // average values
229  float ascrise,
230  float descdrop);
231 void separate_underlines(TO_BLOCK *block, // block to do
232  float gradient, // skew angle
233  FCOORD rotation, // inverse landscape
234  BOOL8 testing_on); // correct orientation
235 void pre_associate_blobs( ICOORD page_tr, // top right
236  TO_BLOCK *block, // block to do
237  FCOORD rotation, // inverse landscape
238  BOOL8 testing_on); // correct orientation
239 void fit_parallel_rows(TO_BLOCK *block, // block to do
240  float gradient, // gradient to fit
241  FCOORD rotation, // for drawing
242  inT32 block_edge, // edge of block
243  BOOL8 testing_on); // correct orientation
244 void fit_parallel_lms(float gradient, // forced gradient
245  TO_ROW *row); // row to fit
246 void make_baseline_spline(TO_ROW *row, // row to fit
247  TO_BLOCK *block); // block it came from
248 BOOL8 segment_baseline ( //split baseline
249 TO_ROW * row, //row to fit
250 TO_BLOCK * block, //block it came from
251 inT32 & segments, //no of segments
252 inT32 xstarts[] //coords of segments
253 );
254 double *linear_spline_baseline ( //split baseline
255 TO_ROW * row, //row to fit
256 TO_BLOCK * block, //block it came from
257 inT32 & segments, //no of segments
258 inT32 xstarts[] //coords of segments
259 );
260 void assign_blobs_to_rows( //find lines
261  TO_BLOCK *block, //block to do
262  float *gradient, //block skew
263  int pass, //identification
264  BOOL8 reject_misses, //chuck big ones out
265  BOOL8 make_new_rows, //add rows for unmatched
266  BOOL8 drawing_skew //draw smoothed skew
267  );
268  //find best row
269 OVERLAP_STATE most_overlapping_row(TO_ROW_IT *row_it, //iterator
270  TO_ROW *&best_row, //output row
271  float top, //top of blob
272  float bottom, //bottom of blob
273  float rowsize, //max row size
274  BOOL8 testing_blob //test stuff
275  );
276 int blob_x_order( //sort function
277  const void *item1, //items to compare
278  const void *item2);
279 int row_y_order( //sort function
280  const void *item1, //items to compare
281  const void *item2);
282 int row_spacing_order( //sort function
283  const void *item1, //items to compare
284  const void *item2);
285 
286 void mark_repeated_chars(TO_ROW *row);
287 #endif
double textord_descx_ratio_max
Definition: makerow.cpp:99
int row_y_order(const void *item1, const void *item2)
Definition: makerow.cpp:2627
bool textord_old_baselines
Definition: makerow.cpp:53
int compute_xheight_from_modes(STATS *heights, STATS *floating_heights, bool cap_only, int min_height, int max_height, float *xheight, float *ascrise)
Definition: makerow.cpp:1498
int textord_min_blobs_in_row
Definition: makerow.cpp:64
void mark_repeated_chars(TO_ROW *row)
Definition: makerow.cpp:2671
double textord_min_blob_height_fraction
Definition: makerow.cpp:89
bool textord_cblob_blockocc
inT32 compute_row_descdrop(TO_ROW *row, float gradient, int xheight_blob_count, STATS *heights)
Definition: makerow.cpp:1594
double textord_min_linesize
Definition: makerow.cpp:83
bool textord_new_initial_xheight
Definition: makerow.cpp:102
void fill_heights(TO_ROW *row, float gradient, int min_height, int max_height, STATS *heights, STATS *floating_heights)
Definition: makerow.cpp:1437
bool textord_parallel_baselines
Definition: makerow.cpp:51
bool within_error_margin(float test, float num, float margin)
Definition: makerow.h:129
void assign_blobs_to_rows(TO_BLOCK *block, float *gradient, int pass, BOOL8 reject_misses, BOOL8 make_new_rows, BOOL8 drawing_skew)
Definition: makerow.cpp:2310
inT32 compute_height_modes(STATS *heights, inT32 min_height, inT32 max_height, inT32 *modes, inT32 maxmodes)
Definition: makerow.cpp:1654
void expand_rows(ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
Definition: makerow.cpp:976
bool textord_show_final_blobs
Definition: makerow.cpp:49
double textord_linespace_iqrlimit
Definition: makerow.cpp:76
bool textord_heavy_nr
Definition: makerow.cpp:44
double textord_minxh
Definition: makerow.cpp:82
void compute_dropout_distances(inT32 *occupation, inT32 *thresholds, inT32 line_count)
Definition: makerow.cpp:929
void delete_non_dropout_rows(TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
Definition: makerow.cpp:578
#define INT_VAR_H(name, val, comment)
Definition: params.h:265
void adjust_row_limits(TO_BLOCK *block)
Definition: makerow.cpp:1134
void compute_occupation_threshold(inT32 low_window, inT32 high_window, inT32 line_count, inT32 *occupation, inT32 *thresholds)
Definition: makerow.cpp:848
double textord_skew_lag
Definition: makerow.cpp:75
TBOX deskew_block_coords(TO_BLOCK *block, float gradient)
Definition: makerow.cpp:746
bool textord_fix_makerow_bug
Definition: makerow.cpp:56
void get_min_max_xheight(int block_linesize, int *min_height, int *max_height)
Definition: makerow.h:116
bool textord_show_expanded_rows
Definition: makerow.cpp:47
Definition: statistc.h:33
int textord_test_y
Definition: makerow.cpp:63
bool textord_show_initial_rows
Definition: makerow.cpp:45
void pre_associate_blobs(ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, BOOL8 testing_on)
Definition: makerow.cpp:1876
double textord_ascx_ratio_min
Definition: makerow.cpp:96
void compute_line_occupation(TO_BLOCK *block, float gradient, inT32 min_y, inT32 max_y, inT32 *occupation, inT32 *deltas)
Definition: makerow.cpp:782
void fit_parallel_lms(float gradient, TO_ROW *row)
Definition: makerow.cpp:2004
double textord_chop_width
Definition: makerow.cpp:78
unsigned char BOOL8
Definition: host.h:113
void make_initial_textrows(ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, BOOL8 testing_on)
Definition: makerow.cpp:227
double textord_descx_ratio_min
Definition: makerow.cpp:98
int blob_x_order(const void *item1, const void *item2)
Definition: makerow.cpp:2605
void fit_lms_line(TO_ROW *row)
Definition: makerow.cpp:267
BOOL8 find_best_dropout_row(TO_ROW *row, inT32 distance, float dist_limit, inT32 line_index, TO_ROW_IT *row_it, BOOL8 testing_on)
Definition: makerow.cpp:666
Definition: makerow.h:31
Definition: makerow.h:32
int textord_spline_medianwin
Definition: makerow.cpp:66
double textord_spline_outlier_fraction
Definition: makerow.cpp:73
float make_rows(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
Definition: makerow.cpp:201
double textord_xheight_mode_fraction
Definition: makerow.cpp:91
float make_single_row(ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks)
Definition: makerow.cpp:164
bool textord_fix_xheight_bug
Definition: makerow.cpp:55
bool textord_old_xheight
Definition: makerow.cpp:54
void separate_underlines(TO_BLOCK *block, float gradient, FCOORD rotation, BOOL8 testing_on)
Definition: makerow.cpp:1803
BOOL8 segment_baseline(TO_ROW *row, TO_BLOCK *block, inT32 &segments, inT32 xstarts[])
Definition: makerow.cpp:2120
double textord_ascx_ratio_max
Definition: makerow.cpp:97
void vigorous_noise_removal(TO_BLOCK *block)
Definition: makerow.cpp:473
ROW_CATEGORY
Definition: makerow.h:36
double textord_occupancy_threshold
Definition: makerow.cpp:86
double textord_underline_width
Definition: makerow.cpp:87
bool textord_test_landscape
Definition: makerow.cpp:50
int textord_lms_line_trials
Definition: makerow.cpp:101
double textord_spline_shift_fraction
Definition: makerow.cpp:71
bool textord_straight_baselines
Definition: makerow.cpp:52
int row_spacing_order(const void *item1, const void *item2)
Definition: makerow.cpp:2649
double textord_xheight_error_margin
Definition: makerow.cpp:100
#define double_VAR_H(name, val, comment)
Definition: params.h:274
float ascrise
Definition: blobbox.h:655
bool textord_debug_xheights
Definition: makerow.cpp:57
#define MAX_INT32
Definition: host.h:120
void cleanup_rows_making(ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
Definition: makerow.cpp:525
void compute_page_skew(TO_BLOCK_LIST *blocks, float &page_m, float &page_err)
Definition: makerow.cpp:287
OVERLAP_STATE
Definition: makerow.h:29
integer coordinate
Definition: points.h:30
void make_baseline_spline(TO_ROW *row, TO_BLOCK *block)
Definition: makerow.cpp:2087
void correct_row_xheight(TO_ROW *row, float xheight, float ascrise, float descdrop)
Definition: makerow.cpp:1716
bool textord_debug_blob
Definition: makerow.cpp:103
bool textord_show_parallel_rows
Definition: makerow.cpp:46
int textord_min_xheight
Definition: makerow.cpp:69
double * linear_spline_baseline(TO_ROW *row, TO_BLOCK *block, inT32 &segments, inT32 xstarts[])
Definition: makerow.cpp:2219
double textord_excess_blobsize
Definition: makerow.cpp:85
bool textord_quadratic_baselines
#define FALSE
Definition: capi.h:29
int textord_test_x
Definition: makerow.cpp:62
double textord_width_limit
Definition: makerow.cpp:77
Definition: rect.h:30
void compute_row_stats(TO_BLOCK *block, BOOL8 testing_on)
Definition: makerow.cpp:1170
#define TRUE
Definition: capi.h:28
int textord_spline_minblobs
Definition: makerow.cpp:65
OVERLAP_STATE most_overlapping_row(TO_ROW_IT *row_it, TO_ROW *&best_row, float top, float bottom, float rowsize, BOOL8 testing_blob)
Definition: makerow.cpp:2510
ROW_CATEGORY get_row_category(const TO_ROW *row)
Definition: makerow.h:123
double textord_skew_ile
Definition: makerow.cpp:74
float xheight
Definition: blobbox.h:653
bool textord_show_final_rows
Definition: makerow.cpp:48
float descdrop
Definition: blobbox.h:656
#define BOOL_VAR_H(name, val, comment)
Definition: params.h:268
Definition: points.h:189
float median_block_xheight(TO_BLOCK *block, float gradient)
void fit_parallel_rows(TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
Definition: makerow.cpp:1962
double textord_ascheight_mode_fraction
Definition: makerow.cpp:93
int inT32
Definition: host.h:102