tesseract  4.0.0-1-g2a2b
underlin.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: underlin.cpp (Formerly undrline.c)
3  * Description: Code to chop blobs apart from underlines.
4  * Author: Ray Smith
5  *
6  * (C) Copyright 1994, Hewlett-Packard Ltd.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  *
17  **********************************************************************/
18 
19 #include "underlin.h"
20 
21 #define PROJECTION_MARGIN 10 //arbitrary
22 #define EXTERN
23 
24 EXTERN double_VAR (textord_underline_offset, 0.1, "Fraction of x to ignore");
26 "Chop underlines & put back");
27 
28 /**********************************************************************
29  * restore_underlined_blobs
30  *
31  * Find underlined blobs and put them back in the row.
32  **********************************************************************/
33 
34 void restore_underlined_blobs( //get chop points
35  TO_BLOCK *block //block to do
36  ) {
37  int16_t chop_coord; //chop boundary
38  TBOX blob_box; //of underline
39  BLOBNBOX *u_line; //underline bit
40  TO_ROW *row; //best row for blob
41  ICOORDELT_LIST chop_cells; //blobs to cut out
42  //real underlines
43  BLOBNBOX_LIST residual_underlines;
44  C_OUTLINE_LIST left_coutlines;
45  C_OUTLINE_LIST right_coutlines;
46  ICOORDELT_IT cell_it = &chop_cells;
47  //under lines
48  BLOBNBOX_IT under_it = &block->underlines;
49  BLOBNBOX_IT ru_it = &residual_underlines;
50 
51  if (block->get_rows()->empty())
52  return; // Don't crash if there are no rows.
53  for (under_it.mark_cycle_pt (); !under_it.cycled_list ();
54  under_it.forward ()) {
55  u_line = under_it.extract ();
56  blob_box = u_line->bounding_box ();
57  row = most_overlapping_row (block->get_rows (), u_line);
58  if (row == nullptr)
59  return; // Don't crash if there is no row.
60  find_underlined_blobs (u_line, &row->baseline, row->xheight,
62  &chop_cells);
63  cell_it.set_to_list (&chop_cells);
64  for (cell_it.mark_cycle_pt (); !cell_it.cycled_list ();
65  cell_it.forward ()) {
66  chop_coord = cell_it.data ()->x ();
67  if (cell_it.data ()->y () - chop_coord > textord_fp_chop_error + 1) {
68  split_to_blob (u_line, chop_coord,
70  &left_coutlines,
71  &right_coutlines);
72  if (!left_coutlines.empty()) {
73  ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines)));
74  }
75  chop_coord = cell_it.data ()->y ();
76  split_to_blob(nullptr, chop_coord, textord_fp_chop_error + 0.5,
77  &left_coutlines, &right_coutlines);
78  if (!left_coutlines.empty()) {
79  row->insert_blob(new BLOBNBOX(new C_BLOB(&left_coutlines)));
80  }
81  u_line = nullptr; //no more blobs to add
82  }
83  delete cell_it.extract();
84  }
85  if (!right_coutlines.empty ()) {
86  split_to_blob(nullptr, blob_box.right(), textord_fp_chop_error + 0.5,
87  &left_coutlines, &right_coutlines);
88  if (!left_coutlines.empty())
89  ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines)));
90  }
91  if (u_line != nullptr) {
92  delete u_line->cblob();
93  delete u_line;
94  }
95  }
96  if (!ru_it.empty()) {
97  ru_it.move_to_first();
98  for (ru_it.mark_cycle_pt(); !ru_it.cycled_list(); ru_it.forward()) {
99  under_it.add_after_then_move(ru_it.extract());
100  }
101  }
102 }
103 
104 
105 /**********************************************************************
106  * most_overlapping_row
107  *
108  * Return the row which most overlaps the blob.
109  **********************************************************************/
110 
111 TO_ROW *most_overlapping_row( //find best row
112  TO_ROW_LIST *rows, //list of rows
113  BLOBNBOX *blob //blob to place
114  ) {
115  int16_t x = (blob->bounding_box ().left ()
116  + blob->bounding_box ().right ()) / 2;
117  TO_ROW_IT row_it = rows; //row iterator
118  TO_ROW *row; //current row
119  TO_ROW *best_row; //output row
120  float overlap; //of blob & row
121  float bestover; //best overlap
122 
123  best_row = nullptr;
124  bestover = (float) -INT32_MAX;
125  if (row_it.empty ())
126  return nullptr;
127  row = row_it.data ();
128  row_it.mark_cycle_pt ();
129  while (row->baseline.y (x) + row->descdrop > blob->bounding_box ().top ()
130  && !row_it.cycled_list ()) {
131  best_row = row;
132  bestover =
133  blob->bounding_box ().top () - row->baseline.y (x) + row->descdrop;
134  row_it.forward ();
135  row = row_it.data ();
136  }
137  while (row->baseline.y (x) + row->xheight + row->ascrise
138  >= blob->bounding_box ().bottom () && !row_it.cycled_list ()) {
139  overlap = row->baseline.y (x) + row->xheight + row->ascrise;
140  if (blob->bounding_box ().top () < overlap)
141  overlap = blob->bounding_box ().top ();
142  if (blob->bounding_box ().bottom () >
143  row->baseline.y (x) + row->descdrop)
144  overlap -= blob->bounding_box ().bottom ();
145  else
146  overlap -= row->baseline.y (x) + row->descdrop;
147  if (overlap > bestover) {
148  bestover = overlap;
149  best_row = row;
150  }
151  row_it.forward ();
152  row = row_it.data ();
153  }
154  if (bestover < 0
155  && row->baseline.y (x) + row->xheight + row->ascrise
156  - blob->bounding_box ().bottom () > bestover)
157  best_row = row;
158  return best_row;
159 }
160 
161 
162 /**********************************************************************
163  * find_underlined_blobs
164  *
165  * Find the start and end coords of blobs in the underline.
166  **********************************************************************/
167 
168 void find_underlined_blobs( //get chop points
169  BLOBNBOX *u_line, //underlined unit
170  QSPLINE *baseline, //actual baseline
171  float xheight, //height of line
172  float baseline_offset, //amount to shrinke it
173  ICOORDELT_LIST *chop_cells //places to chop
174  ) {
175  int16_t x, y; //sides of blob
176  ICOORD blob_chop; //sides of blob
177  TBOX blob_box = u_line->bounding_box ();
178  //cell iterator
179  ICOORDELT_IT cell_it = chop_cells;
180  STATS upper_proj (blob_box.left (), blob_box.right () + 1);
181  STATS middle_proj (blob_box.left (), blob_box.right () + 1);
182  STATS lower_proj (blob_box.left (), blob_box.right () + 1);
183  C_OUTLINE_IT out_it; //outlines of blob
184 
185  ASSERT_HOST (u_line->cblob () != nullptr);
186 
187  out_it.set_to_list (u_line->cblob ()->out_list ());
188  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
189  vertical_cunderline_projection (out_it.data (),
190  baseline, xheight, baseline_offset,
191  &lower_proj, &middle_proj, &upper_proj);
192  }
193 
194  for (x = blob_box.left (); x < blob_box.right (); x++) {
195  if (middle_proj.pile_count (x) > 0) {
196  for (y = x + 1;
197  y < blob_box.right () && middle_proj.pile_count (y) > 0; y++);
198  blob_chop = ICOORD (x, y);
199  cell_it.add_after_then_move (new ICOORDELT (blob_chop));
200  x = y;
201  }
202  }
203 }
204 
205 
206 /**********************************************************************
207  * vertical_cunderline_projection
208  *
209  * Compute the vertical projection of a outline from its outlines
210  * and add to the given STATS.
211  **********************************************************************/
212 
213 void vertical_cunderline_projection( //project outlines
214  C_OUTLINE *outline, //outline to project
215  QSPLINE *baseline, //actual baseline
216  float xheight, //height of line
217  float baseline_offset, //amount to shrinke it
218  STATS *lower_proj, //below baseline
219  STATS *middle_proj, //centre region
220  STATS *upper_proj //top region
221  ) {
222  ICOORD pos; //current point
223  ICOORD step; //edge step
224  int16_t lower_y, upper_y; //region limits
225  int32_t length; //of outline
226  int16_t stepindex; //current step
227  C_OUTLINE_IT out_it = outline->child ();
228 
229  pos = outline->start_pos ();
230  length = outline->pathlength ();
231  for (stepindex = 0; stepindex < length; stepindex++) {
232  step = outline->step (stepindex);
233  if (step.x () > 0) {
234  lower_y =
235  (int16_t) floor (baseline->y (pos.x ()) + baseline_offset + 0.5);
236  upper_y =
237  (int16_t) floor (baseline->y (pos.x ()) + baseline_offset +
238  xheight + 0.5);
239  if (pos.y () >= lower_y) {
240  lower_proj->add (pos.x (), -lower_y);
241  if (pos.y () >= upper_y) {
242  middle_proj->add (pos.x (), lower_y - upper_y);
243  upper_proj->add (pos.x (), upper_y - pos.y ());
244  }
245  else
246  middle_proj->add (pos.x (), lower_y - pos.y ());
247  }
248  else
249  lower_proj->add (pos.x (), -pos.y ());
250  }
251  else if (step.x () < 0) {
252  lower_y =
253  (int16_t) floor (baseline->y (pos.x () - 1) + baseline_offset +
254  0.5);
255  upper_y =
256  (int16_t) floor (baseline->y (pos.x () - 1) + baseline_offset +
257  xheight + 0.5);
258  if (pos.y () >= lower_y) {
259  lower_proj->add (pos.x () - 1, lower_y);
260  if (pos.y () >= upper_y) {
261  middle_proj->add (pos.x () - 1, upper_y - lower_y);
262  upper_proj->add (pos.x () - 1, pos.y () - upper_y);
263  }
264  else
265  middle_proj->add (pos.x () - 1, pos.y () - lower_y);
266  }
267  else
268  lower_proj->add (pos.x () - 1, pos.y ());
269  }
270  pos += step;
271  }
272 
273  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
274  vertical_cunderline_projection (out_it.data (),
275  baseline, xheight, baseline_offset,
276  lower_proj, middle_proj, upper_proj);
277  }
278 }
QSPLINE baseline
Definition: blobbox.h:683
float descdrop
Definition: blobbox.h:673
TO_ROW * most_overlapping_row(TO_ROW_LIST *rows, BLOBNBOX *blob)
Definition: underlin.cpp:111
void insert_blob(BLOBNBOX *blob)
Definition: blobbox.cpp:770
#define TRUE
Definition: capi.h:51
#define BOOL_VAR(name, val, comment)
Definition: params.h:279
void vertical_cunderline_projection(C_OUTLINE *outline, QSPLINE *baseline, float xheight, float baseline_offset, STATS *lower_proj, STATS *middle_proj, STATS *upper_proj)
Definition: underlin.cpp:213
#define double_VAR(name, val, comment)
Definition: params.h:285
int16_t y() const
access function
Definition: points.h:57
void split_to_blob(BLOBNBOX *blob, int16_t chop_coord, float pitch_error, C_OUTLINE_LIST *left_coutlines, C_OUTLINE_LIST *right_coutlines)
Definition: fpchop.cpp:239
TBOX
Definition: rect.h:34
void restore_underlined_blobs(TO_BLOCK *block)
Definition: underlin.cpp:34
const ICOORD & start_pos() const
Definition: coutln.h:148
EXTERN double textord_underline_offset
Definition: underlin.cpp:24
STATS
Definition: statistc.h:33
double y(double x) const
Definition: quspline.cpp:209
TO_ROW_LIST * get_rows()
Definition: blobbox.h:717
float xheight
Definition: blobbox.h:670
int16_t left() const
Definition: rect.h:72
#define EXTERN
Definition: underlin.cpp:22
int16_t top() const
Definition: rect.h:58
ICOORD
integer coordinate
Definition: points.h:32
int16_t x() const
access function
Definition: points.h:53
int32_t pathlength() const
Definition: coutln.h:135
float ascrise
Definition: blobbox.h:672
EXTERN bool textord_restore_underlines
Definition: underlin.cpp:26
void add(int32_t value, int32_t count)
Definition: statistc.cpp:100
C_OUTLINE_LIST * child()
Definition: coutln.h:108
void find_underlined_blobs(BLOBNBOX *u_line, QSPLINE *baseline, float xheight, float baseline_offset, ICOORDELT_LIST *chop_cells)
Definition: underlin.cpp:168
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:70
const TBOX & bounding_box() const
Definition: blobbox.h:231
int16_t right() const
Definition: rect.h:79
EXTERN int textord_fp_chop_error
Definition: fpchop.cpp:35
int16_t bottom() const
Definition: rect.h:65
BLOBNBOX_LIST underlines
Definition: blobbox.h:786
ICOORD step(int index) const
Definition: coutln.h:144
C_BLOB * cblob() const
Definition: blobbox.h:269
#define ASSERT_HOST(x)
Definition: errcode.h:84