tesseract  4.0.0-1-g2a2b
split.cpp
Go to the documentation of this file.
1 /* -*-C-*-
2  ********************************************************************************
3  *
4  * File: split.cpp (Formerly split.c)
5  * Description:
6  * Author: Mark Seaman, OCR Technology
7  * Created: Fri Oct 16 14:37:00 1987
8  * Modified: Fri May 17 16:27:49 1991 (Mark Seaman) marks@hpgrlt
9  * Language: C
10  * Package: N/A
11  * Status: Reusable Software Component
12  *
13  * (c) Copyright 1987, Hewlett-Packard Company.
14  ** Licensed under the Apache License, Version 2.0 (the "License");
15  ** you may not use this file except in compliance with the License.
16  ** You may obtain a copy of the License at
17  ** http://www.apache.org/licenses/LICENSE-2.0
18  ** Unless required by applicable law or agreed to in writing, software
19  ** distributed under the License is distributed on an "AS IS" BASIS,
20  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21  ** See the License for the specific language governing permissions and
22  ** limitations under the License.
23  *
24  *************************************************************************/
25 /*----------------------------------------------------------------------
26  I n c l u d e s
27 ----------------------------------------------------------------------*/
28 // Include automatically generated configuration file if running autoconf.
29 #ifdef HAVE_CONFIG_H
30 #include "config_auto.h"
31 #endif
32 
33 #include "split.h"
34 #include "coutln.h"
35 #include "tprintf.h"
36 
37 #include <algorithm>
38 
39 /*----------------------------------------------------------------------
40  V a r i a b l e s
41 ----------------------------------------------------------------------*/
42 // Limit on the amount of penalty for the chop being off-center.
43 const int kCenterGradeCap = 25;
44 // Ridiculously large priority for splits that are no use.
45 const double kBadPriority = 999.0;
46 
47 BOOL_VAR(wordrec_display_splits, 0, "Display splits");
48 
49 // Returns the bounding box of all the points in the split.
51  return TBOX(
52  std::min(point1->pos.x, point2->pos.x), std::min(point1->pos.y, point2->pos.y),
53  std::max(point1->pos.x, point2->pos.x), std::max(point1->pos.y, point2->pos.y));
54 }
55 
56 // Hides the SPLIT so the outlines appear not to be cut by it.
57 void SPLIT::Hide() const {
58  EDGEPT* edgept = point1;
59  do {
60  edgept->Hide();
61  edgept = edgept->next;
62  } while (!edgept->EqualPos(*point2) && edgept != point1);
63  edgept = point2;
64  do {
65  edgept->Hide();
66  edgept = edgept->next;
67  } while (!edgept->EqualPos(*point1) && edgept != point2);
68 }
69 
70 // Undoes hide, so the outlines are cut by the SPLIT.
71 void SPLIT::Reveal() const {
72  EDGEPT* edgept = point1;
73  do {
74  edgept->Reveal();
75  edgept = edgept->next;
76  } while (!edgept->EqualPos(*point2) && edgept != point1);
77  edgept = point2;
78  do {
79  edgept->Reveal();
80  edgept = edgept->next;
81  } while (!edgept->EqualPos(*point1) && edgept != point2);
82 }
83 
84 // Compute a split priority based on the bounding boxes of the parts.
85 // The arguments here are config parameters defined in Wordrec. Add chop_
86 // to the beginning of the name.
87 float SPLIT::FullPriority(int xmin, int xmax, double overlap_knob,
88  int centered_maxwidth, double center_knob,
89  double width_change_knob) const {
90  TBOX box1 = Box12();
91  TBOX box2 = Box21();
92  int min_left = std::min(box1.left(), box2.left());
93  int max_right = std::max(box1.right(), box2.right());
94  if (xmin < min_left && xmax > max_right) return kBadPriority;
95 
96  float grade = 0.0f;
97  // grade_overlap.
98  int width1 = box1.width();
99  int width2 = box2.width();
100  int min_width = std::min(width1, width2);
101  int overlap = -box1.x_gap(box2);
102  if (overlap == min_width) {
103  grade += 100.0f; // Total overlap.
104  } else {
105  if (2 * overlap > min_width) overlap += 2 * overlap - min_width;
106  if (overlap > 0) grade += overlap_knob * overlap;
107  }
108  // grade_center_of_blob.
109  if (width1 <= centered_maxwidth || width2 <= centered_maxwidth) {
110  grade += std::min(static_cast<double>(kCenterGradeCap), center_knob * abs(width1 - width2));
111  }
112  // grade_width_change.
113  float width_change_grade = 20 - (max_right - min_left - std::max(width1, width2));
114  if (width_change_grade > 0.0f)
115  grade += width_change_grade * width_change_knob;
116  return grade;
117 }
118 
119 // Returns true if *this SPLIT appears OK in the sense that it does not cross
120 // any outlines and does not chop off any ridiculously small pieces.
121 bool SPLIT::IsHealthy(const TBLOB& blob, int min_points, int min_area) const {
122  return !IsLittleChunk(min_points, min_area) &&
124 }
125 
126 // Returns true if the split generates a small chunk in terms of either area
127 // or number of points.
128 bool SPLIT::IsLittleChunk(int min_points, int min_area) const {
129  if (point1->ShortNonCircularSegment(min_points, point2) &&
130  point1->SegmentArea(point2) < min_area) {
131  return true;
132  }
133  if (point2->ShortNonCircularSegment(min_points, point1) &&
134  point2->SegmentArea(point1) < min_area) {
135  return true;
136  }
137  return false;
138 }
139 
140 /**********************************************************************
141  * make_edgept
142  *
143  * Create an EDGEPT and hook it into an existing list of edge points.
144  **********************************************************************/
145 EDGEPT *make_edgept(int x, int y, EDGEPT *next, EDGEPT *prev) {
146  EDGEPT *this_edgept;
147  /* Create point */
148  this_edgept = new EDGEPT;
149  this_edgept->pos.x = x;
150  this_edgept->pos.y = y;
151  // Now deal with the src_outline steps.
152  C_OUTLINE* prev_ol = prev->src_outline;
153  if (prev_ol != nullptr && prev->next == next) {
154  // Compute the fraction of the segment that is being cut.
155  FCOORD segment_vec(next->pos.x - prev->pos.x, next->pos.y - prev->pos.y);
156  FCOORD target_vec(x - prev->pos.x, y - prev->pos.y);
157  double cut_fraction = target_vec.length() / segment_vec.length();
158  // Get the start and end at the step level.
159  ICOORD step_start = prev_ol->position_at_index(prev->start_step);
160  int end_step = prev->start_step + prev->step_count;
161  int step_length = prev_ol->pathlength();
162  ICOORD step_end = prev_ol->position_at_index(end_step % step_length);
163  ICOORD step_vec = step_end - step_start;
164  double target_length = step_vec.length() * cut_fraction;
165  // Find the point on the segment that gives the length nearest to target.
166  int best_step = prev->start_step;
167  ICOORD total_step(0, 0);
168  double best_dist = target_length;
169  for (int s = prev->start_step; s < end_step; ++s) {
170  total_step += prev_ol->step(s % step_length);
171  double dist = fabs(target_length - total_step.length());
172  if (dist < best_dist) {
173  best_dist = dist;
174  best_step = s + 1;
175  }
176  }
177  // The new point is an intermediate point.
178  this_edgept->src_outline = prev_ol;
179  this_edgept->step_count = end_step - best_step;
180  this_edgept->start_step = best_step % step_length;
181  prev->step_count = best_step - prev->start_step;
182  } else {
183  // The new point is poly only.
184  this_edgept->src_outline = nullptr;
185  this_edgept->step_count = 0;
186  this_edgept->start_step = 0;
187  }
188  /* Hook it up */
189  this_edgept->next = next;
190  this_edgept->prev = prev;
191  prev->next = this_edgept;
192  next->prev = this_edgept;
193  /* Set up vec entries */
194  this_edgept->vec.x = this_edgept->next->pos.x - x;
195  this_edgept->vec.y = this_edgept->next->pos.y - y;
196  this_edgept->prev->vec.x = x - this_edgept->prev->pos.x;
197  this_edgept->prev->vec.y = y - this_edgept->prev->pos.y;
198  return this_edgept;
199 }
200 
201 /**********************************************************************
202  * remove_edgept
203  *
204  * Remove a given EDGEPT from its list and delete it.
205  **********************************************************************/
206 void remove_edgept(EDGEPT *point) {
207  EDGEPT *prev = point->prev;
208  EDGEPT *next = point->next;
209  // Add point's steps onto prev's steps if they are from the same outline.
210  if (prev->src_outline == point->src_outline && prev->src_outline != nullptr) {
211  prev->step_count += point->step_count;
212  }
213  prev->next = next;
214  next->prev = prev;
215  prev->vec.x = next->pos.x - prev->pos.x;
216  prev->vec.y = next->pos.y - prev->pos.y;
217  delete point;
218 }
219 
220 /**********************************************************************
221  * Print
222  *
223  * Shows the coordinates of both points in a split.
224  **********************************************************************/
225 void SPLIT::Print() const {
226  tprintf("(%d,%d)--(%d,%d)", point1->pos.x, point1->pos.y, point2->pos.x,
227  point2->pos.y);
228 }
229 
230 #ifndef GRAPHICS_DISABLED
231 // Draws the split in the given window.
232 void SPLIT::Mark(ScrollView* window) const {
233  window->Pen(ScrollView::GREEN);
234  window->Line(point1->pos.x, point1->pos.y, point2->pos.x, point2->pos.y);
235  window->UpdateWindow();
236 }
237 #endif
238 
239 // Creates two outlines out of one by splitting the original one in half.
240 // Inserts the resulting outlines into the given list.
241 void SPLIT::SplitOutlineList(TESSLINE* outlines) const {
242  SplitOutline();
243  while (outlines->next != nullptr) outlines = outlines->next;
244 
245  outlines->next = new TESSLINE;
246  outlines->next->loop = point1;
247  outlines->next->ComputeBoundingBox();
248 
249  outlines = outlines->next;
250 
251  outlines->next = new TESSLINE;
252  outlines->next->loop = point2;
253  outlines->next->ComputeBoundingBox();
254 
255  outlines->next->next = nullptr;
256 }
257 
258 // Makes a split between these two edge points, but does not affect the
259 // outlines to which they belong.
260 void SPLIT::SplitOutline() const {
261  EDGEPT* temp2 = point2->next;
262  EDGEPT* temp1 = point1->next;
263  /* Create two new points */
264  EDGEPT* new_point1 = make_edgept(point1->pos.x, point1->pos.y, temp1, point2);
265  EDGEPT* new_point2 = make_edgept(point2->pos.x, point2->pos.y, temp2, point1);
266  // point1 and 2 are now cross-over points, so they must have nullptr
267  // src_outlines and give their src_outline information their new
268  // replacements.
269  new_point1->src_outline = point1->src_outline;
270  new_point1->start_step = point1->start_step;
271  new_point1->step_count = point1->step_count;
272  new_point2->src_outline = point2->src_outline;
273  new_point2->start_step = point2->start_step;
274  new_point2->step_count = point2->step_count;
275  point1->src_outline = nullptr;
276  point1->start_step = 0;
277  point1->step_count = 0;
278  point2->src_outline = nullptr;
279  point2->start_step = 0;
280  point2->step_count = 0;
281 }
282 
283 // Undoes the effect of SplitOutlineList, correcting the outlines for undoing
284 // the split, but possibly leaving some duplicate outlines.
285 void SPLIT::UnsplitOutlineList(TBLOB* blob) const {
286  /* Modify edge points */
287  UnsplitOutlines();
288 
289  TESSLINE* outline1 = new TESSLINE;
290  outline1->next = blob->outlines;
291  blob->outlines = outline1;
292  outline1->loop = point1;
293 
294  TESSLINE* outline2 = new TESSLINE;
295  outline2->next = blob->outlines;
296  blob->outlines = outline2;
297  outline2->loop = point2;
298 }
299 
300 // Removes the split that was put between these two points.
302  EDGEPT* tmp1 = point1->next;
303  EDGEPT* tmp2 = point2->next;
304 
305  tmp1->next->prev = point2;
306  tmp2->next->prev = point1;
307 
308  // tmp2 is coincident with point1. point1 takes tmp2's place as tmp2 is
309  // deleted.
310  point1->next = tmp2->next;
311  point1->src_outline = tmp2->src_outline;
312  point1->start_step = tmp2->start_step;
313  point1->step_count = tmp2->step_count;
314  // Likewise point2 takes tmp1's place.
315  point2->next = tmp1->next;
316  point2->src_outline = tmp1->src_outline;
317  point2->start_step = tmp1->start_step;
318  point2->step_count = tmp1->step_count;
319 
320  delete tmp1;
321  delete tmp2;
322 
323  point1->vec.x = point1->next->pos.x - point1->pos.x;
324  point1->vec.y = point1->next->pos.y - point1->pos.y;
325 
326  point2->vec.x = point2->next->pos.x - point2->pos.x;
327  point2->vec.y = point2->next->pos.y - point2->pos.y;
328 }
TBOX Box21() const
Definition: split.h:46
int step_count
Definition: blobs.h:181
EDGEPT * make_edgept(int x, int y, EDGEPT *next, EDGEPT *prev)
Definition: split.cpp:145
void UnsplitOutlines() const
Definition: split.cpp:301
TESSLINE * next
Definition: blobs.h:265
#define BOOL_VAR(name, val, comment)
Definition: params.h:279
void Hide()
Definition: blobs.h:154
TPOINT pos
Definition: blobs.h:170
const int kCenterGradeCap
Definition: split.cpp:43
void Print() const
Definition: split.cpp:225
TBOX bounding_box() const
Definition: split.cpp:50
bool wordrec_display_splits
Definition: split.cpp:47
void UnsplitOutlineList(TBLOB *blob) const
Definition: split.cpp:285
void ComputeBoundingBox()
Definition: blobs.cpp:223
EDGEPT * point2
Definition: split.h:104
bool ShortNonCircularSegment(int min_points, const EDGEPT *end) const
Definition: blobs.h:142
Definition: rect.h:34
void SplitOutline() const
Definition: split.cpp:260
void remove_edgept(EDGEPT *point)
Definition: split.cpp:206
int x_gap(const TBOX &box) const
Definition: rect.h:225
int start_step
Definition: blobs.h:180
void Hide() const
Definition: split.cpp:57
int16_t width() const
Definition: rect.h:115
bool SegmentCrossesOutline(const TPOINT &pt1, const TPOINT &pt2) const
Definition: blobs.h:323
int16_t left() const
Definition: rect.h:72
EDGEPT * point1
Definition: split.h:103
EDGEPT * loop
Definition: blobs.h:264
TBOX Box12() const
Definition: split.h:44
integer coordinate
Definition: points.h:32
ICOORD position_at_index(int index) const
Definition: coutln.h:153
VECTOR vec
Definition: blobs.h:171
int SegmentArea(const EDGEPT *end) const
Definition: blobs.h:129
Definition: blobs.h:83
int32_t pathlength() const
Definition: coutln.h:135
EDGEPT * prev
Definition: blobs.h:177
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
void Reveal() const
Definition: split.cpp:71
float FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth, double center_knob, double width_change_knob) const
Definition: split.cpp:87
bool IsHealthy(const TBLOB &blob, int min_points, int min_area) const
Definition: split.cpp:121
bool IsLittleChunk(int min_points, int min_area) const
Definition: split.cpp:128
int16_t x
Definition: blobs.h:78
float length() const
find length
Definition: points.h:229
void SplitOutlineList(TESSLINE *outlines) const
Definition: split.cpp:241
C_OUTLINE * src_outline
Definition: blobs.h:178
void UpdateWindow()
Definition: scrollview.cpp:706
Definition: points.h:189
float length() const
find length
Definition: points.h:79
int16_t right() const
Definition: rect.h:79
Definition: blobs.h:268
int16_t y
Definition: blobs.h:79
bool EqualPos(const EDGEPT &other) const
Definition: blobs.h:112
void Pen(Color color)
Definition: scrollview.cpp:722
const double kBadPriority
Definition: split.cpp:45
TESSLINE * outlines
Definition: blobs.h:384
EDGEPT * next
Definition: blobs.h:176
ICOORD step(int index) const
Definition: coutln.h:144
void Line(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:534
void Reveal()
Definition: blobs.h:157
void Mark(ScrollView *window) const
Definition: split.cpp:232