tesseract  5.0.0-alpha-619-ge9db
split.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  *
3  * File: split.cpp (Formerly split.c)
4  * Author: Mark Seaman, OCR Technology
5  *
6  * (c) Copyright 1987, Hewlett-Packard Company.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  *
17  *************************************************************************/
18 /*----------------------------------------------------------------------
19  I n c l u d e s
20 ----------------------------------------------------------------------*/
21 // Include automatically generated configuration file if running autoconf.
22 #ifdef HAVE_CONFIG_H
23 #include "config_auto.h"
24 #endif
25 
26 #include "split.h"
27 #include "coutln.h"
28 #include "tprintf.h"
29 
30 #include <algorithm>
31 
32 /*----------------------------------------------------------------------
33  V a r i a b l e s
34 ----------------------------------------------------------------------*/
35 // Limit on the amount of penalty for the chop being off-center.
36 const int kCenterGradeCap = 25;
37 // Ridiculously large priority for splits that are no use.
38 const double kBadPriority = 999.0;
39 
40 BOOL_VAR(wordrec_display_splits, 0, "Display splits");
41 
42 // Returns the bounding box of all the points in the split.
43 TBOX SPLIT::bounding_box() const {
44  return TBOX(
45  std::min(point1->pos.x, point2->pos.x), std::min(point1->pos.y, point2->pos.y),
46  std::max(point1->pos.x, point2->pos.x), std::max(point1->pos.y, point2->pos.y));
47 }
48 
49 // Hides the SPLIT so the outlines appear not to be cut by it.
50 void SPLIT::Hide() const {
51  EDGEPT* edgept = point1;
52  do {
53  edgept->Hide();
54  edgept = edgept->next;
55  } while (!edgept->EqualPos(*point2) && edgept != point1);
56  edgept = point2;
57  do {
58  edgept->Hide();
59  edgept = edgept->next;
60  } while (!edgept->EqualPos(*point1) && edgept != point2);
61 }
62 
63 // Undoes hide, so the outlines are cut by the SPLIT.
64 void SPLIT::Reveal() const {
65  EDGEPT* edgept = point1;
66  do {
67  edgept->Reveal();
68  edgept = edgept->next;
69  } while (!edgept->EqualPos(*point2) && edgept != point1);
70  edgept = point2;
71  do {
72  edgept->Reveal();
73  edgept = edgept->next;
74  } while (!edgept->EqualPos(*point1) && edgept != point2);
75 }
76 
77 // Compute a split priority based on the bounding boxes of the parts.
78 // The arguments here are config parameters defined in Wordrec. Add chop_
79 // to the beginning of the name.
80 float SPLIT::FullPriority(int xmin, int xmax, double overlap_knob,
81  int centered_maxwidth, double center_knob,
82  double width_change_knob) const {
83  TBOX box1 = Box12();
84  TBOX box2 = Box21();
85  int min_left = std::min(box1.left(), box2.left());
86  int max_right = std::max(box1.right(), box2.right());
87  if (xmin < min_left && xmax > max_right) return kBadPriority;
88 
89  float grade = 0.0f;
90  // grade_overlap.
91  int width1 = box1.width();
92  int width2 = box2.width();
93  int min_width = std::min(width1, width2);
94  int overlap = -box1.x_gap(box2);
95  if (overlap == min_width) {
96  grade += 100.0f; // Total overlap.
97  } else {
98  if (2 * overlap > min_width) overlap += 2 * overlap - min_width;
99  if (overlap > 0) grade += overlap_knob * overlap;
100  }
101  // grade_center_of_blob.
102  if (width1 <= centered_maxwidth || width2 <= centered_maxwidth) {
103  grade += std::min(static_cast<double>(kCenterGradeCap), center_knob * abs(width1 - width2));
104  }
105  // grade_width_change.
106  float width_change_grade = 20 - (max_right - min_left - std::max(width1, width2));
107  if (width_change_grade > 0.0f)
108  grade += width_change_grade * width_change_knob;
109  return grade;
110 }
111 
112 // Returns true if *this SPLIT appears OK in the sense that it does not cross
113 // any outlines and does not chop off any ridiculously small pieces.
114 bool SPLIT::IsHealthy(const TBLOB& blob, int min_points, int min_area) const {
115  return !IsLittleChunk(min_points, min_area) &&
117 }
118 
119 // Returns true if the split generates a small chunk in terms of either area
120 // or number of points.
121 bool SPLIT::IsLittleChunk(int min_points, int min_area) const {
122  if (point1->ShortNonCircularSegment(min_points, point2) &&
123  point1->SegmentArea(point2) < min_area) {
124  return true;
125  }
126  if (point2->ShortNonCircularSegment(min_points, point1) &&
127  point2->SegmentArea(point1) < min_area) {
128  return true;
129  }
130  return false;
131 }
132 
133 /**********************************************************************
134  * make_edgept
135  *
136  * Create an EDGEPT and hook it into an existing list of edge points.
137  **********************************************************************/
138 EDGEPT *make_edgept(int x, int y, EDGEPT *next, EDGEPT *prev) {
139  EDGEPT *this_edgept;
140  /* Create point */
141  this_edgept = new EDGEPT;
142  this_edgept->pos.x = x;
143  this_edgept->pos.y = y;
144  // Now deal with the src_outline steps.
145  C_OUTLINE* prev_ol = prev->src_outline;
146  if (prev_ol != nullptr && prev->next == next) {
147  // Compute the fraction of the segment that is being cut.
148  FCOORD segment_vec(next->pos.x - prev->pos.x, next->pos.y - prev->pos.y);
149  FCOORD target_vec(x - prev->pos.x, y - prev->pos.y);
150  double cut_fraction = target_vec.length() / segment_vec.length();
151  // Get the start and end at the step level.
152  ICOORD step_start = prev_ol->position_at_index(prev->start_step);
153  int end_step = prev->start_step + prev->step_count;
154  int step_length = prev_ol->pathlength();
155  ICOORD step_end = prev_ol->position_at_index(end_step % step_length);
156  ICOORD step_vec = step_end - step_start;
157  double target_length = step_vec.length() * cut_fraction;
158  // Find the point on the segment that gives the length nearest to target.
159  int best_step = prev->start_step;
160  ICOORD total_step(0, 0);
161  double best_dist = target_length;
162  for (int s = prev->start_step; s < end_step; ++s) {
163  total_step += prev_ol->step(s % step_length);
164  double dist = fabs(target_length - total_step.length());
165  if (dist < best_dist) {
166  best_dist = dist;
167  best_step = s + 1;
168  }
169  }
170  // The new point is an intermediate point.
171  this_edgept->src_outline = prev_ol;
172  this_edgept->step_count = end_step - best_step;
173  this_edgept->start_step = best_step % step_length;
174  prev->step_count = best_step - prev->start_step;
175  } else {
176  // The new point is poly only.
177  this_edgept->src_outline = nullptr;
178  this_edgept->step_count = 0;
179  this_edgept->start_step = 0;
180  }
181  /* Hook it up */
182  this_edgept->next = next;
183  this_edgept->prev = prev;
184  prev->next = this_edgept;
185  next->prev = this_edgept;
186  /* Set up vec entries */
187  this_edgept->vec.x = this_edgept->next->pos.x - x;
188  this_edgept->vec.y = this_edgept->next->pos.y - y;
189  this_edgept->prev->vec.x = x - this_edgept->prev->pos.x;
190  this_edgept->prev->vec.y = y - this_edgept->prev->pos.y;
191  return this_edgept;
192 }
193 
194 /**********************************************************************
195  * remove_edgept
196  *
197  * Remove a given EDGEPT from its list and delete it.
198  **********************************************************************/
199 void remove_edgept(EDGEPT *point) {
200  EDGEPT *prev = point->prev;
201  EDGEPT *next = point->next;
202  // Add point's steps onto prev's steps if they are from the same outline.
203  if (prev->src_outline == point->src_outline && prev->src_outline != nullptr) {
204  prev->step_count += point->step_count;
205  }
206  prev->next = next;
207  next->prev = prev;
208  prev->vec.x = next->pos.x - prev->pos.x;
209  prev->vec.y = next->pos.y - prev->pos.y;
210  delete point;
211 }
212 
213 /**********************************************************************
214  * Print
215  *
216  * Shows the coordinates of both points in a split.
217  **********************************************************************/
218 void SPLIT::Print() const {
219  tprintf("(%d,%d)--(%d,%d)", point1->pos.x, point1->pos.y, point2->pos.x,
220  point2->pos.y);
221 }
222 
223 #ifndef GRAPHICS_DISABLED
224 // Draws the split in the given window.
225 void SPLIT::Mark(ScrollView* window) const {
226  window->Pen(ScrollView::GREEN);
227  window->Line(point1->pos.x, point1->pos.y, point2->pos.x, point2->pos.y);
228  window->UpdateWindow();
229 }
230 #endif
231 
232 // Creates two outlines out of one by splitting the original one in half.
233 // Inserts the resulting outlines into the given list.
234 void SPLIT::SplitOutlineList(TESSLINE* outlines) const {
235  SplitOutline();
236  while (outlines->next != nullptr) outlines = outlines->next;
237 
238  outlines->next = new TESSLINE;
239  outlines->next->loop = point1;
240  outlines->next->ComputeBoundingBox();
241 
242  outlines = outlines->next;
243 
244  outlines->next = new TESSLINE;
245  outlines->next->loop = point2;
246  outlines->next->ComputeBoundingBox();
247 
248  outlines->next->next = nullptr;
249 }
250 
251 // Makes a split between these two edge points, but does not affect the
252 // outlines to which they belong.
253 void SPLIT::SplitOutline() const {
254  EDGEPT* temp2 = point2->next;
255  EDGEPT* temp1 = point1->next;
256  /* Create two new points */
257  EDGEPT* new_point1 = make_edgept(point1->pos.x, point1->pos.y, temp1, point2);
258  EDGEPT* new_point2 = make_edgept(point2->pos.x, point2->pos.y, temp2, point1);
259  // point1 and 2 are now cross-over points, so they must have nullptr
260  // src_outlines and give their src_outline information their new
261  // replacements.
262  new_point1->src_outline = point1->src_outline;
263  new_point1->start_step = point1->start_step;
264  new_point1->step_count = point1->step_count;
265  new_point2->src_outline = point2->src_outline;
266  new_point2->start_step = point2->start_step;
267  new_point2->step_count = point2->step_count;
268  point1->src_outline = nullptr;
269  point1->start_step = 0;
270  point1->step_count = 0;
271  point2->src_outline = nullptr;
272  point2->start_step = 0;
273  point2->step_count = 0;
274 }
275 
276 // Undoes the effect of SplitOutlineList, correcting the outlines for undoing
277 // the split, but possibly leaving some duplicate outlines.
278 void SPLIT::UnsplitOutlineList(TBLOB* blob) const {
279  /* Modify edge points */
280  UnsplitOutlines();
281 
282  auto* outline1 = new TESSLINE;
283  outline1->next = blob->outlines;
284  blob->outlines = outline1;
285  outline1->loop = point1;
286 
287  auto* outline2 = new TESSLINE;
288  outline2->next = blob->outlines;
289  blob->outlines = outline2;
290  outline2->loop = point2;
291 }
292 
293 // Removes the split that was put between these two points.
294 void SPLIT::UnsplitOutlines() const {
295  EDGEPT* tmp1 = point1->next;
296  EDGEPT* tmp2 = point2->next;
297 
298  tmp1->next->prev = point2;
299  tmp2->next->prev = point1;
300 
301  // tmp2 is coincident with point1. point1 takes tmp2's place as tmp2 is
302  // deleted.
303  point1->next = tmp2->next;
304  point1->src_outline = tmp2->src_outline;
305  point1->start_step = tmp2->start_step;
306  point1->step_count = tmp2->step_count;
307  // Likewise point2 takes tmp1's place.
308  point2->next = tmp1->next;
309  point2->src_outline = tmp1->src_outline;
310  point2->start_step = tmp1->start_step;
311  point2->step_count = tmp1->step_count;
312 
313  delete tmp1;
314  delete tmp2;
315 
316  point1->vec.x = point1->next->pos.x - point1->pos.x;
317  point1->vec.y = point1->next->pos.y - point1->pos.y;
318 
319  point2->vec.x = point2->next->pos.x - point2->pos.x;
320  point2->vec.y = point2->next->pos.y - point2->pos.y;
321 }
TBOX
Definition: cleanapi_test.cc:19
EDGEPT::SegmentArea
int SegmentArea(const EDGEPT *end) const
Definition: blobs.h:143
ScrollView
Definition: scrollview.h:97
SPLIT::IsHealthy
bool IsHealthy(const TBLOB &blob, int min_points, int min_area) const
Definition: split.cpp:113
TESSLINE::ComputeBoundingBox
void ComputeBoundingBox()
Definition: blobs.cpp:212
SPLIT::SplitOutlineList
void SplitOutlineList(TESSLINE *outlines) const
Definition: split.cpp:230
TESSLINE::loop
EDGEPT * loop
Definition: blobs.h:278
SPLIT::point2
EDGEPT * point2
Definition: split.h:101
split.h
EDGEPT::EqualPos
bool EqualPos(const EDGEPT &other) const
Definition: blobs.h:126
EDGEPT::src_outline
C_OUTLINE * src_outline
Definition: blobs.h:192
SPLIT::IsLittleChunk
bool IsLittleChunk(int min_points, int min_area) const
Definition: split.cpp:120
TBLOB::outlines
TESSLINE * outlines
Definition: blobs.h:398
SPLIT::Print
void Print() const
Definition: split.cpp:214
EDGEPT::step_count
int step_count
Definition: blobs.h:195
ICOORD
integer coordinate
Definition: points.h:30
TESSLINE
Definition: blobs.h:201
kCenterGradeCap
const int kCenterGradeCap
Definition: split.cpp:35
ScrollView::Pen
void Pen(Color color)
Definition: scrollview.cpp:717
ICOORD::length
float length() const
find length
Definition: points.h:77
TESSLINE::next
TESSLINE * next
Definition: blobs.h:279
kBadPriority
const double kBadPriority
Definition: split.cpp:37
FCOORD
Definition: points.h:187
C_OUTLINE
Definition: coutln.h:71
EDGEPT::prev
EDGEPT * prev
Definition: blobs.h:191
EDGEPT::start_step
int start_step
Definition: blobs.h:194
C_OUTLINE::position_at_index
ICOORD position_at_index(int index) const
Definition: coutln.h:152
remove_edgept
void remove_edgept(EDGEPT *point)
Definition: split.cpp:196
FCOORD::length
float length() const
find length
Definition: points.h:227
SPLIT::Hide
void Hide() const
Definition: split.cpp:49
SPLIT::Mark
void Mark(ScrollView *window) const
Definition: split.cpp:221
TPOINT::x
int16_t x
Definition: blobs.h:91
ScrollView::UpdateWindow
void UpdateWindow()
Definition: scrollview.cpp:703
SPLIT::UnsplitOutlines
void UnsplitOutlines() const
Definition: split.cpp:290
SPLIT::Box12
TBOX Box12() const
Definition: split.h:41
TPOINT::y
int16_t y
Definition: blobs.h:92
TBOX::width
int16_t width() const
Definition: rect.h:114
make_edgept
EDGEPT * make_edgept(int x, int y, EDGEPT *next, EDGEPT *prev)
Definition: split.cpp:136
BOOL_VAR
#define BOOL_VAR(name, val, comment)
Definition: params.h:303
coutln.h
SPLIT::FullPriority
float FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth, double center_knob, double width_change_knob) const
Definition: split.cpp:79
EDGEPT::vec
VECTOR vec
Definition: blobs.h:185
EDGEPT::ShortNonCircularSegment
bool ShortNonCircularSegment(int min_points, const EDGEPT *end) const
Definition: blobs.h:156
tprintf.h
wordrec_display_splits
bool wordrec_display_splits
Definition: split.cpp:39
SPLIT::UnsplitOutlineList
void UnsplitOutlineList(TBLOB *blob) const
Definition: split.cpp:274
SPLIT::Box21
TBOX Box21() const
Definition: split.h:43
TBLOB
Definition: blobs.h:282
TBOX::left
int16_t left() const
Definition: rect.h:71
ScrollView::GREEN
Definition: scrollview.h:106
SPLIT::SplitOutline
void SplitOutline() const
Definition: split.cpp:249
TBOX::right
int16_t right() const
Definition: rect.h:78
SPLIT::bounding_box
TBOX bounding_box() const
Definition: split.cpp:42
ScrollView::Line
void Line(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:531
EDGEPT
Definition: blobs.h:97
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
EDGEPT::Reveal
void Reveal()
Definition: blobs.h:171
C_OUTLINE::pathlength
int32_t pathlength() const
Definition: coutln.h:134
C_OUTLINE::step
ICOORD step(int index) const
Definition: coutln.h:143
EDGEPT::Hide
void Hide()
Definition: blobs.h:168
SPLIT::point1
EDGEPT * point1
Definition: split.h:100
SPLIT::Reveal
void Reveal() const
Definition: split.cpp:63
EDGEPT::pos
TPOINT pos
Definition: blobs.h:184
EDGEPT::next
EDGEPT * next
Definition: blobs.h:190
TBOX::x_gap
int x_gap(const TBOX &box) const
Definition: rect.h:224
TBOX
Definition: rect.h:33
TBLOB::SegmentCrossesOutline
bool SegmentCrossesOutline(const TPOINT &pt1, const TPOINT &pt2) const
Definition: blobs.h:337