tesseract  4.0.0-1-g2a2b
findseam.cpp
Go to the documentation of this file.
1 /* -*-C-*-
2  ********************************************************************************
3  *
4  * File: findseam.cpp (Formerly findseam.c)
5  * Description:
6  * Author: Mark Seaman, OCR Technology
7  * Created: Fri Oct 16 14:37:00 1987
8  * Modified: Tue Jul 30 15:44:59 1991 (Mark Seaman) marks@hpgrlt
9  * Language: C
10  * Package: N/A
11  * Status: Reusable Software Component
12  *
13  * (c) Copyright 1987, Hewlett-Packard Company.
14  ** Licensed under the Apache License, Version 2.0 (the "License");
15  ** you may not use this file except in compliance with the License.
16  ** You may obtain a copy of the License at
17  ** http://www.apache.org/licenses/LICENSE-2.0
18  ** Unless required by applicable law or agreed to in writing, software
19  ** distributed under the License is distributed on an "AS IS" BASIS,
20  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21  ** See the License for the specific language governing permissions and
22  ** limitations under the License.
23  *
24  *********************************************************************************/
25 /*----------------------------------------------------------------------
26  I n c l u d e s
27 ----------------------------------------------------------------------*/
28 #include "findseam.h"
29 #include "plotedges.h"
30 #include "outlines.h"
31 #include "seam.h"
32 #include "wordrec.h"
33 
34 // Include automatically generated configuration file if running autoconf.
35 #ifdef HAVE_CONFIG_H
36 #include "config_auto.h"
37 #endif
38 
39 /**********************************************************************
40  * partial_split_priority
41  *
42  * Assign a priority to this split based on the features that it has.
43  * Grade it according to the different rating schemes and return the
44  * value of its goodness.
45  **********************************************************************/
46 
47 #define partial_split_priority(split) \
48  (grade_split_length(split) + grade_sharpness(split))
49 
50 /*----------------------------------------------------------------------
51  T y p e s
52 ----------------------------------------------------------------------*/
53 #define SPLIT_CLOSENESS 20/* Difference in x value */
54  /* How many to keep */
55 #define MAX_NUM_SEAMS 150
56  /* How many to keep */
57 #define MAX_OLD_SEAMS 150
58 #define NO_FULL_PRIORITY -1/* Special marker for pri. */
59  /* Evaluate right away */
60 #define BAD_PRIORITY 9999.0
61 
62 /*----------------------------------------------------------------------
63  F u n c t i o n s
64 ----------------------------------------------------------------------*/
65 namespace tesseract {
66 
67 /**********************************************************************
68  * add_seam_to_queue
69  *
70  * Adds the given new_seam to the seams priority queue, unless it is full
71  * and the new seam is worse than the worst.
72  **********************************************************************/
73 void Wordrec::add_seam_to_queue(float new_priority, SEAM *new_seam,
74  SeamQueue* seams) {
75  if (new_seam == nullptr) return;
76  if (chop_debug) {
77  tprintf("Pushing new seam with priority %g :", new_priority);
78  new_seam->Print("seam: ");
79  }
80  if (seams->size() >= MAX_NUM_SEAMS) {
81  SeamPair old_pair(0, nullptr);
82  if (seams->PopWorst(&old_pair) && old_pair.key() <= new_priority) {
83  if (chop_debug) {
84  tprintf("Old seam staying with priority %g\n", old_pair.key());
85  }
86  delete new_seam;
87  seams->Push(&old_pair);
88  return;
89  } else if (chop_debug) {
90  tprintf("New seam with priority %g beats old worst seam with %g\n",
91  new_priority, old_pair.key());
92  }
93  }
94  SeamPair new_pair(new_priority, new_seam);
95  seams->Push(&new_pair);
96 }
97 
98 
99 /**********************************************************************
100  * choose_best_seam
101  *
102  * Choose the best seam that can be created by assembling this a
103  * collection of splits. A queue of all the possible seams is
104  * maintained. Each new split received is placed in that queue with
105  * its partial priority value. These values in the seam queue are
106  * evaluated and combined until a good enough seam is found. If no
107  * further good seams are being found then this function returns to the
108  * caller, who will send more splits. If this function is called with
109  * a split of nullptr, then no further splits can be supplied by the
110  * caller.
111  **********************************************************************/
112 void Wordrec::choose_best_seam(SeamQueue *seam_queue, const SPLIT *split,
113  PRIORITY priority, SEAM **seam_result,
114  TBLOB *blob, SeamPile *seam_pile) {
115  SEAM *seam;
116  char str[80];
117  float my_priority;
118  /* Add seam of split */
119  my_priority = priority;
120  if (split != nullptr) {
121  TPOINT split_point = split->point1->pos;
122  split_point += split->point2->pos;
123  split_point /= 2;
124  seam = new SEAM(my_priority, split_point, *split);
125  if (chop_debug > 1) seam->Print("Partial priority ");
126  add_seam_to_queue(my_priority, seam, seam_queue);
127 
128  if (my_priority > chop_good_split)
129  return;
130  }
131 
132  TBOX bbox = blob->bounding_box();
133  /* Queue loop */
134  while (!seam_queue->empty()) {
135  SeamPair seam_pair;
136  seam_queue->Pop(&seam_pair);
137  seam = seam_pair.extract_data();
138  /* Set full priority */
139  my_priority = seam->FullPriority(bbox.left(), bbox.right(),
142  if (chop_debug) {
143  sprintf (str, "Full my_priority %0.0f, ", my_priority);
144  seam->Print(str);
145  }
146 
147  if ((*seam_result == nullptr || (*seam_result)->priority() > my_priority) &&
148  my_priority < chop_ok_split) {
149  /* No crossing */
150  if (seam->IsHealthy(*blob, chop_min_outline_points,
152  delete *seam_result;
153  *seam_result = new SEAM(*seam);
154  (*seam_result)->set_priority(my_priority);
155  } else {
156  delete seam;
157  seam = nullptr;
158  my_priority = BAD_PRIORITY;
159  }
160  }
161 
162  if (my_priority < chop_good_split) {
163  delete seam;
164  return; /* Made good answer */
165  }
166 
167  if (seam) {
168  /* Combine with others */
169  if (seam_pile->size() < chop_seam_pile_size) {
170  combine_seam(*seam_pile, seam, seam_queue);
171  SeamDecPair pair(seam_pair.key(), seam);
172  seam_pile->Push(&pair);
173  } else if (chop_new_seam_pile &&
174  seam_pile->size() == chop_seam_pile_size &&
175  seam_pile->PeekTop().key() > seam_pair.key()) {
176  combine_seam(*seam_pile, seam, seam_queue);
177  SeamDecPair pair;
178  seam_pile->Pop(&pair); // pop the worst.
179  // Replace the seam in pair (deleting the old one) with
180  // the new seam and score, then push back into the heap.
181  pair.set_key(seam_pair.key());
182  pair.set_data(seam);
183  seam_pile->Push(&pair);
184  } else {
185  delete seam;
186  }
187  }
188 
189  my_priority = seam_queue->empty() ? NO_FULL_PRIORITY
190  : seam_queue->PeekTop().key();
191  if ((my_priority > chop_ok_split) ||
192  (my_priority > chop_good_split && split))
193  return;
194  }
195 }
196 
197 
198 /**********************************************************************
199  * combine_seam
200  *
201  * Find other seams to combine with this one. The new seams that result
202  * from this union should be added to the seam queue. The return value
203  * tells whether or not any additional seams were added to the queue.
204  **********************************************************************/
205 void Wordrec::combine_seam(const SeamPile& seam_pile,
206  const SEAM* seam, SeamQueue* seam_queue) {
207  for (int x = 0; x < seam_pile.size(); ++x) {
208  const SEAM *this_one = seam_pile.get(x).data();
209  if (seam->CombineableWith(*this_one, SPLIT_CLOSENESS, chop_ok_split)) {
210  SEAM *new_one = new SEAM(*seam);
211  new_one->CombineWith(*this_one);
212  if (chop_debug > 1) new_one->Print("Combo priority ");
213  add_seam_to_queue(new_one->priority(), new_one, seam_queue);
214  }
215  }
216 }
217 
218 /**********************************************************************
219  * pick_good_seam
220  *
221  * Find and return a good seam that will split this blob into two pieces.
222  * Work from the outlines provided.
223  **********************************************************************/
225  SeamPile seam_pile(chop_seam_pile_size);
226  EDGEPT *points[MAX_NUM_POINTS];
227  EDGEPT_CLIST new_points;
228  SEAM *seam = nullptr;
229  TESSLINE *outline;
230  int16_t num_points = 0;
231 
232 #ifndef GRAPHICS_DISABLED
233  if (chop_debug > 2)
234  wordrec_display_splits.set_value(true);
235 
236  draw_blob_edges(blob);
237 #endif
238 
239  PointHeap point_heap(MAX_NUM_POINTS);
240  for (outline = blob->outlines; outline; outline = outline->next)
241  prioritize_points(outline, &point_heap);
242 
243  while (!point_heap.empty() && num_points < MAX_NUM_POINTS) {
244  points[num_points++] = point_heap.PeekTop().data;
245  point_heap.Pop(nullptr);
246  }
247 
248  /* Initialize queue */
249  SeamQueue seam_queue(MAX_NUM_SEAMS);
250 
251  try_point_pairs(points, num_points, &seam_queue, &seam_pile, &seam, blob);
252  try_vertical_splits(points, num_points, &new_points,
253  &seam_queue, &seam_pile, &seam, blob);
254 
255  if (seam == nullptr) {
256  choose_best_seam(&seam_queue, nullptr, BAD_PRIORITY, &seam, blob, &seam_pile);
257  } else if (seam->priority() > chop_good_split) {
258  choose_best_seam(&seam_queue, nullptr, seam->priority(), &seam, blob,
259  &seam_pile);
260  }
261 
262  EDGEPT_C_IT it(&new_points);
263  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
264  EDGEPT *inserted_point = it.data();
265  if (seam == nullptr || !seam->UsesPoint(inserted_point)) {
266  for (outline = blob->outlines; outline; outline = outline->next) {
267  if (outline->loop == inserted_point) {
268  outline->loop = outline->loop->next;
269  }
270  }
271  remove_edgept(inserted_point);
272  }
273  }
274 
275  if (seam) {
276  if (seam->priority() > chop_ok_split) {
277  delete seam;
278  seam = nullptr;
279  }
280 #ifndef GRAPHICS_DISABLED
281  else if (wordrec_display_splits) {
282  seam->Mark(edge_window);
283  if (chop_debug > 2) {
286  }
287  }
288 #endif
289  }
290 
291  if (chop_debug)
292  wordrec_display_splits.set_value(false);
293 
294  return (seam);
295 }
296 
297 
298 /**********************************************************************
299  * try_point_pairs
300  *
301  * Try all the splits that are produced by pairing critical points
302  * together. See if any of them are suitable for use. Use a seam
303  * queue and seam pile that have already been initialized and used.
304  **********************************************************************/
306  int16_t num_points,
307  SeamQueue* seam_queue,
308  SeamPile* seam_pile,
309  SEAM ** seam,
310  TBLOB * blob) {
311  int16_t x;
312  int16_t y;
313  PRIORITY priority;
314 
315  for (x = 0; x < num_points; x++) {
316  for (y = x + 1; y < num_points; y++) {
317  if (points[y] &&
318  points[x]->WeightedDistance(*points[y], chop_x_y_weight) <
320  points[x] != points[y]->next && points[y] != points[x]->next &&
321  !is_exterior_point(points[x], points[y]) &&
322  !is_exterior_point(points[y], points[x])) {
323  SPLIT split(points[x], points[y]);
324  priority = partial_split_priority(&split);
325 
326  choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile);
327  }
328  }
329  }
330 }
331 
332 
333 /**********************************************************************
334  * try_vertical_splits
335  *
336  * Try all the splits that are produced by vertical projection to see
337  * if any of them are suitable for use. Use a seam queue and seam pile
338  * that have already been initialized and used.
339  * Return in new_points a collection of points that were inserted into
340  * the blob while examining vertical splits and which may safely be
341  * removed once a seam is chosen if they are not part of the seam.
342  **********************************************************************/
344  int16_t num_points,
345  EDGEPT_CLIST *new_points,
346  SeamQueue* seam_queue,
347  SeamPile* seam_pile,
348  SEAM ** seam,
349  TBLOB * blob) {
350  EDGEPT *vertical_point = nullptr;
351  int16_t x;
352  PRIORITY priority;
353  TESSLINE *outline;
354 
355  for (x = 0; x < num_points; x++) {
356  vertical_point = nullptr;
357  for (outline = blob->outlines; outline; outline = outline->next) {
358  vertical_projection_point(points[x], outline->loop,
359  &vertical_point, new_points);
360  }
361 
362  if (vertical_point && points[x] != vertical_point->next &&
363  vertical_point != points[x]->next &&
364  points[x]->WeightedDistance(*vertical_point, chop_x_y_weight) <
366  SPLIT split(points[x], vertical_point);
367  priority = partial_split_priority(&split);
368  choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile);
369  }
370  }
371 }
372 
373 }
bool PopWorst(Pair *entry)
Definition: genericheap.h:140
TESSLINE * next
Definition: blobs.h:265
void Mark(ScrollView *window) const
Definition: seam.cpp:186
const Pair & get(int index) const
Definition: genericheap.h:87
bool IsHealthy(const TBLOB &blob, int min_points, int min_area) const
Definition: seam.cpp:72
double chop_center_knob
Definition: wordrec.h:220
TPOINT pos
Definition: blobs.h:170
bool UsesPoint(const EDGEPT *point) const
Definition: seam.h:88
bool wordrec_display_splits
Definition: split.cpp:47
bool chop_new_seam_pile
Definition: wordrec.h:215
Definition: seam.h:44
EDGEPT * point2
Definition: split.h:104
#define MAX_NUM_POINTS
Definition: chop.h:39
SEAM * pick_good_seam(TBLOB *blob)
Definition: findseam.cpp:224
Definition: rect.h:34
void combine_seam(const SeamPile &seam_pile, const SEAM *seam, SeamQueue *seam_queue)
Definition: findseam.cpp:205
int chop_centered_maxwidth
Definition: wordrec.h:222
void remove_edgept(EDGEPT *point)
Definition: split.cpp:206
double chop_width_change_knob
Definition: wordrec.h:224
void set_data(Data *new_data)
Definition: kdpair.h:126
double chop_overlap_knob
Definition: wordrec.h:219
#define SPLIT_CLOSENESS
Definition: findseam.cpp:53
const Key & key() const
Definition: kdpair.h:116
void Print(const char *label) const
Definition: seam.cpp:160
int chop_min_outline_area
Definition: wordrec.h:217
#define edge_window_wait()
Definition: plotedges.h:61
#define BAD_PRIORITY
Definition: findseam.cpp:60
int16_t left() const
Definition: rect.h:72
int chop_min_outline_points
Definition: wordrec.h:213
bool empty() const
Definition: genericheap.h:68
float priority() const
Definition: seam.h:65
EDGEPT * point1
Definition: split.h:103
EDGEPT * loop
Definition: blobs.h:264
void set_key(const Key &new_key)
Definition: kdpair.h:119
bool CombineableWith(const SEAM &other, int max_x_dist, float max_total_priority) const
Definition: seam.cpp:46
#define NO_FULL_PRIORITY
Definition: findseam.cpp:58
const Pair & PeekTop() const
Definition: genericheap.h:108
Definition: blobs.h:83
void try_vertical_splits(EDGEPT *points[MAX_NUM_POINTS], int16_t num_points, EDGEPT_CLIST *new_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob)
Definition: findseam.cpp:343
Definition: split.h:37
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
TBOX bounding_box() const
Definition: blobs.cpp:478
void Push(Pair *entry)
Definition: genericheap.h:95
double chop_good_split
Definition: wordrec.h:226
#define update_edge_window()
Definition: plotedges.h:49
#define partial_split_priority(split)
Definition: findseam.cpp:47
void prioritize_points(TESSLINE *outline, PointHeap *points)
Definition: chop.cpp:160
Data * extract_data()
Definition: kdpair.h:131
void choose_best_seam(SeamQueue *seam_queue, const SPLIT *split, PRIORITY priority, SEAM **seam_result, TBLOB *blob, SeamPile *seam_pile)
Definition: findseam.cpp:112
double chop_ok_split
Definition: wordrec.h:225
float PRIORITY
Definition: seam.h:42
#define MAX_NUM_SEAMS
Definition: findseam.cpp:55
ScrollView * edge_window
Definition: plotedges.cpp:40
void vertical_projection_point(EDGEPT *split_point, EDGEPT *target_point, EDGEPT **best_point, EDGEPT_CLIST *new_points)
Definition: chop.cpp:272
int WeightedDistance(const EDGEPT &other, int x_factor) const
Definition: blobs.h:106
int16_t right() const
Definition: rect.h:79
Definition: blobs.h:268
void add_seam_to_queue(float new_priority, SEAM *new_seam, SeamQueue *seams)
Definition: findseam.cpp:73
Definition: blobs.h:57
TESSLINE * outlines
Definition: blobs.h:384
EDGEPT * next
Definition: blobs.h:176
void try_point_pairs(EDGEPT *points[MAX_NUM_POINTS], int16_t num_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob)
Definition: findseam.cpp:305
#define is_exterior_point(edge, point)
Definition: outlines.h:98
void CombineWith(const SEAM &other)
Definition: seam.cpp:60
int chop_seam_pile_size
Definition: wordrec.h:214
bool Pop(Pair *entry)
Definition: genericheap.h:118
float FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth, double center_knob, double width_change_knob) const
Definition: seam.cpp:245
void draw_blob_edges(TBLOB *blob)
Definition: plotedges.cpp:74