tesseract  5.0.0-alpha-619-ge9db
findseam.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  *
3  * File: findseam.cpp (Formerly findseam.c)
4  * Author: Mark Seaman, OCR Technology
5  *
6  * (c) Copyright 1987, Hewlett-Packard Company.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  *
17  *****************************************************************************/
18 /*----------------------------------------------------------------------
19  I n c l u d e s
20 ----------------------------------------------------------------------*/
21 #include "findseam.h"
22 #include "plotedges.h"
23 #include "outlines.h"
24 #include "seam.h"
25 #include "wordrec.h"
26 
27 // Include automatically generated configuration file if running autoconf.
28 #ifdef HAVE_CONFIG_H
29 #include "config_auto.h"
30 #endif
31 
32 /**********************************************************************
33  * partial_split_priority
34  *
35  * Assign a priority to this split based on the features that it has.
36  * Grade it according to the different rating schemes and return the
37  * value of its goodness.
38  **********************************************************************/
39 
40 #define partial_split_priority(split) \
41  (grade_split_length(split) + grade_sharpness(split))
42 
43 /*----------------------------------------------------------------------
44  T y p e s
45 ----------------------------------------------------------------------*/
46 #define SPLIT_CLOSENESS 20/* Difference in x value */
47  /* How many to keep */
48 #define MAX_NUM_SEAMS 150
49  /* How many to keep */
50 #define NO_FULL_PRIORITY -1/* Special marker for pri. */
51  /* Evaluate right away */
52 #define BAD_PRIORITY 9999.0
53 
54 /*----------------------------------------------------------------------
55  F u n c t i o n s
56 ----------------------------------------------------------------------*/
57 namespace tesseract {
58 
59 /**********************************************************************
60  * add_seam_to_queue
61  *
62  * Adds the given new_seam to the seams priority queue, unless it is full
63  * and the new seam is worse than the worst.
64  **********************************************************************/
65 void Wordrec::add_seam_to_queue(float new_priority, SEAM *new_seam,
66  SeamQueue* seams) {
67  if (new_seam == nullptr) return;
68  if (chop_debug) {
69  tprintf("Pushing new seam with priority %g :", new_priority);
70  new_seam->Print("seam: ");
71  }
72  if (seams->size() >= MAX_NUM_SEAMS) {
73  SeamPair old_pair(0, nullptr);
74  if (seams->PopWorst(&old_pair) && old_pair.key() <= new_priority) {
75  if (chop_debug) {
76  tprintf("Old seam staying with priority %g\n", old_pair.key());
77  }
78  delete new_seam;
79  seams->Push(&old_pair);
80  return;
81  } else if (chop_debug) {
82  tprintf("New seam with priority %g beats old worst seam with %g\n",
83  new_priority, old_pair.key());
84  }
85  }
86  SeamPair new_pair(new_priority, new_seam);
87  seams->Push(&new_pair);
88 }
89 
90 
91 /**********************************************************************
92  * choose_best_seam
93  *
94  * Choose the best seam that can be created by assembling this a
95  * collection of splits. A queue of all the possible seams is
96  * maintained. Each new split received is placed in that queue with
97  * its partial priority value. These values in the seam queue are
98  * evaluated and combined until a good enough seam is found. If no
99  * further good seams are being found then this function returns to the
100  * caller, who will send more splits. If this function is called with
101  * a split of nullptr, then no further splits can be supplied by the
102  * caller.
103  **********************************************************************/
104 void Wordrec::choose_best_seam(SeamQueue *seam_queue, const SPLIT *split,
105  PRIORITY priority, SEAM **seam_result,
106  TBLOB *blob, SeamPile *seam_pile) {
107  SEAM *seam;
108  char str[80];
109  float my_priority;
110  /* Add seam of split */
111  my_priority = priority;
112  if (split != nullptr) {
113  TPOINT split_point = split->point1->pos;
114  split_point += split->point2->pos;
115  split_point /= 2;
116  seam = new SEAM(my_priority, split_point, *split);
117  if (chop_debug > 1) seam->Print("Partial priority ");
118  add_seam_to_queue(my_priority, seam, seam_queue);
119 
120  if (my_priority > chop_good_split)
121  return;
122  }
123 
124  TBOX bbox = blob->bounding_box();
125  /* Queue loop */
126  while (!seam_queue->empty()) {
127  SeamPair seam_pair;
128  seam_queue->Pop(&seam_pair);
129  seam = seam_pair.extract_data();
130  /* Set full priority */
131  my_priority = seam->FullPriority(bbox.left(), bbox.right(),
134  if (chop_debug) {
135  sprintf (str, "Full my_priority %0.0f, ", my_priority);
136  seam->Print(str);
137  }
138 
139  if ((*seam_result == nullptr || (*seam_result)->priority() > my_priority) &&
140  my_priority < chop_ok_split) {
141  /* No crossing */
142  if (seam->IsHealthy(*blob, chop_min_outline_points,
144  delete *seam_result;
145  *seam_result = new SEAM(*seam);
146  (*seam_result)->set_priority(my_priority);
147  } else {
148  delete seam;
149  seam = nullptr;
150  my_priority = BAD_PRIORITY;
151  }
152  }
153 
154  if (my_priority < chop_good_split) {
155  delete seam;
156  return; /* Made good answer */
157  }
158 
159  if (seam) {
160  /* Combine with others */
161  if (seam_pile->size() < chop_seam_pile_size) {
162  combine_seam(*seam_pile, seam, seam_queue);
163  SeamDecPair pair(seam_pair.key(), seam);
164  seam_pile->Push(&pair);
165  } else if (chop_new_seam_pile &&
166  seam_pile->size() == chop_seam_pile_size &&
167  seam_pile->PeekTop().key() > seam_pair.key()) {
168  combine_seam(*seam_pile, seam, seam_queue);
169  SeamDecPair pair;
170  seam_pile->Pop(&pair); // pop the worst.
171  // Replace the seam in pair (deleting the old one) with
172  // the new seam and score, then push back into the heap.
173  pair.set_key(seam_pair.key());
174  pair.set_data(seam);
175  seam_pile->Push(&pair);
176  } else {
177  delete seam;
178  }
179  }
180 
181  my_priority = seam_queue->empty() ? NO_FULL_PRIORITY
182  : seam_queue->PeekTop().key();
183  if ((my_priority > chop_ok_split) ||
184  (my_priority > chop_good_split && split))
185  return;
186  }
187 }
188 
189 
190 /**********************************************************************
191  * combine_seam
192  *
193  * Find other seams to combine with this one. The new seams that result
194  * from this union should be added to the seam queue. The return value
195  * tells whether or not any additional seams were added to the queue.
196  **********************************************************************/
197 void Wordrec::combine_seam(const SeamPile& seam_pile,
198  const SEAM* seam, SeamQueue* seam_queue) {
199  for (int x = 0; x < seam_pile.size(); ++x) {
200  const SEAM *this_one = seam_pile.get(x).data();
201  if (seam->CombineableWith(*this_one, SPLIT_CLOSENESS, chop_ok_split)) {
202  SEAM *new_one = new SEAM(*seam);
203  new_one->CombineWith(*this_one);
204  if (chop_debug > 1) new_one->Print("Combo priority ");
205  add_seam_to_queue(new_one->priority(), new_one, seam_queue);
206  }
207  }
208 }
209 
210 /**********************************************************************
211  * pick_good_seam
212  *
213  * Find and return a good seam that will split this blob into two pieces.
214  * Work from the outlines provided.
215  **********************************************************************/
217  SeamPile seam_pile(chop_seam_pile_size);
218  EDGEPT *points[MAX_NUM_POINTS];
219  EDGEPT_CLIST new_points;
220  SEAM *seam = nullptr;
221  TESSLINE *outline;
222  int16_t num_points = 0;
223 
224 #ifndef GRAPHICS_DISABLED
225  if (chop_debug > 2)
226  wordrec_display_splits.set_value(true);
227 
228  draw_blob_edges(blob);
229 #endif
230 
231  PointHeap point_heap(MAX_NUM_POINTS);
232  for (outline = blob->outlines; outline; outline = outline->next)
233  prioritize_points(outline, &point_heap);
234 
235  while (!point_heap.empty() && num_points < MAX_NUM_POINTS) {
236  points[num_points++] = point_heap.PeekTop().data;
237  point_heap.Pop(nullptr);
238  }
239 
240  /* Initialize queue */
241  SeamQueue seam_queue(MAX_NUM_SEAMS);
242 
243  try_point_pairs(points, num_points, &seam_queue, &seam_pile, &seam, blob);
244  try_vertical_splits(points, num_points, &new_points,
245  &seam_queue, &seam_pile, &seam, blob);
246 
247  if (seam == nullptr) {
248  choose_best_seam(&seam_queue, nullptr, BAD_PRIORITY, &seam, blob, &seam_pile);
249  } else if (seam->priority() > chop_good_split) {
250  choose_best_seam(&seam_queue, nullptr, seam->priority(), &seam, blob,
251  &seam_pile);
252  }
253 
254  EDGEPT_C_IT it(&new_points);
255  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
256  EDGEPT *inserted_point = it.data();
257  if (seam == nullptr || !seam->UsesPoint(inserted_point)) {
258  for (outline = blob->outlines; outline; outline = outline->next) {
259  if (outline->loop == inserted_point) {
260  outline->loop = outline->loop->next;
261  }
262  }
263  remove_edgept(inserted_point);
264  }
265  }
266 
267  if (seam) {
268  if (seam->priority() > chop_ok_split) {
269  delete seam;
270  seam = nullptr;
271  }
272 #ifndef GRAPHICS_DISABLED
273  else if (wordrec_display_splits) {
274  seam->Mark(edge_window);
275  if (chop_debug > 2) {
278  }
279  }
280 #endif
281  }
282 
283  if (chop_debug)
284  wordrec_display_splits.set_value(false);
285 
286  return (seam);
287 }
288 
289 
290 /**********************************************************************
291  * try_point_pairs
292  *
293  * Try all the splits that are produced by pairing critical points
294  * together. See if any of them are suitable for use. Use a seam
295  * queue and seam pile that have already been initialized and used.
296  **********************************************************************/
298  int16_t num_points,
299  SeamQueue* seam_queue,
300  SeamPile* seam_pile,
301  SEAM ** seam,
302  TBLOB * blob) {
303  int16_t x;
304  int16_t y;
305  PRIORITY priority;
306 
307  for (x = 0; x < num_points; x++) {
308  for (y = x + 1; y < num_points; y++) {
309  if (points[y] &&
310  points[x]->WeightedDistance(*points[y], chop_x_y_weight) <
312  points[x] != points[y]->next && points[y] != points[x]->next &&
313  !is_exterior_point(points[x], points[y]) &&
314  !is_exterior_point(points[y], points[x])) {
315  SPLIT split(points[x], points[y]);
316  priority = partial_split_priority(&split);
317 
318  choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile);
319  }
320  }
321  }
322 }
323 
324 
325 /**********************************************************************
326  * try_vertical_splits
327  *
328  * Try all the splits that are produced by vertical projection to see
329  * if any of them are suitable for use. Use a seam queue and seam pile
330  * that have already been initialized and used.
331  * Return in new_points a collection of points that were inserted into
332  * the blob while examining vertical splits and which may safely be
333  * removed once a seam is chosen if they are not part of the seam.
334  **********************************************************************/
336  int16_t num_points,
337  EDGEPT_CLIST *new_points,
338  SeamQueue* seam_queue,
339  SeamPile* seam_pile,
340  SEAM ** seam,
341  TBLOB * blob) {
342  EDGEPT *vertical_point = nullptr;
343  int16_t x;
344  PRIORITY priority;
345  TESSLINE *outline;
346 
347  for (x = 0; x < num_points; x++) {
348  vertical_point = nullptr;
349  for (outline = blob->outlines; outline; outline = outline->next) {
350  vertical_projection_point(points[x], outline->loop,
351  &vertical_point, new_points);
352  }
353 
354  if (vertical_point && points[x] != vertical_point->next &&
355  vertical_point != points[x]->next &&
356  points[x]->WeightedDistance(*vertical_point, chop_x_y_weight) <
358  SPLIT split(points[x], vertical_point);
359  priority = partial_split_priority(&split);
360  choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile);
361  }
362  }
363 }
364 
365 }
tesseract::GenericHeap
Definition: genericheap.h:58
tesseract::Wordrec::chop_ok_split
double chop_ok_split
Definition: wordrec.h:221
SPLIT_CLOSENESS
#define SPLIT_CLOSENESS
Definition: findseam.cpp:44
tesseract::GenericHeap::Pop
bool Pop(Pair *entry)
Definition: genericheap.h:118
plotedges.h
MAX_NUM_POINTS
#define MAX_NUM_POINTS
Definition: chop.h:31
tesseract::Wordrec::chop_new_seam_pile
bool chop_new_seam_pile
Definition: wordrec.h:211
TPOINT
Definition: blobs.h:49
TESSLINE::loop
EDGEPT * loop
Definition: blobs.h:278
SPLIT::point2
EDGEPT * point2
Definition: split.h:101
tesseract::Wordrec::try_point_pairs
void try_point_pairs(EDGEPT *points[MAX_NUM_POINTS], int16_t num_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob)
Definition: findseam.cpp:290
tesseract::GenericHeap::PeekTop
const Pair & PeekTop() const
Definition: genericheap.h:108
draw_blob_edges
void draw_blob_edges(TBLOB *blob)
Definition: plotedges.cpp:65
tesseract::Wordrec::chop_centered_maxwidth
int chop_centered_maxwidth
Definition: wordrec.h:218
tesseract::KDPtrPairDec
Definition: kdpair.h:162
TBLOB::outlines
TESSLINE * outlines
Definition: blobs.h:398
tesseract::Wordrec::try_vertical_splits
void try_vertical_splits(EDGEPT *points[MAX_NUM_POINTS], int16_t num_points, EDGEPT_CLIST *new_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob)
Definition: findseam.cpp:327
tesseract::KDPtrPair::extract_data
Data * extract_data()
Definition: kdpair.h:131
tesseract::Wordrec::chop_seam_pile_size
int chop_seam_pile_size
Definition: wordrec.h:210
tesseract::Wordrec::chop_width_change_knob
double chop_width_change_knob
Definition: wordrec.h:220
partial_split_priority
#define partial_split_priority(split)
Definition: findseam.cpp:38
wordrec.h
tesseract::KDPtrPair::set_data
void set_data(Data *new_data)
Definition: kdpair.h:126
tesseract::Wordrec::chop_debug
int chop_debug
Definition: wordrec.h:204
TESSLINE
Definition: blobs.h:201
tesseract::GenericHeap::PopWorst
bool PopWorst(Pair *entry)
Definition: genericheap.h:140
TESSLINE::next
TESSLINE * next
Definition: blobs.h:279
SEAM
Definition: seam.h:36
tesseract::Wordrec::add_seam_to_queue
void add_seam_to_queue(float new_priority, SEAM *new_seam, SeamQueue *seams)
Definition: findseam.cpp:62
tesseract::GenericHeap::size
int size() const
Definition: genericheap.h:71
remove_edgept
void remove_edgept(EDGEPT *point)
Definition: split.cpp:196
tesseract::Wordrec::vertical_projection_point
void vertical_projection_point(EDGEPT *split_point, EDGEPT *target_point, EDGEPT **best_point, EDGEPT_CLIST *new_points)
Definition: chop.cpp:284
tesseract::Wordrec::chop_overlap_knob
double chop_overlap_knob
Definition: wordrec.h:215
is_exterior_point
#define is_exterior_point(edge, point)
Definition: outlines.h:86
tesseract::Wordrec::chop_center_knob
double chop_center_knob
Definition: wordrec.h:216
tesseract::Wordrec::pick_good_seam
SEAM * pick_good_seam(TBLOB *blob)
Definition: findseam.cpp:210
tesseract::Wordrec::prioritize_points
void prioritize_points(TESSLINE *outline, PointHeap *points)
Definition: chop.cpp:172
SEAM::Print
void Print(const char *label) const
Definition: seam.cpp:152
tesseract
Definition: baseapi.h:65
tesseract::Wordrec::choose_best_seam
void choose_best_seam(SeamQueue *seam_queue, const SPLIT *split, PRIORITY priority, SEAM **seam_result, TBLOB *blob, SeamPile *seam_pile)
Definition: findseam.cpp:100
tesseract::KDPtrPairInc
Definition: kdpair.h:145
TBLOB::bounding_box
TBOX bounding_box() const
Definition: blobs.cpp:466
wordrec_display_splits
bool wordrec_display_splits
Definition: split.cpp:39
EDGEPT::WeightedDistance
int WeightedDistance(const EDGEPT &other, int x_factor) const
Definition: blobs.h:120
MAX_NUM_SEAMS
#define MAX_NUM_SEAMS
Definition: findseam.cpp:46
tesseract::GenericHeap::Push
void Push(Pair *entry)
Definition: genericheap.h:95
NO_FULL_PRIORITY
#define NO_FULL_PRIORITY
Definition: findseam.cpp:48
SEAM::UsesPoint
bool UsesPoint(const EDGEPT *point) const
Definition: seam.h:80
SEAM::Mark
void Mark(ScrollView *window) const
Definition: seam.cpp:178
TBLOB
Definition: blobs.h:282
findseam.h
TBOX::left
int16_t left() const
Definition: rect.h:71
tesseract::KDPtrPair::set_key
void set_key(const Key &new_key)
Definition: kdpair.h:119
edge_window_wait
#define edge_window_wait()
Definition: plotedges.h:52
SPLIT
Definition: split.h:34
BAD_PRIORITY
#define BAD_PRIORITY
Definition: findseam.cpp:50
SEAM::CombineableWith
bool CombineableWith(const SEAM &other, int max_x_dist, float max_total_priority) const
Definition: seam.cpp:38
TBOX::right
int16_t right() const
Definition: rect.h:78
SEAM::IsHealthy
bool IsHealthy(const TBLOB &blob, int min_points, int min_area) const
Definition: seam.cpp:64
update_edge_window
#define update_edge_window()
Definition: plotedges.h:41
EDGEPT
Definition: blobs.h:97
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
SEAM::priority
float priority() const
Definition: seam.h:57
seam.h
tesseract::Wordrec::chop_min_outline_points
int chop_min_outline_points
Definition: wordrec.h:209
tesseract::Wordrec::chop_min_outline_area
int chop_min_outline_area
Definition: wordrec.h:213
SEAM::CombineWith
void CombineWith(const SEAM &other)
Definition: seam.cpp:52
tesseract::Wordrec::chop_x_y_weight
int chop_x_y_weight
Definition: wordrec.h:223
outlines.h
tesseract::GenericHeap::get
const Pair & get(int index) const
Definition: genericheap.h:87
tesseract::Wordrec::chop_split_length
int chop_split_length
Definition: wordrec.h:207
tesseract::GenericHeap::empty
bool empty() const
Definition: genericheap.h:68
tesseract::Wordrec::combine_seam
void combine_seam(const SeamPile &seam_pile, const SEAM *seam, SeamQueue *seam_queue)
Definition: findseam.cpp:192
PRIORITY
float PRIORITY
Definition: seam.h:34
SPLIT::point1
EDGEPT * point1
Definition: split.h:100
edge_window
ScrollView * edge_window
Definition: plotedges.cpp:33
EDGEPT::pos
TPOINT pos
Definition: blobs.h:184
EDGEPT::next
EDGEPT * next
Definition: blobs.h:190
tesseract::Wordrec::chop_good_split
double chop_good_split
Definition: wordrec.h:222
TBOX
Definition: rect.h:33
tesseract::KDPtrPair::key
const Key & key() const
Definition: kdpair.h:116
SEAM::FullPriority
float FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth, double center_knob, double width_change_knob) const
Definition: seam.cpp:237