tesseract  5.0.0-alpha-619-ge9db
seam.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  *
3  * File: seam.cpp (Formerly seam.c)
4  * Author: Mark Seaman, OCR Technology
5  *
6  * (c) Copyright 1987, Hewlett-Packard Company.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  *
17  *****************************************************************************/
18 /*----------------------------------------------------------------------
19  I n c l u d e s
20 ----------------------------------------------------------------------*/
21 #include "seam.h"
22 #include "blobs.h"
23 #include "tprintf.h"
24 
25 /*----------------------------------------------------------------------
26  Public Function Code
27 ----------------------------------------------------------------------*/
28 
29 // Returns the bounding box of all the points in the seam.
30 TBOX SEAM::bounding_box() const {
31  TBOX box(location_.x, location_.y, location_.x, location_.y);
32  for (int s = 0; s < num_splits_; ++s) {
33  box += splits_[s].bounding_box();
34  }
35  return box;
36 }
37 
38 // Returns true if other can be combined into *this.
39 bool SEAM::CombineableWith(const SEAM& other, int max_x_dist,
40  float max_total_priority) const {
41  int dist = location_.x - other.location_.x;
42  if (-max_x_dist < dist && dist < max_x_dist &&
43  num_splits_ + other.num_splits_ <= kMaxNumSplits &&
44  priority_ + other.priority_ < max_total_priority &&
45  !OverlappingSplits(other) && !SharesPosition(other)) {
46  return true;
47  } else {
48  return false;
49  }
50 }
51 
52 // Combines other into *this. Only works if CombinableWith returned true.
53 void SEAM::CombineWith(const SEAM& other) {
54  priority_ += other.priority_;
55  location_ += other.location_;
56  location_ /= 2;
57 
58  for (uint8_t s = 0; s < other.num_splits_ && num_splits_ < kMaxNumSplits; ++s)
59  splits_[num_splits_++] = other.splits_[s];
60 }
61 
62 // Returns true if the splits in *this SEAM appear OK in the sense that they
63 // do not cross any outlines and do not chop off any ridiculously small
64 // pieces.
65 bool SEAM::IsHealthy(const TBLOB& blob, int min_points, int min_area) const {
66  // TODO(rays) Try testing all the splits. Duplicating original code for now,
67  // which tested only the first.
68  return num_splits_ == 0 || splits_[0].IsHealthy(blob, min_points, min_area);
69 }
70 
71 // Computes the widthp_/widthn_ range for all existing SEAMs and for *this
72 // seam, which is about to be inserted at insert_index. Returns false if
73 // any of the computations fails, as this indicates an invalid chop.
74 // widthn_/widthp_ are only changed if modify is true.
76  const GenericVector<TBLOB*>& blobs,
77  int insert_index, bool modify) {
78  for (int s = 0; s < insert_index; ++s) {
79  if (!seams[s]->FindBlobWidth(blobs, s, modify)) return false;
80  }
81  if (!FindBlobWidth(blobs, insert_index, modify)) return false;
82  for (int s = insert_index; s < seams.size(); ++s) {
83  if (!seams[s]->FindBlobWidth(blobs, s + 1, modify)) return false;
84  }
85  return true;
86 }
87 
88 // Computes the widthp_/widthn_ range. Returns false if not all the splits
89 // are accounted for. widthn_/widthp_ are only changed if modify is true.
90 bool SEAM::FindBlobWidth(const GenericVector<TBLOB*>& blobs, int index,
91  bool modify) {
92  int num_found = 0;
93  if (modify) {
94  widthp_ = 0;
95  widthn_ = 0;
96  }
97  for (int s = 0; s < num_splits_; ++s) {
98  const SPLIT& split = splits_[s];
99  bool found_split = split.ContainedByBlob(*blobs[index]);
100  // Look right.
101  for (int b = index + 1; !found_split && b < blobs.size(); ++b) {
102  found_split = split.ContainedByBlob(*blobs[b]);
103  if (found_split && b - index > widthp_ && modify) widthp_ = b - index;
104  }
105  // Look left.
106  for (int b = index - 1; !found_split && b >= 0; --b) {
107  found_split = split.ContainedByBlob(*blobs[b]);
108  if (found_split && index - b > widthn_ && modify) widthn_ = index - b;
109  }
110  if (found_split) ++num_found;
111  }
112  return num_found == num_splits_;
113 }
114 
115 // Splits this blob into two blobs by applying the splits included in
116 // *this SEAM
117 void SEAM::ApplySeam(bool italic_blob, TBLOB* blob, TBLOB* other_blob) const {
118  for (int s = 0; s < num_splits_; ++s) {
119  splits_[s].SplitOutlineList(blob->outlines);
120  }
121  blob->ComputeBoundingBoxes();
122 
123  divide_blobs(blob, other_blob, italic_blob, location_);
124 
126  other_blob->EliminateDuplicateOutlines();
127 
128  blob->CorrectBlobOrder(other_blob);
129 }
130 
131 // Undoes ApplySeam by removing the seam between these two blobs.
132 // Produces one blob as a result, and deletes other_blob.
133 void SEAM::UndoSeam(TBLOB* blob, TBLOB* other_blob) const {
134  if (blob->outlines == nullptr) {
135  blob->outlines = other_blob->outlines;
136  other_blob->outlines = nullptr;
137  }
138 
139  TESSLINE* outline = blob->outlines;
140  while (outline->next) outline = outline->next;
141  outline->next = other_blob->outlines;
142  other_blob->outlines = nullptr;
143  delete other_blob;
144 
145  for (int s = 0; s < num_splits_; ++s) {
146  splits_[s].UnsplitOutlineList(blob);
147  }
148  blob->ComputeBoundingBoxes();
150 }
151 
152 // Prints everything in *this SEAM.
153 void SEAM::Print(const char* label) const {
154  tprintf(label);
155  tprintf(" %6.2f @ (%d,%d), p=%d, n=%d ", priority_, location_.x, location_.y,
156  widthp_, widthn_);
157  for (int s = 0; s < num_splits_; ++s) {
158  splits_[s].Print();
159  if (s + 1 < num_splits_) tprintf(", ");
160  }
161  tprintf("\n");
162 }
163 
164 // Prints a collection of SEAMs.
165 /* static */
166 void SEAM::PrintSeams(const char* label, const GenericVector<SEAM*>& seams) {
167  if (!seams.empty()) {
168  tprintf("%s\n", label);
169  for (int x = 0; x < seams.size(); ++x) {
170  tprintf("%2d: ", x);
171  seams[x]->Print("");
172  }
173  tprintf("\n");
174  }
175 }
176 
177 #ifndef GRAPHICS_DISABLED
178 // Draws the seam in the given window.
179 void SEAM::Mark(ScrollView* window) const {
180  for (int s = 0; s < num_splits_; ++s) splits_[s].Mark(window);
181 }
182 #endif
183 
184 // Break up the blobs in this chain so that they are all independent.
185 // This operation should undo the affect of join_pieces.
186 /* static */
187 void SEAM::BreakPieces(const GenericVector<SEAM*>& seams,
188  const GenericVector<TBLOB*>& blobs, int first,
189  int last) {
190  for (int x = first; x < last; ++x) seams[x]->Reveal();
191 
192  TESSLINE* outline = blobs[first]->outlines;
193  int next_blob = first + 1;
194 
195  while (outline != nullptr && next_blob <= last) {
196  if (outline->next == blobs[next_blob]->outlines) {
197  outline->next = nullptr;
198  outline = blobs[next_blob]->outlines;
199  ++next_blob;
200  } else {
201  outline = outline->next;
202  }
203  }
204 }
205 
206 // Join a group of base level pieces into a single blob that can then
207 // be classified.
208 /* static */
209 void SEAM::JoinPieces(const GenericVector<SEAM*>& seams,
210  const GenericVector<TBLOB*>& blobs, int first, int last) {
211  TESSLINE* outline = blobs[first]->outlines;
212  if (!outline)
213  return;
214 
215  for (int x = first; x < last; ++x) {
216  SEAM *seam = seams[x];
217  if (x - seam->widthn_ >= first && x + seam->widthp_ < last) seam->Hide();
218  while (outline->next) outline = outline->next;
219  outline->next = blobs[x + 1]->outlines;
220  }
221 }
222 
223 // Hides the seam so the outlines appear not to be cut by it.
224 void SEAM::Hide() const {
225  for (int s = 0; s < num_splits_; ++s) {
226  splits_[s].Hide();
227  }
228 }
229 
230 // Undoes hide, so the outlines are cut by the seam.
231 void SEAM::Reveal() const {
232  for (int s = 0; s < num_splits_; ++s) {
233  splits_[s].Reveal();
234  }
235 }
236 
237 // Computes and returns, but does not set, the full priority of *this SEAM.
238 float SEAM::FullPriority(int xmin, int xmax, double overlap_knob,
239  int centered_maxwidth, double center_knob,
240  double width_change_knob) const {
241  if (num_splits_ == 0) return 0.0f;
242  for (int s = 1; s < num_splits_; ++s) {
243  splits_[s].SplitOutline();
244  }
245  float full_priority =
246  priority_ +
247  splits_[0].FullPriority(xmin, xmax, overlap_knob, centered_maxwidth,
248  center_knob, width_change_knob);
249  for (int s = num_splits_ - 1; s >= 1; --s) {
250  splits_[s].UnsplitOutlines();
251  }
252  return full_priority;
253 }
254 
262 void start_seam_list(TWERD* word, GenericVector<SEAM*>* seam_array) {
263  seam_array->truncate(0);
264  TPOINT location;
265 
266  for (int b = 1; b < word->NumBlobs(); ++b) {
267  TBOX bbox = word->blobs[b - 1]->bounding_box();
268  TBOX nbox = word->blobs[b]->bounding_box();
269  location.x = (bbox.right() + nbox.left()) / 2;
270  location.y = (bbox.bottom() + bbox.top() + nbox.bottom() + nbox.top()) / 4;
271  seam_array->push_back(new SEAM(0.0f, location));
272  }
273 }
SEAM::OverlappingSplits
bool OverlappingSplits(const SEAM &other) const
Definition: seam.h:95
ScrollView
Definition: scrollview.h:97
TBLOB::ComputeBoundingBoxes
void ComputeBoundingBoxes()
Definition: blobs.cpp:445
SPLIT::IsHealthy
bool IsHealthy(const TBLOB &blob, int min_points, int min_area) const
Definition: split.cpp:113
SPLIT::SplitOutlineList
void SplitOutlineList(TESSLINE *outlines) const
Definition: split.cpp:230
SEAM::ApplySeam
void ApplySeam(bool italic_blob, TBLOB *blob, TBLOB *other_blob) const
Definition: seam.cpp:116
TPOINT
Definition: blobs.h:49
TWERD
Definition: blobs.h:416
TBLOB::outlines
TESSLINE * outlines
Definition: blobs.h:398
SPLIT::Print
void Print() const
Definition: split.cpp:214
TESSLINE
Definition: blobs.h:201
TBOX::top
int16_t top() const
Definition: rect.h:57
TESSLINE::next
TESSLINE * next
Definition: blobs.h:279
blobs.h
SEAM
Definition: seam.h:36
SPLIT::ContainedByBlob
bool ContainedByBlob(const TBLOB &blob) const
Definition: split.h:62
GenericVector::push_back
int push_back(T object)
Definition: genericvector.h:799
SPLIT::Hide
void Hide() const
Definition: split.cpp:49
last
LIST last(LIST var_list)
Definition: oldlist.cpp:151
SEAM::UndoSeam
void UndoSeam(TBLOB *blob, TBLOB *other_blob) const
Definition: seam.cpp:132
TPOINT::x
int16_t x
Definition: blobs.h:91
SEAM::PrepareToInsertSeam
bool PrepareToInsertSeam(const GenericVector< SEAM * > &seams, const GenericVector< TBLOB * > &blobs, int insert_index, bool modify)
Definition: seam.cpp:74
SPLIT::UnsplitOutlines
void UnsplitOutlines() const
Definition: split.cpp:290
TPOINT::y
int16_t y
Definition: blobs.h:92
TWERD::blobs
GenericVector< TBLOB * > blobs
Definition: blobs.h:457
GenericVector::empty
bool empty() const
Definition: genericvector.h:86
TBOX::bottom
int16_t bottom() const
Definition: rect.h:64
start_seam_list
void start_seam_list(TWERD *word, GenericVector< SEAM * > *seam_array)
Definition: seam.cpp:261
SEAM::BreakPieces
static void BreakPieces(const GenericVector< SEAM * > &seams, const GenericVector< TBLOB * > &blobs, int first, int last)
Definition: seam.cpp:186
SEAM::Print
void Print(const char *label) const
Definition: seam.cpp:152
SEAM::Reveal
void Reveal() const
Definition: seam.cpp:230
SPLIT::FullPriority
float FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth, double center_knob, double width_change_knob) const
Definition: split.cpp:79
SEAM::bounding_box
TBOX bounding_box() const
Definition: seam.cpp:29
tprintf.h
GenericVector< SEAM * >
divide_blobs
void divide_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob, const TPOINT &location)
Definition: blobs.cpp:958
SPLIT::UnsplitOutlineList
void UnsplitOutlineList(TBLOB *blob) const
Definition: split.cpp:274
TBLOB::EliminateDuplicateOutlines
void EliminateDuplicateOutlines()
Definition: blobs.cpp:478
TBLOB::CorrectBlobOrder
void CorrectBlobOrder(TBLOB *next)
Definition: blobs.cpp:499
SEAM::Mark
void Mark(ScrollView *window) const
Definition: seam.cpp:178
TBLOB
Definition: blobs.h:282
GenericVector::truncate
void truncate(int size)
Definition: genericvector.h:132
TBOX::left
int16_t left() const
Definition: rect.h:71
SPLIT
Definition: split.h:34
SPLIT::SplitOutline
void SplitOutline() const
Definition: split.cpp:249
SEAM::CombineableWith
bool CombineableWith(const SEAM &other, int max_x_dist, float max_total_priority) const
Definition: seam.cpp:38
TBOX::right
int16_t right() const
Definition: rect.h:78
SPLIT::bounding_box
TBOX bounding_box() const
Definition: split.cpp:42
SEAM::IsHealthy
bool IsHealthy(const TBLOB &blob, int min_points, int min_area) const
Definition: seam.cpp:64
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
seam.h
SEAM::JoinPieces
static void JoinPieces(const GenericVector< SEAM * > &seams, const GenericVector< TBLOB * > &blobs, int first, int last)
Definition: seam.cpp:208
SEAM::SharesPosition
bool SharesPosition(const SEAM &other) const
Definition: seam.h:87
SEAM::CombineWith
void CombineWith(const SEAM &other)
Definition: seam.cpp:52
SEAM::Hide
void Hide() const
Definition: seam.cpp:223
GenericVector::size
int size() const
Definition: genericvector.h:71
SEAM::FindBlobWidth
bool FindBlobWidth(const GenericVector< TBLOB * > &blobs, int index, bool modify)
Definition: seam.cpp:89
SEAM::PrintSeams
static void PrintSeams(const char *label, const GenericVector< SEAM * > &seams)
Definition: seam.cpp:165
SPLIT::Reveal
void Reveal() const
Definition: split.cpp:63
TWERD::NumBlobs
int NumBlobs() const
Definition: blobs.h:446
TBOX
Definition: rect.h:33
SEAM::FullPriority
float FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth, double center_knob, double width_change_knob) const
Definition: seam.cpp:237