tesseract  5.0.0-alpha-619-ge9db
blobs.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  *
3  * File: blobs.cpp (Formerly blobs.c)
4  * Description: Blob definition
5  * Author: Mark Seaman, OCR Technology
6  *
7  * (c) Copyright 1989, Hewlett-Packard Company.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  *****************************************************************************/
19 
20 /*----------------------------------------------------------------------
21  I n c l u d e s
22 ----------------------------------------------------------------------*/
23 // Include automatically generated configuration file if running autoconf.
24 #ifdef HAVE_CONFIG_H
25 #include "config_auto.h"
26 #endif
27 
28 #include "blobs.h"
29 #include "ccstruct.h"
30 #include "clst.h"
31 #include <tesseract/helpers.h>
32 #include "linlsq.h"
33 #include "normalis.h"
34 #include "ocrblock.h"
35 #include "ocrrow.h"
36 #include "points.h"
37 #include "polyaprx.h"
38 #include "werd.h"
39 
40 #include <algorithm>
41 
43 
44 // A vector representing the "vertical" direction when measuring the
45 // divisibility of blobs into multiple blobs just by separating outlines.
46 // See divisible_blob below for the use.
48 // A vector representing the "vertical" direction for italic text for use
49 // when separating outlines. Using it actually deteriorates final accuracy,
50 // so it is only used for ApplyBoxes chopping to get a better segmentation.
52 
53 /*----------------------------------------------------------------------
54  F u n c t i o n s
55 ----------------------------------------------------------------------*/
56 
58 
59 // Returns true when the two line segments cross each other.
60 // (Moved from outlines.cpp).
61 // Finds where the projected lines would cross and then checks to see if the
62 // point of intersection lies on both of the line segments. If it does
63 // then these two segments cross.
64 /* static */
65 bool TPOINT::IsCrossed(const TPOINT& a0, const TPOINT& a1, const TPOINT& b0,
66  const TPOINT& b1) {
67  TPOINT b0a1, b0a0, a1b1, b0b1, a1a0;
68 
69  b0a1.x = a1.x - b0.x;
70  b0a0.x = a0.x - b0.x;
71  a1b1.x = b1.x - a1.x;
72  b0b1.x = b1.x - b0.x;
73  a1a0.x = a0.x - a1.x;
74  b0a1.y = a1.y - b0.y;
75  b0a0.y = a0.y - b0.y;
76  a1b1.y = b1.y - a1.y;
77  b0b1.y = b1.y - b0.y;
78  a1a0.y = a0.y - a1.y;
79 
80  int b0a1xb0b1 = b0a1.cross(b0b1);
81  int b0b1xb0a0 = b0b1.cross(b0a0);
82  int a1b1xa1a0 = a1b1.cross(a1a0);
83  // For clarity, we want a1a0.cross(a1b0) here but we have b0a1 instead of a1b0
84  // so use -a1b0.cross(b0a1) instead, which is the same.
85  int a1a0xa1b0 = -a1a0.cross(b0a1);
86 
87  return ((b0a1xb0b1 > 0 && b0b1xb0a0 > 0) ||
88  (b0a1xb0b1 < 0 && b0b1xb0a0 < 0)) &&
89  ((a1b1xa1a0 > 0 && a1a0xa1b0 > 0) || (a1b1xa1a0 < 0 && a1a0xa1b0 < 0));
90 }
91 
92 // Consume the circular list of EDGEPTs to make a TESSLINE.
94  auto* result = new TESSLINE;
95  result->loop = outline;
96  if (outline->src_outline != nullptr) {
97  // ASSUMPTION: This function is only ever called from ApproximateOutline
98  // and therefore either all points have a src_outline or all do not.
99  // Just as SetupFromPos sets the vectors from the vertices, setup the
100  // step_count members to indicate the (positive) number of original
101  // C_OUTLINE steps to the next vertex.
102  EDGEPT* pt = outline;
103  do {
104  pt->step_count = pt->next->start_step - pt->start_step;
105  if (pt->step_count < 0) pt->step_count += pt->src_outline->pathlength();
106  pt = pt->next;
107  } while (pt != outline);
108  }
109  result->SetupFromPos();
110  return result;
111 }
112 
113 // Copies the data and the outline, but leaves next untouched.
114 void TESSLINE::CopyFrom(const TESSLINE& src) {
115  Clear();
116  topleft = src.topleft;
117  botright = src.botright;
118  start = src.start;
119  is_hole = src.is_hole;
120  if (src.loop != nullptr) {
121  EDGEPT* prevpt = nullptr;
122  EDGEPT* newpt = nullptr;
123  EDGEPT* srcpt = src.loop;
124  do {
125  newpt = new EDGEPT(*srcpt);
126  if (prevpt == nullptr) {
127  loop = newpt;
128  } else {
129  newpt->prev = prevpt;
130  prevpt->next = newpt;
131  }
132  prevpt = newpt;
133  srcpt = srcpt->next;
134  } while (srcpt != src.loop);
135  loop->prev = newpt;
136  newpt->next = loop;
137  }
138 }
139 
140 // Deletes owned data.
142  if (loop == nullptr) return;
143 
144  EDGEPT* this_edge = loop;
145  do {
146  EDGEPT* next_edge = this_edge->next;
147  delete this_edge;
148  this_edge = next_edge;
149  } while (this_edge != loop);
150  loop = nullptr;
151 }
152 
153 // Normalize in-place using the DENORM.
154 void TESSLINE::Normalize(const DENORM& denorm) {
155  EDGEPT* pt = loop;
156  do {
157  denorm.LocalNormTransform(pt->pos, &pt->pos);
158  pt = pt->next;
159  } while (pt != loop);
160  SetupFromPos();
161 }
162 
163 // Rotates by the given rotation in place.
164 void TESSLINE::Rotate(const FCOORD rot) {
165  EDGEPT* pt = loop;
166  do {
167  int tmp = static_cast<int>(
168  floor(pt->pos.x * rot.x() - pt->pos.y * rot.y() + 0.5));
169  pt->pos.y = static_cast<int>(
170  floor(pt->pos.y * rot.x() + pt->pos.x * rot.y() + 0.5));
171  pt->pos.x = tmp;
172  pt = pt->next;
173  } while (pt != loop);
174  SetupFromPos();
175 }
176 
177 // Moves by the given vec in place.
178 void TESSLINE::Move(const ICOORD vec) {
179  EDGEPT* pt = loop;
180  do {
181  pt->pos.x += vec.x();
182  pt->pos.y += vec.y();
183  pt = pt->next;
184  } while (pt != loop);
185  SetupFromPos();
186 }
187 
188 // Scales by the given factor in place.
189 void TESSLINE::Scale(float factor) {
190  EDGEPT* pt = loop;
191  do {
192  pt->pos.x = static_cast<int>(floor(pt->pos.x * factor + 0.5));
193  pt->pos.y = static_cast<int>(floor(pt->pos.y * factor + 0.5));
194  pt = pt->next;
195  } while (pt != loop);
196  SetupFromPos();
197 }
198 
199 // Sets up the start and vec members of the loop from the pos members.
201  EDGEPT* pt = loop;
202  do {
203  pt->vec.x = pt->next->pos.x - pt->pos.x;
204  pt->vec.y = pt->next->pos.y - pt->pos.y;
205  pt = pt->next;
206  } while (pt != loop);
207  start = pt->pos;
209 }
210 
211 // Recomputes the bounding box from the points in the loop.
213  int minx = INT32_MAX;
214  int miny = INT32_MAX;
215  int maxx = -INT32_MAX;
216  int maxy = -INT32_MAX;
217 
218  // Find boundaries.
219  start = loop->pos;
220  EDGEPT* this_edge = loop;
221  do {
222  if (!this_edge->IsHidden() || !this_edge->prev->IsHidden()) {
223  if (this_edge->pos.x < minx) minx = this_edge->pos.x;
224  if (this_edge->pos.y < miny) miny = this_edge->pos.y;
225  if (this_edge->pos.x > maxx) maxx = this_edge->pos.x;
226  if (this_edge->pos.y > maxy) maxy = this_edge->pos.y;
227  }
228  this_edge = this_edge->next;
229  } while (this_edge != loop);
230  // Reset bounds.
231  topleft.x = minx;
232  topleft.y = maxy;
233  botright.x = maxx;
234  botright.y = miny;
235 }
236 
237 // Computes the min and max cross product of the outline points with the
238 // given vec and returns the results in min_xp and max_xp. Geometrically
239 // this is the left and right edge of the outline perpendicular to the
240 // given direction, but to get the distance units correct, you would
241 // have to divide by the modulus of vec.
242 void TESSLINE::MinMaxCrossProduct(const TPOINT vec, int* min_xp,
243  int* max_xp) const {
244  *min_xp = INT32_MAX;
245  *max_xp = INT32_MIN;
246  EDGEPT* this_edge = loop;
247  do {
248  if (!this_edge->IsHidden() || !this_edge->prev->IsHidden()) {
249  int product = this_edge->pos.cross(vec);
250  UpdateRange(product, min_xp, max_xp);
251  }
252  this_edge = this_edge->next;
253  } while (this_edge != loop);
254 }
255 
257  return TBOX(topleft.x, botright.y, botright.x, topleft.y);
258 }
259 
260 #ifndef GRAPHICS_DISABLED
262  ScrollView::Color child_color) {
263  if (is_hole)
264  window->Pen(child_color);
265  else
266  window->Pen(color);
267  window->SetCursor(start.x, start.y);
268  EDGEPT* pt = loop;
269  do {
270  bool prev_hidden = pt->IsHidden();
271  pt = pt->next;
272  if (prev_hidden)
273  window->SetCursor(pt->pos.x, pt->pos.y);
274  else
275  window->DrawTo(pt->pos.x, pt->pos.y);
276  } while (pt != loop);
277 }
278 #endif // GRAPHICS_DISABLED
279 
280 // Returns the first non-hidden EDGEPT that has a different src_outline to
281 // its predecessor, or, if all the same, the lowest indexed point.
283  EDGEPT* best_start = loop;
284  int best_step = loop->start_step;
285  // Iterate the polygon.
286  EDGEPT* pt = loop;
287  do {
288  if (pt->IsHidden()) continue;
289  if (pt->prev->IsHidden() || pt->prev->src_outline != pt->src_outline)
290  return pt; // Qualifies as the best.
291  if (pt->start_step < best_step) {
292  best_step = pt->start_step;
293  best_start = pt;
294  }
295  } while ((pt = pt->next) != loop);
296  return best_start;
297 }
298 
299 // Iterate the given list of outlines, converting to TESSLINE by polygonal
300 // approximation and recursively any children, returning the current tail
301 // of the resulting list of TESSLINEs.
302 static TESSLINE** ApproximateOutlineList(bool allow_detailed_fx,
303  C_OUTLINE_LIST* outlines,
304  bool children, TESSLINE** tail) {
305  C_OUTLINE_IT ol_it(outlines);
306  for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) {
307  C_OUTLINE* outline = ol_it.data();
308  if (outline->pathlength() > 0) {
309  TESSLINE* tessline = ApproximateOutline(allow_detailed_fx, outline);
310  tessline->is_hole = children;
311  *tail = tessline;
312  tail = &tessline->next;
313  }
314  if (!outline->child()->empty()) {
315  tail = ApproximateOutlineList(allow_detailed_fx, outline->child(), true,
316  tail);
317  }
318  }
319  return tail;
320 }
321 
322 // Factory to build a TBLOB from a C_BLOB with polygonal approximation along
323 // the way. If allow_detailed_fx is true, the EDGEPTs in the returned TBLOB
324 // contain pointers to the input C_OUTLINEs that enable higher-resolution
325 // feature extraction that does not use the polygonal approximation.
326 TBLOB* TBLOB::PolygonalCopy(bool allow_detailed_fx, C_BLOB* src) {
327  auto* tblob = new TBLOB;
328  ApproximateOutlineList(allow_detailed_fx, src->out_list(), false,
329  &tblob->outlines);
330  return tblob;
331 }
332 
333 // Factory builds a blob with no outlines, but copies the other member data.
335  auto* blob = new TBLOB;
336  blob->denorm_ = src.denorm_;
337  return blob;
338 }
339 
340 // Normalizes the blob for classification only if needed.
341 // (Normally this means a non-zero classify rotation.)
342 // If no Normalization is needed, then nullptr is returned, and the input blob
343 // can be used directly. Otherwise a new TBLOB is returned which must be
344 // deleted after use.
346  TBLOB* rotated_blob = nullptr;
347  // If necessary, copy the blob and rotate it. The rotation is always
348  // +/- 90 degrees, as 180 was already taken care of.
349  if (denorm_.block() != nullptr &&
350  denorm_.block()->classify_rotation().y() != 0.0) {
351  TBOX box = bounding_box();
352  int x_middle = (box.left() + box.right()) / 2;
353  int y_middle = (box.top() + box.bottom()) / 2;
354  rotated_blob = new TBLOB(*this);
355  const FCOORD& rotation = denorm_.block()->classify_rotation();
356  // Move the rotated blob back to the same y-position so that we
357  // can still distinguish similar glyphs with different y-position.
358  float target_y =
360  (rotation.y() > 0 ? x_middle - box.left() : box.right() - x_middle);
361  rotated_blob->Normalize(nullptr, &rotation, &denorm_, x_middle, y_middle,
362  1.0f, 1.0f, 0.0f, target_y, denorm_.inverse(),
363  denorm_.pix());
364  }
365  return rotated_blob;
366 }
367 
368 // Copies the data and the outline, but leaves next untouched.
369 void TBLOB::CopyFrom(const TBLOB& src) {
370  Clear();
371  TESSLINE* prev_outline = nullptr;
372  for (TESSLINE* srcline = src.outlines; srcline != nullptr;
373  srcline = srcline->next) {
374  auto* new_outline = new TESSLINE(*srcline);
375  if (outlines == nullptr)
376  outlines = new_outline;
377  else
378  prev_outline->next = new_outline;
379  prev_outline = new_outline;
380  }
381  denorm_ = src.denorm_;
382 }
383 
384 // Deletes owned data.
385 void TBLOB::Clear() {
386  for (TESSLINE* next_outline = nullptr; outlines != nullptr;
387  outlines = next_outline) {
388  next_outline = outlines->next;
389  delete outlines;
390  }
391 }
392 
393 // Sets up the built-in DENORM and normalizes the blob in-place.
394 // For parameters see DENORM::SetupNormalization, plus the inverse flag for
395 // this blob and the Pix for the full image.
396 void TBLOB::Normalize(const BLOCK* block, const FCOORD* rotation,
397  const DENORM* predecessor, float x_origin, float y_origin,
398  float x_scale, float y_scale, float final_xshift,
399  float final_yshift, bool inverse, Pix* pix) {
400  denorm_.SetupNormalization(block, rotation, predecessor, x_origin, y_origin,
401  x_scale, y_scale, final_xshift, final_yshift);
402  denorm_.set_inverse(inverse);
403  denorm_.set_pix(pix);
404  // TODO(rays) outline->Normalize is more accurate, but breaks tests due
405  // the changes it makes. Reinstate this code with a retraining.
406  // The reason this change is troublesome is that it normalizes for the
407  // baseline value computed independently at each x-coord. If the baseline
408  // is not horizontal, this introduces shear into the normalized blob, which
409  // is useful on the rare occasions that the baseline is really curved, but
410  // the baselines need to be stabilized the rest of the time.
411 #if 0
412  for (TESSLINE* outline = outlines; outline != nullptr; outline = outline->next) {
413  outline->Normalize(denorm_);
414  }
415 #else
416  denorm_.LocalNormBlob(this);
417 #endif
418 }
419 
420 // Rotates by the given rotation in place.
421 void TBLOB::Rotate(const FCOORD rotation) {
422  for (TESSLINE* outline = outlines; outline != nullptr;
423  outline = outline->next) {
424  outline->Rotate(rotation);
425  }
426 }
427 
428 // Moves by the given vec in place.
429 void TBLOB::Move(const ICOORD vec) {
430  for (TESSLINE* outline = outlines; outline != nullptr;
431  outline = outline->next) {
432  outline->Move(vec);
433  }
434 }
435 
436 // Scales by the given factor in place.
437 void TBLOB::Scale(float factor) {
438  for (TESSLINE* outline = outlines; outline != nullptr;
439  outline = outline->next) {
440  outline->Scale(factor);
441  }
442 }
443 
444 // Recomputes the bounding boxes of the outlines.
446  for (TESSLINE* outline = outlines; outline != nullptr;
447  outline = outline->next) {
448  outline->ComputeBoundingBox();
449  }
450 }
451 
452 // Returns the number of outlines.
453 int TBLOB::NumOutlines() const {
454  int result = 0;
455  for (TESSLINE* outline = outlines; outline != nullptr;
456  outline = outline->next)
457  ++result;
458  return result;
459 }
460 
461 /**********************************************************************
462  * TBLOB::bounding_box()
463  *
464  * Compute the bounding_box of a compound blob, defined to be the
465  * bounding box of the union of all top-level outlines in the blob.
466  **********************************************************************/
467 TBOX TBLOB::bounding_box() const {
468  if (outlines == nullptr) return TBOX(0, 0, 0, 0);
469  TESSLINE* outline = outlines;
470  TBOX box = outline->bounding_box();
471  for (outline = outline->next; outline != nullptr; outline = outline->next) {
472  box += outline->bounding_box();
473  }
474  return box;
475 }
476 
477 // Finds and deletes any duplicate outlines in this blob, without deleting
478 // their EDGEPTs.
480  for (TESSLINE* outline = outlines; outline != nullptr;
481  outline = outline->next) {
482  TESSLINE* last_outline = outline;
483  for (TESSLINE* other_outline = outline->next; other_outline != nullptr;
484  last_outline = other_outline, other_outline = other_outline->next) {
485  if (outline->SameBox(*other_outline)) {
486  last_outline->next = other_outline->next;
487  // This doesn't leak - the outlines share the EDGEPTs.
488  other_outline->loop = nullptr;
489  delete other_outline;
490  other_outline = last_outline;
491  // If it is part of a cut, then it can't be a hole any more.
492  outline->is_hole = false;
493  }
494  }
495  }
496 }
497 
498 // Swaps the outlines of *this and next if needed to keep the centers in
499 // increasing x.
500 void TBLOB::CorrectBlobOrder(TBLOB* next) {
501  TBOX box = bounding_box();
502  TBOX next_box = next->bounding_box();
503  if (box.x_middle() > next_box.x_middle()) {
504  Swap(&outlines, &next->outlines);
505  }
506 }
507 
508 #ifndef GRAPHICS_DISABLED
509 void TBLOB::plot(ScrollView* window, ScrollView::Color color,
510  ScrollView::Color child_color) {
511  for (TESSLINE* outline = outlines; outline != nullptr;
512  outline = outline->next)
513  outline->plot(window, color, child_color);
514 }
515 #endif // GRAPHICS_DISABLED
516 
517 // Computes the center of mass and second moments for the old baseline and
518 // 2nd moment normalizations. Returns the outline length.
519 // The input denorm should be the normalizations that have been applied from
520 // the image to the current state of this TBLOB.
521 int TBLOB::ComputeMoments(FCOORD* center, FCOORD* second_moments) const {
522  // Compute 1st and 2nd moments of the original outline.
523  LLSQ accumulator;
524  TBOX box = bounding_box();
525  // Iterate the outlines, accumulating edges relative the box.botleft().
526  CollectEdges(box, nullptr, &accumulator, nullptr, nullptr);
527  *center = accumulator.mean_point() + box.botleft();
528  // The 2nd moments are just the standard deviation of the point positions.
529  double x2nd = sqrt(accumulator.x_variance());
530  double y2nd = sqrt(accumulator.y_variance());
531  if (x2nd < 1.0) x2nd = 1.0;
532  if (y2nd < 1.0) y2nd = 1.0;
533  second_moments->set_x(x2nd);
534  second_moments->set_y(y2nd);
535  return accumulator.count();
536 }
537 
538 // Computes the precise bounding box of the coords that are generated by
539 // GetEdgeCoords. This may be different from the bounding box of the polygon.
540 void TBLOB::GetPreciseBoundingBox(TBOX* precise_box) const {
541  TBOX box = bounding_box();
542  *precise_box = TBOX();
543  CollectEdges(box, precise_box, nullptr, nullptr, nullptr);
544  precise_box->move(box.botleft());
545 }
546 
547 // Adds edges to the given vectors.
548 // For all the edge steps in all the outlines, or polygonal approximation
549 // where there are no edge steps, collects the steps into x_coords/y_coords.
550 // x_coords is a collection of the x-coords of vertical edges for each
551 // y-coord starting at box.bottom().
552 // y_coords is a collection of the y-coords of horizontal edges for each
553 // x-coord starting at box.left().
554 // Eg x_coords[0] is a collection of the x-coords of edges at y=bottom.
555 // Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1.
556 void TBLOB::GetEdgeCoords(const TBOX& box,
557  GenericVector<GenericVector<int> >* x_coords,
558  GenericVector<GenericVector<int> >* y_coords) const {
559  GenericVector<int> empty;
560  x_coords->init_to_size(box.height(), empty);
561  y_coords->init_to_size(box.width(), empty);
562  CollectEdges(box, nullptr, nullptr, x_coords, y_coords);
563  // Sort the output vectors.
564  for (int i = 0; i < x_coords->size(); ++i) (*x_coords)[i].sort();
565  for (int i = 0; i < y_coords->size(); ++i) (*y_coords)[i].sort();
566 }
567 
568 // Accumulates the segment between pt1 and pt2 in the LLSQ, quantizing over
569 // the integer coordinate grid to properly weight long vectors.
570 static void SegmentLLSQ(const FCOORD& pt1, const FCOORD& pt2,
571  LLSQ* accumulator) {
572  FCOORD step(pt2);
573  step -= pt1;
574  int xstart = IntCastRounded(std::min(pt1.x(), pt2.x()));
575  int xend = IntCastRounded(std::max(pt1.x(), pt2.x()));
576  int ystart = IntCastRounded(std::min(pt1.y(), pt2.y()));
577  int yend = IntCastRounded(std::max(pt1.y(), pt2.y()));
578  if (xstart == xend && ystart == yend) return; // Nothing to do.
579  double weight = step.length() / (xend - xstart + yend - ystart);
580  // Compute and save the y-position at the middle of each x-step.
581  for (int x = xstart; x < xend; ++x) {
582  double y = pt1.y() + step.y() * (x + 0.5 - pt1.x()) / step.x();
583  accumulator->add(x + 0.5, y, weight);
584  }
585  // Compute and save the x-position at the middle of each y-step.
586  for (int y = ystart; y < yend; ++y) {
587  double x = pt1.x() + step.x() * (y + 0.5 - pt1.y()) / step.y();
588  accumulator->add(x, y + 0.5, weight);
589  }
590 }
591 
592 // Adds any edges from a single segment of outline between pt1 and pt2 to
593 // the x_coords, y_coords vectors. pt1 and pt2 should be relative to the
594 // bottom-left of the bounding box, hence indices to x_coords, y_coords
595 // are clipped to ([0,x_limit], [0,y_limit]).
596 // See GetEdgeCoords above for a description of x_coords, y_coords.
597 static void SegmentCoords(const FCOORD& pt1, const FCOORD& pt2, int x_limit,
598  int y_limit,
599  GenericVector<GenericVector<int> >* x_coords,
600  GenericVector<GenericVector<int> >* y_coords) {
601  FCOORD step(pt2);
602  step -= pt1;
603  int start =
604  ClipToRange(IntCastRounded(std::min(pt1.x(), pt2.x())), 0, x_limit);
605  int end = ClipToRange(IntCastRounded(std::max(pt1.x(), pt2.x())), 0, x_limit);
606  for (int x = start; x < end; ++x) {
607  int y = IntCastRounded(pt1.y() + step.y() * (x + 0.5 - pt1.x()) / step.x());
608  (*y_coords)[x].push_back(y);
609  }
610  start = ClipToRange(IntCastRounded(std::min(pt1.y(), pt2.y())), 0, y_limit);
611  end = ClipToRange(IntCastRounded(std::max(pt1.y(), pt2.y())), 0, y_limit);
612  for (int y = start; y < end; ++y) {
613  int x = IntCastRounded(pt1.x() + step.x() * (y + 0.5 - pt1.y()) / step.y());
614  (*x_coords)[y].push_back(x);
615  }
616 }
617 
618 // Adds any edges from a single segment of outline between pt1 and pt2 to
619 // the bbox such that it guarantees to contain anything produced by
620 // SegmentCoords.
621 static void SegmentBBox(const FCOORD& pt1, const FCOORD& pt2, TBOX* bbox) {
622  FCOORD step(pt2);
623  step -= pt1;
624  int x1 = IntCastRounded(std::min(pt1.x(), pt2.x()));
625  int x2 = IntCastRounded(std::max(pt1.x(), pt2.x()));
626  if (x2 > x1) {
627  int y1 =
628  IntCastRounded(pt1.y() + step.y() * (x1 + 0.5 - pt1.x()) / step.x());
629  int y2 =
630  IntCastRounded(pt1.y() + step.y() * (x2 - 0.5 - pt1.x()) / step.x());
631  TBOX point(x1, std::min(y1, y2), x2, std::max(y1, y2));
632  *bbox += point;
633  }
634  int y1 = IntCastRounded(std::min(pt1.y(), pt2.y()));
635  int y2 = IntCastRounded(std::max(pt1.y(), pt2.y()));
636  if (y2 > y1) {
637  int x1 =
638  IntCastRounded(pt1.x() + step.x() * (y1 + 0.5 - pt1.y()) / step.y());
639  int x2 =
640  IntCastRounded(pt1.x() + step.x() * (y2 - 0.5 - pt1.y()) / step.y());
641  TBOX point(std::min(x1, x2), y1, std::max(x1, x2), y2);
642  *bbox += point;
643  }
644 }
645 
646 // Collects edges into the given bounding box, LLSQ accumulator and/or x_coords,
647 // y_coords vectors.
648 // For a description of x_coords/y_coords, see GetEdgeCoords above.
649 // Startpt to lastpt, inclusive, MUST have the same src_outline member,
650 // which may be nullptr. The vector from lastpt to its next is included in
651 // the accumulation. Hidden edges should be excluded by the caller.
652 // The input denorm should be the normalizations that have been applied from
653 // the image to the current state of the TBLOB from which startpt, lastpt come.
654 // box is the bounding box of the blob from which the EDGEPTs are taken and
655 // indices into x_coords, y_coords are offset by box.botleft().
656 static void CollectEdgesOfRun(const EDGEPT* startpt, const EDGEPT* lastpt,
657  const DENORM& denorm, const TBOX& box,
658  TBOX* bounding_box, LLSQ* accumulator,
659  GenericVector<GenericVector<int> >* x_coords,
660  GenericVector<GenericVector<int> >* y_coords) {
661  const C_OUTLINE* outline = startpt->src_outline;
662  int x_limit = box.width() - 1;
663  int y_limit = box.height() - 1;
664  if (outline != nullptr) {
665  // Use higher-resolution edge points stored on the outline.
666  // The outline coordinates may not match the binary image because of the
667  // rotation for vertical text lines, but the root_denorm IS the matching
668  // start of the DENORM chain.
669  const DENORM* root_denorm = denorm.RootDenorm();
670  int step_length = outline->pathlength();
671  int start_index = startpt->start_step;
672  // Note that if this run straddles the wrap-around point of the outline,
673  // that lastpt->start_step may have a lower index than startpt->start_step,
674  // and we want to use an end_index that allows us to use a positive
675  // increment, so we add step_length if necessary, but that may be beyond the
676  // bounds of the outline steps/ due to wrap-around, so we use % step_length
677  // everywhere, except for start_index.
678  int end_index = lastpt->start_step + lastpt->step_count;
679  if (end_index <= start_index) end_index += step_length;
680  // pos is the integer coordinates of the binary image steps.
681  ICOORD pos = outline->position_at_index(start_index);
682  FCOORD origin(box.left(), box.bottom());
683  // f_pos is a floating-point version of pos that offers improved edge
684  // positioning using greyscale information or smoothing of edge steps.
685  FCOORD f_pos = outline->sub_pixel_pos_at_index(pos, start_index);
686  // pos_normed is f_pos after the appropriate normalization, and relative
687  // to origin.
688  // prev_normed is the previous value of pos_normed.
689  FCOORD prev_normed;
690  denorm.NormTransform(root_denorm, f_pos, &prev_normed);
691  prev_normed -= origin;
692  for (int index = start_index; index < end_index; ++index) {
693  ICOORD step = outline->step(index % step_length);
694  // Only use the point if its edge strength is positive. This excludes
695  // points that don't provide useful information, eg
696  // ___________
697  // |___________
698  // The vertical step provides only noisy, damaging information, as even
699  // with a greyscale image, the positioning of the edge there may be a
700  // fictitious extrapolation, so previous processing has eliminated it.
701  if (outline->edge_strength_at_index(index % step_length) > 0) {
702  FCOORD f_pos =
703  outline->sub_pixel_pos_at_index(pos, index % step_length);
704  FCOORD pos_normed;
705  denorm.NormTransform(root_denorm, f_pos, &pos_normed);
706  pos_normed -= origin;
707  // Accumulate the information that is selected by the caller.
708  if (bounding_box != nullptr) {
709  SegmentBBox(pos_normed, prev_normed, bounding_box);
710  }
711  if (accumulator != nullptr) {
712  SegmentLLSQ(pos_normed, prev_normed, accumulator);
713  }
714  if (x_coords != nullptr && y_coords != nullptr) {
715  SegmentCoords(pos_normed, prev_normed, x_limit, y_limit, x_coords,
716  y_coords);
717  }
718  prev_normed = pos_normed;
719  }
720  pos += step;
721  }
722  } else {
723  // There is no outline, so we are forced to use the polygonal approximation.
724  const EDGEPT* endpt = lastpt->next;
725  const EDGEPT* pt = startpt;
726  do {
727  FCOORD next_pos(pt->next->pos.x - box.left(),
728  pt->next->pos.y - box.bottom());
729  FCOORD pos(pt->pos.x - box.left(), pt->pos.y - box.bottom());
730  if (bounding_box != nullptr) {
731  SegmentBBox(next_pos, pos, bounding_box);
732  }
733  if (accumulator != nullptr) {
734  SegmentLLSQ(next_pos, pos, accumulator);
735  }
736  if (x_coords != nullptr && y_coords != nullptr) {
737  SegmentCoords(next_pos, pos, x_limit, y_limit, x_coords, y_coords);
738  }
739  } while ((pt = pt->next) != endpt);
740  }
741 }
742 
743 // For all the edge steps in all the outlines, or polygonal approximation
744 // where there are no edge steps, collects the steps into the bounding_box,
745 // llsq and/or the x_coords/y_coords. Both are used in different kinds of
746 // normalization.
747 // For a description of x_coords, y_coords, see GetEdgeCoords above.
748 void TBLOB::CollectEdges(const TBOX& box, TBOX* bounding_box, LLSQ* llsq,
749  GenericVector<GenericVector<int> >* x_coords,
750  GenericVector<GenericVector<int> >* y_coords) const {
751  // Iterate the outlines.
752  for (const TESSLINE* ol = outlines; ol != nullptr; ol = ol->next) {
753  // Iterate the polygon.
754  EDGEPT* loop_pt = ol->FindBestStartPt();
755  EDGEPT* pt = loop_pt;
756  if (pt == nullptr) continue;
757  do {
758  if (pt->IsHidden()) continue;
759  // Find a run of equal src_outline.
760  EDGEPT* last_pt = pt;
761  do {
762  last_pt = last_pt->next;
763  } while (last_pt != loop_pt && !last_pt->IsHidden() &&
764  last_pt->src_outline == pt->src_outline);
765  last_pt = last_pt->prev;
766  CollectEdgesOfRun(pt, last_pt, denorm_, box, bounding_box, llsq, x_coords,
767  y_coords);
768  pt = last_pt;
769  } while ((pt = pt->next) != loop_pt);
770  }
771 }
772 
773 // Factory to build a TWERD from a (C_BLOB) WERD, with polygonal
774 // approximation along the way.
775 TWERD* TWERD::PolygonalCopy(bool allow_detailed_fx, WERD* src) {
776  auto* tessword = new TWERD;
777  tessword->latin_script = src->flag(W_SCRIPT_IS_LATIN);
778  C_BLOB_IT b_it(src->cblob_list());
779  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
780  C_BLOB* blob = b_it.data();
781  TBLOB* tblob = TBLOB::PolygonalCopy(allow_detailed_fx, blob);
782  tessword->blobs.push_back(tblob);
783  }
784  return tessword;
785 }
786 
787 // Baseline normalizes the blobs in-place, recording the normalization in the
788 // DENORMs in the blobs.
789 void TWERD::BLNormalize(const BLOCK* block, const ROW* row, Pix* pix,
790  bool inverse, float x_height, float baseline_shift,
791  bool numeric_mode, tesseract::OcrEngineMode hint,
792  const TBOX* norm_box, DENORM* word_denorm) {
793  TBOX word_box = bounding_box();
794  if (norm_box != nullptr) word_box = *norm_box;
795  float word_middle = (word_box.left() + word_box.right()) / 2.0f;
796  float input_y_offset = 0.0f;
797  auto final_y_offset = static_cast<float>(kBlnBaselineOffset);
798  float scale = kBlnXHeight / x_height;
799  if (row == nullptr) {
800  word_middle = word_box.left();
801  input_y_offset = word_box.bottom();
802  final_y_offset = 0.0f;
803  } else {
804  input_y_offset = row->base_line(word_middle) + baseline_shift;
805  }
806  for (int b = 0; b < blobs.size(); ++b) {
807  TBLOB* blob = blobs[b];
808  TBOX blob_box = blob->bounding_box();
809  float mid_x = (blob_box.left() + blob_box.right()) / 2.0f;
810  float baseline = input_y_offset;
811  float blob_scale = scale;
812  if (numeric_mode) {
813  baseline = blob_box.bottom();
814  blob_scale = ClipToRange(kBlnXHeight * 4.0f / (3 * blob_box.height()),
815  scale, scale * 1.5f);
816  } else if (row != nullptr) {
817  baseline = row->base_line(mid_x) + baseline_shift;
818  }
819  // The image will be 8-bit grey if the input was grey or color. Note that in
820  // a grey image 0 is black and 255 is white. If the input was binary, then
821  // the pix will be binary and 0 is white, with 1 being black.
822  // To tell the difference pixGetDepth() will return 8 or 1.
823  // The inverse flag will be true iff the word has been determined to be
824  // white on black, and is independent of whether the pix is 8 bit or 1 bit.
825  blob->Normalize(block, nullptr, nullptr, word_middle, baseline, blob_scale,
826  blob_scale, 0.0f, final_y_offset, inverse, pix);
827  }
828  if (word_denorm != nullptr) {
829  word_denorm->SetupNormalization(block, nullptr, nullptr, word_middle,
830  input_y_offset, scale, scale, 0.0f,
831  final_y_offset);
832  word_denorm->set_inverse(inverse);
833  word_denorm->set_pix(pix);
834  }
835 }
836 
837 // Copies the data and the blobs, but leaves next untouched.
838 void TWERD::CopyFrom(const TWERD& src) {
839  Clear();
841  for (int b = 0; b < src.blobs.size(); ++b) {
842  auto* new_blob = new TBLOB(*src.blobs[b]);
843  blobs.push_back(new_blob);
844  }
845 }
846 
847 // Deletes owned data.
848 void TWERD::Clear() {
850  blobs.clear();
851 }
852 
853 // Recomputes the bounding boxes of the blobs.
855  for (int b = 0; b < blobs.size(); ++b) {
856  blobs[b]->ComputeBoundingBoxes();
857  }
858 }
859 
860 TBOX TWERD::bounding_box() const {
861  TBOX result;
862  for (int b = 0; b < blobs.size(); ++b) {
863  TBOX box = blobs[b]->bounding_box();
864  result += box;
865  }
866  return result;
867 }
868 
869 // Merges the blobs from start to end, not including end, and deletes
870 // the blobs between start and end.
871 void TWERD::MergeBlobs(int start, int end) {
872  if (start >= blobs.size() - 1) return; // Nothing to do.
873  TESSLINE* outline = blobs[start]->outlines;
874  for (int i = start + 1; i < end && i < blobs.size(); ++i) {
875  TBLOB* next_blob = blobs[i];
876  // Take the outlines from the next blob.
877  if (outline == nullptr) {
878  blobs[start]->outlines = next_blob->outlines;
879  outline = blobs[start]->outlines;
880  } else {
881  while (outline->next != nullptr) outline = outline->next;
882  outline->next = next_blob->outlines;
883  next_blob->outlines = nullptr;
884  }
885  // Delete the next blob and move on.
886  delete next_blob;
887  blobs[i] = nullptr;
888  }
889  // Remove dead blobs from the vector.
890  for (int i = start + 1; i < end && start + 1 < blobs.size(); ++i) {
891  blobs.remove(start + 1);
892  }
893 }
894 
895 #ifndef GRAPHICS_DISABLED
896 void TWERD::plot(ScrollView* window) {
898  for (int b = 0; b < blobs.size(); ++b) {
899  blobs[b]->plot(window, color, ScrollView::BROWN);
900  color = WERD::NextColor(color);
901  }
902 }
903 #endif // GRAPHICS_DISABLED
904 
905 /**********************************************************************
906  * divisible_blob
907  *
908  * Returns true if the blob contains multiple outlines than can be
909  * separated using divide_blobs. Sets the location to be used in the
910  * call to divide_blobs.
911  **********************************************************************/
912 bool divisible_blob(TBLOB* blob, bool italic_blob, TPOINT* location) {
913  if (blob->outlines == nullptr || blob->outlines->next == nullptr)
914  return false; // Need at least 2 outlines for it to be possible.
915  int max_gap = 0;
916  TPOINT vertical =
918  for (TESSLINE* outline1 = blob->outlines; outline1 != nullptr;
919  outline1 = outline1->next) {
920  if (outline1->is_hole) continue; // Holes do not count as separable.
921  TPOINT mid_pt1(
922  static_cast<int16_t>((outline1->topleft.x + outline1->botright.x) / 2),
923  static_cast<int16_t>((outline1->topleft.y + outline1->botright.y) / 2));
924  int mid_prod1 = mid_pt1.cross(vertical);
925  int min_prod1, max_prod1;
926  outline1->MinMaxCrossProduct(vertical, &min_prod1, &max_prod1);
927  for (TESSLINE* outline2 = outline1->next; outline2 != nullptr;
928  outline2 = outline2->next) {
929  if (outline2->is_hole) continue; // Holes do not count as separable.
930  TPOINT mid_pt2(static_cast<int16_t>(
931  (outline2->topleft.x + outline2->botright.x) / 2),
932  static_cast<int16_t>(
933  (outline2->topleft.y + outline2->botright.y) / 2));
934  int mid_prod2 = mid_pt2.cross(vertical);
935  int min_prod2, max_prod2;
936  outline2->MinMaxCrossProduct(vertical, &min_prod2, &max_prod2);
937  int mid_gap = abs(mid_prod2 - mid_prod1);
938  int overlap =
939  std::min(max_prod1, max_prod2) - std::max(min_prod1, min_prod2);
940  if (mid_gap - overlap / 4 > max_gap) {
941  max_gap = mid_gap - overlap / 4;
942  *location = mid_pt1;
943  *location += mid_pt2;
944  *location /= 2;
945  }
946  }
947  }
948  // Use the y component of the vertical vector as an approximation to its
949  // length.
950  return max_gap > vertical.y;
951 }
952 
953 /**********************************************************************
954  * divide_blobs
955  *
956  * Create two blobs by grouping the outlines in the appropriate blob.
957  * The outlines that are beyond the location point are moved to the
958  * other blob. The ones whose x location is less than that point are
959  * retained in the original blob.
960  **********************************************************************/
961 void divide_blobs(TBLOB* blob, TBLOB* other_blob, bool italic_blob,
962  const TPOINT& location) {
963  TPOINT vertical =
965  TESSLINE* outline1 = nullptr;
966  TESSLINE* outline2 = nullptr;
967 
968  TESSLINE* outline = blob->outlines;
969  blob->outlines = nullptr;
970  int location_prod = location.cross(vertical);
971 
972  while (outline != nullptr) {
973  TPOINT mid_pt(
974  static_cast<int16_t>((outline->topleft.x + outline->botright.x) / 2),
975  static_cast<int16_t>((outline->topleft.y + outline->botright.y) / 2));
976  int mid_prod = mid_pt.cross(vertical);
977  if (mid_prod < location_prod) {
978  // Outline is in left blob.
979  if (outline1)
980  outline1->next = outline;
981  else
982  blob->outlines = outline;
983  outline1 = outline;
984  } else {
985  // Outline is in right blob.
986  if (outline2)
987  outline2->next = outline;
988  else
989  other_blob->outlines = outline;
990  outline2 = outline;
991  }
992  outline = outline->next;
993  }
994 
995  if (outline1) outline1->next = nullptr;
996  if (outline2) outline2->next = nullptr;
997 }
TBOX
Definition: cleanapi_test.cc:19
C_OUTLINE::sub_pixel_pos_at_index
FCOORD sub_pixel_pos_at_index(const ICOORD &pos, int index) const
Definition: coutln.h:162
TBLOB::Scale
void Scale(float factor)
Definition: blobs.cpp:437
C_OUTLINE::edge_strength_at_index
int edge_strength_at_index(int index) const
Definition: coutln.h:186
W_SCRIPT_IS_LATIN
Special case latin for y. splitting.
Definition: werd.h:50
TESSLINE::start
TPOINT start
Definition: blobs.h:276
kDivisibleVerticalItalic
const TPOINT kDivisibleVerticalItalic(1, 5)
GenericVector::delete_data_pointers
void delete_data_pointers()
Definition: genericvector.h:872
TBLOB::ClassifyNormalizeIfNeeded
TBLOB * ClassifyNormalizeIfNeeded() const
Definition: blobs.cpp:345
GenericVector::remove
void remove(int index)
Definition: genericvector.h:765
ClipToRange
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:106
ScrollView
Definition: scrollview.h:97
TWERD::MergeBlobs
void MergeBlobs(int start, int end)
Definition: blobs.cpp:870
TBLOB::ComputeBoundingBoxes
void ComputeBoundingBoxes()
Definition: blobs.cpp:445
normalis.h
TBOX::move
void move(const ICOORD vec)
Definition: rect.h:156
TESSLINE::botright
TPOINT botright
Definition: blobs.h:275
TWERD::latin_script
bool latin_script
Definition: blobs.h:458
TESSLINE::Rotate
void Rotate(const FCOORD rotation)
Definition: blobs.cpp:164
ROW::base_line
float base_line(float xpos) const
Definition: ocrrow.h:58
TESSLINE::ComputeBoundingBox
void ComputeBoundingBox()
Definition: blobs.cpp:212
TESSLINE::Clear
void Clear()
Definition: blobs.cpp:141
LLSQ::add
void add(double x, double y)
Definition: linlsq.cpp:45
WERD::flag
bool flag(WERD_FLAGS mask) const
Definition: werd.h:116
C_BLOB::out_list
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:69
kBlnXHeight
const int kBlnXHeight
Definition: normalis.h:23
LLSQ
Definition: linlsq.h:27
TPOINT
Definition: blobs.h:49
TESSLINE::loop
EDGEPT * loop
Definition: blobs.h:278
TESSLINE::CopyFrom
void CopyFrom(const TESSLINE &src)
Definition: blobs.cpp:114
TBLOB::ShallowCopy
static TBLOB * ShallowCopy(const TBLOB &src)
Definition: blobs.cpp:334
WERD::NextColor
static ScrollView::Color NextColor(ScrollView::Color colour)
Definition: werd.cpp:291
LLSQ::y_variance
double y_variance() const
Definition: linlsq.h:86
DENORM::NormTransform
void NormTransform(const DENORM *first_norm, const TPOINT &pt, TPOINT *transformed) const
Definition: normalis.cpp:334
EDGEPT::src_outline
C_OUTLINE * src_outline
Definition: blobs.h:192
TWERD
Definition: blobs.h:416
LLSQ::mean_point
FCOORD mean_point() const
Definition: linlsq.cpp:158
TBLOB::NumOutlines
int NumOutlines() const
Definition: blobs.cpp:453
TBLOB::Clear
void Clear()
Definition: blobs.cpp:385
FCOORD::set_x
void set_x(float xin)
rewrite function
Definition: points.h:213
TBLOB::outlines
TESSLINE * outlines
Definition: blobs.h:398
baseline
Definition: mfoutline.h:62
EDGEPT::step_count
int step_count
Definition: blobs.h:195
EDGEPT::IsHidden
bool IsHidden() const
Definition: blobs.h:174
TESSLINE::Scale
void Scale(float factor)
Definition: blobs.cpp:189
TBLOB::plot
void plot(ScrollView *window, ScrollView::Color color, ScrollView::Color child_color)
Definition: blobs.cpp:508
FCOORD::y
float y() const
Definition: points.h:209
divisible_blob
bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT *location)
Definition: blobs.cpp:910
ICOORD
integer coordinate
Definition: points.h:30
DENORM::set_inverse
void set_inverse(bool value)
Definition: normalis.h:254
TESSLINE
Definition: blobs.h:201
FCOORD::x
float x() const
Definition: points.h:206
TESSLINE::topleft
TPOINT topleft
Definition: blobs.h:274
TBOX::top
int16_t top() const
Definition: rect.h:57
ScrollView::BROWN
Definition: scrollview.h:120
LLSQ::count
int32_t count() const
Definition: linlsq.h:42
ScrollView::Pen
void Pen(Color color)
Definition: scrollview.cpp:717
ScrollView::DrawTo
void DrawTo(int x, int y)
Definition: scrollview.cpp:524
TBLOB::ComputeMoments
int ComputeMoments(FCOORD *center, FCOORD *second_moments) const
Definition: blobs.cpp:520
IntCastRounded
int IntCastRounded(double x)
Definition: helpers.h:173
TWERD::Clear
void Clear()
Definition: blobs.cpp:847
TESSLINE::bounding_box
TBOX bounding_box() const
Definition: blobs.cpp:256
DENORM::RootDenorm
const DENORM * RootDenorm() const
Definition: normalis.h:257
TESSLINE::next
TESSLINE * next
Definition: blobs.h:279
ICOORD::x
int16_t x() const
access function
Definition: points.h:51
FCOORD
Definition: points.h:187
blobs.h
FCOORD::set_y
void set_y(float yin)
rewrite function
Definition: points.h:217
TWERD::ComputeBoundingBoxes
void ComputeBoundingBoxes()
Definition: blobs.cpp:853
C_BLOB
Definition: stepblob.h:36
TESSLINE::SetupFromPos
void SetupFromPos()
Definition: blobs.cpp:200
C_OUTLINE
Definition: coutln.h:71
TESSLINE::TESSLINE
TESSLINE()
Definition: blobs.h:202
TBOX::height
int16_t height() const
Definition: rect.h:107
werd.h
WERD::cblob_list
C_BLOB_LIST * cblob_list()
Definition: werd.h:94
EDGEPT::prev
EDGEPT * prev
Definition: blobs.h:191
TWERD::CopyFrom
void CopyFrom(const TWERD &src)
Definition: blobs.cpp:837
EDGEPT::start_step
int start_step
Definition: blobs.h:194
tesseract::OcrEngineMode
OcrEngineMode
Definition: publictypes.h:265
C_OUTLINE::position_at_index
ICOORD position_at_index(int index) const
Definition: coutln.h:152
DENORM::block
const BLOCK * block() const
Definition: normalis.h:272
TWERD::TWERD
TWERD()
Definition: blobs.h:417
GenericVector::push_back
int push_back(T object)
Definition: genericvector.h:799
BLOCK
Definition: ocrblock.h:28
tesseract::CCStruct
Definition: ccstruct.h:25
DENORM::SetupNormalization
void SetupNormalization(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift)
Definition: normalis.cpp:95
ScrollView::BLACK
Definition: scrollview.h:102
TESSLINE::BuildFromOutlineList
static TESSLINE * BuildFromOutlineList(EDGEPT *outline)
Definition: blobs.cpp:93
TBLOB::TBLOB
TBLOB()
Definition: blobs.h:283
TPOINT::x
int16_t x
Definition: blobs.h:91
TPOINT::y
int16_t y
Definition: blobs.h:92
TBOX::x_middle
int x_middle() const
Definition: rect.h:84
TWERD::blobs
GenericVector< TBLOB * > blobs
Definition: blobs.h:457
DENORM::LocalNormBlob
void LocalNormBlob(TBLOB *blob) const
Definition: normalis.cpp:411
TESSLINE::FindBestStartPt
EDGEPT * FindBestStartPt() const
Definition: blobs.cpp:282
TESSLINE::MinMaxCrossProduct
void MinMaxCrossProduct(const TPOINT vec, int *min_xp, int *max_xp) const
Definition: blobs.cpp:242
TBOX::width
int16_t width() const
Definition: rect.h:114
TESSLINE::Normalize
void Normalize(const DENORM &denorm)
Definition: blobs.cpp:154
kDivisibleVerticalUpright
const TPOINT kDivisibleVerticalUpright(0, 1)
TWERD::PolygonalCopy
static TWERD * PolygonalCopy(bool allow_detailed_fx, WERD *src)
Definition: blobs.cpp:774
TBOX::bottom
int16_t bottom() const
Definition: rect.h:64
TBLOB::PolygonalCopy
static TBLOB * PolygonalCopy(bool allow_detailed_fx, C_BLOB *src)
Definition: blobs.cpp:326
TBLOB::Normalize
void Normalize(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift, bool inverse, Pix *pix)
Definition: blobs.cpp:396
linlsq.h
CLISTIZE
CLISTIZE(BLOCK_RES) ELISTIZE(ROW_RES) ELISTIZE(WERD_RES) static const double kStopperAmbiguityThresholdGain
TBLOB::GetPreciseBoundingBox
void GetPreciseBoundingBox(TBOX *precise_box) const
Definition: blobs.cpp:539
helpers.h
TWERD::BLNormalize
void BLNormalize(const BLOCK *block, const ROW *row, Pix *pix, bool inverse, float x_height, float baseline_shift, bool numeric_mode, tesseract::OcrEngineMode hint, const TBOX *norm_box, DENORM *word_denorm)
Definition: blobs.cpp:788
EDGEPT::vec
VECTOR vec
Definition: blobs.h:185
TBOX::botleft
const ICOORD & botleft() const
Definition: rect.h:91
ccstruct.h
ApproximateOutline
TESSLINE * ApproximateOutline(bool allow_detailed_fx, C_OUTLINE *c_outline)
Definition: polyaprx.cpp:59
TESSLINE::plot
void plot(ScrollView *window, ScrollView::Color color, ScrollView::Color child_color)
Definition: blobs.cpp:261
TBLOB::bounding_box
TBOX bounding_box() const
Definition: blobs.cpp:466
GenericVector
Definition: baseapi.h:40
DENORM::inverse
bool inverse() const
Definition: normalis.h:251
divide_blobs
void divide_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob, const TPOINT &location)
Definition: blobs.cpp:958
TBLOB::EliminateDuplicateOutlines
void EliminateDuplicateOutlines()
Definition: blobs.cpp:478
TPOINT::cross
int cross(const TPOINT &other) const
Definition: blobs.h:77
TESSLINE::Move
void Move(const ICOORD vec)
Definition: blobs.cpp:178
DENORM::set_pix
void set_pix(Pix *pix)
Definition: normalis.h:248
TBLOB::CorrectBlobOrder
void CorrectBlobOrder(TBLOB *next)
Definition: blobs.cpp:499
TBLOB::Rotate
void Rotate(const FCOORD rotation)
Definition: blobs.cpp:421
TBLOB
Definition: blobs.h:282
ocrrow.h
WERD
Definition: werd.h:55
TBOX::left
int16_t left() const
Definition: rect.h:71
ROW
Definition: ocrrow.h:35
ocrblock.h
C_OUTLINE::plot
void plot(ScrollView *window, ScrollView::Color colour) const
Definition: coutln.cpp:942
GenericVector::clear
void clear()
Definition: genericvector.h:857
TBLOB::GetEdgeCoords
void GetEdgeCoords(const TBOX &box, GenericVector< GenericVector< int > > *x_coords, GenericVector< GenericVector< int > > *y_coords) const
Definition: blobs.cpp:555
TBOX::right
int16_t right() const
Definition: rect.h:78
GenericVector::init_to_size
void init_to_size(int size, const T &t)
Definition: genericvector.h:706
EDGEPT
Definition: blobs.h:97
TBLOB::CopyFrom
void CopyFrom(const TBLOB &src)
Definition: blobs.cpp:369
BLOCK::classify_rotation
FCOORD classify_rotation() const
Definition: ocrblock.h:139
Swap
void Swap(T *p1, T *p2)
Definition: helpers.h:93
DENORM::LocalNormTransform
void LocalNormTransform(const TPOINT &pt, TPOINT *transformed) const
Definition: normalis.cpp:305
ScrollView::SetCursor
void SetCursor(int x, int y)
Definition: scrollview.cpp:518
C_OUTLINE::pathlength
int32_t pathlength() const
Definition: coutln.h:134
polyaprx.h
TWERD::plot
void plot(ScrollView *window)
Definition: blobs.cpp:895
TWERD::bounding_box
TBOX bounding_box() const
Definition: blobs.cpp:859
C_OUTLINE::step
ICOORD step(int index) const
Definition: coutln.h:143
ScrollView::Color
Color
Definition: scrollview.h:100
UpdateRange
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
Definition: helpers.h:118
DENORM::pix
Pix * pix() const
Definition: normalis.h:245
GenericVector::size
int size() const
Definition: genericvector.h:71
TBLOB::Move
void Move(const ICOORD vec)
Definition: blobs.cpp:429
C_OUTLINE::child
C_OUTLINE_LIST * child()
Definition: coutln.h:107
LLSQ::x_variance
double x_variance() const
Definition: linlsq.h:80
TESSLINE::is_hole
bool is_hole
Definition: blobs.h:277
kBlnBaselineOffset
const int kBlnBaselineOffset
Definition: normalis.h:24
EDGEPT::pos
TPOINT pos
Definition: blobs.h:184
EDGEPT::next
EDGEPT * next
Definition: blobs.h:190
clst.h
ICOORD::y
int16_t y() const
access_function
Definition: points.h:55
points.h
TBOX
Definition: rect.h:33
DENORM
Definition: normalis.h:49