tesseract  4.0.0-1-g2a2b
textlineprojection.cpp
Go to the documentation of this file.
1 // Copyright 2011 Google Inc. All Rights Reserved.
2 // Author: rays@google.com (Ray Smith)
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13 
14 #ifdef HAVE_CONFIG_H
15 #include "config_auto.h"
16 #endif
17 
18 #include "textlineprojection.h"
19 #include "allheaders.h"
20 #include "bbgrid.h" // Base class.
21 #include "blobbox.h" // BlobNeighourDir.
22 #include "blobs.h"
23 #include "colpartition.h"
24 #include "normalis.h"
25 
26 #include <algorithm>
27 
28 // Padding factor to use on definitely oriented blobs
29 const int kOrientedPadFactor = 8;
30 // Padding factor to use on not definitely oriented blobs.
31 const int kDefaultPadFactor = 2;
32 // Penalty factor for going away from the line center.
33 const int kWrongWayPenalty = 4;
34 // Ratio between parallel gap and perpendicular gap used to measure total
35 // distance of a box from a target box in curved textline space.
36 // parallel-gap is treated more favorably by this factor to allow catching
37 // quotes and ellipses at the end of textlines.
38 const int kParaPerpDistRatio = 4;
39 // Multiple of scale_factor_ that the inter-line gap must be before we start
40 // padding the increment box perpendicular to the text line.
41 const int kMinLineSpacingFactor = 4;
42 // Maximum tab-stop overrun for horizontal padding, in projection pixels.
43 const int kMaxTabStopOverrun = 6;
44 
45 namespace tesseract {
46 
48  : x_origin_(0), y_origin_(0), pix_(nullptr) {
49  // The projection map should be about 100 ppi, whatever the input.
50  scale_factor_ = IntCastRounded(resolution / 100.0);
51  if (scale_factor_ < 1) scale_factor_ = 1;
52 }
54  pixDestroy(&pix_);
55 }
56 
57 // Build the projection profile given the input_block containing lists of
58 // blobs, a rotation to convert to image coords,
59 // and a full-resolution nontext_map, marking out areas to avoid.
60 // During construction, we have the following assumptions:
61 // The rotation is a multiple of 90 degrees, ie no deskew yet.
62 // The blobs have had their left and right rules set to also limit
63 // the range of projection.
65  const FCOORD& rotation,
66  Pix* nontext_map) {
67  pixDestroy(&pix_);
68  TBOX image_box(0, 0, pixGetWidth(nontext_map), pixGetHeight(nontext_map));
69  x_origin_ = 0;
70  y_origin_ = image_box.height();
71  int width = (image_box.width() + scale_factor_ - 1) / scale_factor_;
72  int height = (image_box.height() + scale_factor_ - 1) / scale_factor_;
73 
74  pix_ = pixCreate(width, height, 8);
75  ProjectBlobs(&input_block->blobs, rotation, image_box, nontext_map);
76  ProjectBlobs(&input_block->large_blobs, rotation, image_box, nontext_map);
77  Pix* final_pix = pixBlockconv(pix_, 1, 1);
78 // Pix* final_pix = pixBlockconv(pix_, 2, 2);
79  pixDestroy(&pix_);
80  pix_ = final_pix;
81 }
82 
83 // Display the blobs in the window colored according to textline quality.
84 void TextlineProjection::PlotGradedBlobs(BLOBNBOX_LIST* blobs,
85  ScrollView* win) {
86 #ifndef GRAPHICS_DISABLED
87  BLOBNBOX_IT it(blobs);
88  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
89  BLOBNBOX* blob = it.data();
90  const TBOX& box = blob->bounding_box();
91  bool bad_box = BoxOutOfHTextline(box, nullptr, false);
92  if (blob->UniquelyVertical())
93  win->Pen(ScrollView::YELLOW);
94  else
95  win->Pen(bad_box ? ScrollView::RED : ScrollView::BLUE);
96  win->Rectangle(box.left(), box.bottom(), box.right(), box.top());
97  }
98  win->Update();
99 #endif // GRAPHICS_DISABLED
100 }
101 
102 // Moves blobs that look like they don't sit well on a textline from the
103 // input blobs list to the output small_blobs list.
104 // This gets them away from initial textline finding to stop diacritics
105 // from forming incorrect textlines. (Introduced mainly to fix Thai.)
107  BLOBNBOX_LIST* blobs, BLOBNBOX_LIST* small_blobs) const {
108  BLOBNBOX_IT it(blobs);
109  BLOBNBOX_IT small_it(small_blobs);
110  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
111  BLOBNBOX* blob = it.data();
112  const TBOX& box = blob->bounding_box();
113  bool debug = AlignedBlob::WithinTestRegion(2, box.left(),
114  box.bottom());
115  if (BoxOutOfHTextline(box, nullptr, debug) && !blob->UniquelyVertical()) {
116  blob->ClearNeighbours();
117  small_it.add_to_end(it.extract());
118  }
119  }
120 }
121 
122 // Create a window and display the projection in it.
124 #ifndef GRAPHICS_DISABLED
125  int width = pixGetWidth(pix_);
126  int height = pixGetHeight(pix_);
127  Pix* pixc = pixCreate(width, height, 32);
128  int src_wpl = pixGetWpl(pix_);
129  int col_wpl = pixGetWpl(pixc);
130  uint32_t* src_data = pixGetData(pix_);
131  uint32_t* col_data = pixGetData(pixc);
132  for (int y = 0; y < height; ++y, src_data += src_wpl, col_data += col_wpl) {
133  for (int x = 0; x < width; ++x) {
134  int pixel = GET_DATA_BYTE(src_data, x);
135  l_uint32 result;
136  if (pixel <= 17)
137  composeRGBPixel(0, 0, pixel * 15, &result);
138  else if (pixel <= 145)
139  composeRGBPixel(0, (pixel - 17) * 2, 255, &result);
140  else
141  composeRGBPixel((pixel - 145) * 2, 255, 255, &result);
142  col_data[x] = result;
143  }
144  }
145  ScrollView* win = new ScrollView("Projection", 0, 0,
146  width, height, width, height);
147  win->Image(pixc, 0, 0);
148  win->Update();
149  pixDestroy(&pixc);
150 #endif // GRAPHICS_DISABLED
151 }
152 
153 // Compute the distance of the box from the partition using curved projection
154 // space. As DistanceOfBoxFromBox, except that the direction is taken from
155 // the ColPartition and the median bounds of the ColPartition are used as
156 // the to_box.
158  const ColPartition& part,
159  const DENORM* denorm,
160  bool debug) const {
161  // Compute a partition box that uses the median top/bottom of the blobs
162  // within and median left/right for vertical.
163  TBOX part_box = part.bounding_box();
164  if (part.IsHorizontalType()) {
165  part_box.set_top(part.median_top());
166  part_box.set_bottom(part.median_bottom());
167  } else {
168  part_box.set_left(part.median_left());
169  part_box.set_right(part.median_right());
170  }
171  // Now use DistanceOfBoxFromBox to make the actual calculation.
172  return DistanceOfBoxFromBox(box, part_box, part.IsHorizontalType(),
173  denorm, debug);
174 }
175 
176 // Compute the distance from the from_box to the to_box using curved
177 // projection space. Separation that involves a decrease in projection
178 // density (moving from the from_box to the to_box) is weighted more heavily
179 // than constant density, and an increase is weighted less.
180 // If horizontal_textline is true, then curved space is used vertically,
181 // as for a diacritic on the edge of a textline.
182 // The projection uses original image coords, so denorm is used to get
183 // back to the image coords from box/part space.
184 // How the calculation works: Think of a diacritic near a textline.
185 // Distance is measured from the far side of the from_box to the near side of
186 // the to_box. Shown is the horizontal textline case.
187 // |------^-----|
188 // | from | box |
189 // |------|-----|
190 // perpendicular |
191 // <------v-------->|--------------------|
192 // parallel | to box |
193 // |--------------------|
194 // Perpendicular distance uses "curved space" See VerticalDistance below.
195 // Parallel distance is linear.
196 // Result is perpendicular_gap + parallel_gap / kParaPerpDistRatio.
198  const TBOX& to_box,
199  bool horizontal_textline,
200  const DENORM* denorm,
201  bool debug) const {
202  // The parallel_gap is the horizontal gap between a horizontal textline and
203  // the box. Analogous for vertical.
204  int parallel_gap = 0;
205  // start_pt is the box end of the line to be modified for curved space.
206  TPOINT start_pt;
207  // end_pt is the partition end of the line to be modified for curved space.
208  TPOINT end_pt;
209  if (horizontal_textline) {
210  parallel_gap = from_box.x_gap(to_box) + from_box.width();
211  start_pt.x = (from_box.left() + from_box.right()) / 2;
212  end_pt.x = start_pt.x;
213  if (from_box.top() - to_box.top() >= to_box.bottom() - from_box.bottom()) {
214  start_pt.y = from_box.top();
215  end_pt.y = std::min(to_box.top(), start_pt.y);
216  } else {
217  start_pt.y = from_box.bottom();
218  end_pt.y = std::max(to_box.bottom(), start_pt.y);
219  }
220  } else {
221  parallel_gap = from_box.y_gap(to_box) + from_box.height();
222  if (from_box.right() - to_box.right() >= to_box.left() - from_box.left()) {
223  start_pt.x = from_box.right();
224  end_pt.x = std::min(to_box.right(), start_pt.x);
225  } else {
226  start_pt.x = from_box.left();
227  end_pt.x = std::max(to_box.left(), start_pt.x);
228  }
229  start_pt.y = (from_box.bottom() + from_box.top()) / 2;
230  end_pt.y = start_pt.y;
231  }
232  // The perpendicular gap is the max vertical distance gap out of:
233  // top of from_box to to_box top and bottom of from_box to to_box bottom.
234  // This value is then modified for curved projection space.
235  // Analogous for vertical.
236  int perpendicular_gap = 0;
237  // If start_pt == end_pt, then the from_box lies entirely within the to_box
238  // (in the perpendicular direction), so we don't need to calculate the
239  // perpendicular_gap.
240  if (start_pt.x != end_pt.x || start_pt.y != end_pt.y) {
241  if (denorm != nullptr) {
242  // Denormalize the start and end.
243  denorm->DenormTransform(nullptr, start_pt, &start_pt);
244  denorm->DenormTransform(nullptr, end_pt, &end_pt);
245  }
246  if (abs(start_pt.y - end_pt.y) >= abs(start_pt.x - end_pt.x)) {
247  perpendicular_gap = VerticalDistance(debug, start_pt.x, start_pt.y,
248  end_pt.y);
249  } else {
250  perpendicular_gap = HorizontalDistance(debug, start_pt.x, end_pt.x,
251  start_pt.y);
252  }
253  }
254  // The parallel_gap weighs less than the perpendicular_gap.
255  return perpendicular_gap + parallel_gap / kParaPerpDistRatio;
256 }
257 
258 // Compute the distance between (x, y1) and (x, y2) using the rule that
259 // a decrease in textline density is weighted more heavily than an increase.
260 // The coordinates are in source image space, ie processed by any denorm
261 // already, but not yet scaled by scale_factor_.
262 // Going from the outside of a textline to the inside should measure much
263 // less distance than going from the inside of a textline to the outside.
264 // How it works:
265 // An increase is cheap (getting closer to a textline).
266 // Constant costs unity.
267 // A decrease is expensive (getting further from a textline).
268 // Pixels in projection map Counted distance
269 // 2
270 // 3 1/x
271 // 3 1
272 // 2 x
273 // 5 1/x
274 // 7 1/x
275 // Total: 1 + x + 3/x where x = kWrongWayPenalty.
277  int y1, int y2) const {
278  x = ImageXToProjectionX(x);
279  y1 = ImageYToProjectionY(y1);
280  y2 = ImageYToProjectionY(y2);
281  if (y1 == y2) return 0;
282  int wpl = pixGetWpl(pix_);
283  int step = y1 < y2 ? 1 : -1;
284  uint32_t* data = pixGetData(pix_) + y1 * wpl;
285  wpl *= step;
286  int prev_pixel = GET_DATA_BYTE(data, x);
287  int distance = 0;
288  int right_way_steps = 0;
289  for (int y = y1; y != y2; y += step) {
290  data += wpl;
291  int pixel = GET_DATA_BYTE(data, x);
292  if (debug)
293  tprintf("At (%d,%d), pix = %d, prev=%d\n",
294  x, y + step, pixel, prev_pixel);
295  if (pixel < prev_pixel)
296  distance += kWrongWayPenalty;
297  else if (pixel > prev_pixel)
298  ++right_way_steps;
299  else
300  ++distance;
301  prev_pixel = pixel;
302  }
303  return distance * scale_factor_ +
304  right_way_steps * scale_factor_ / kWrongWayPenalty;
305 }
306 
307 // Compute the distance between (x1, y) and (x2, y) using the rule that
308 // a decrease in textline density is weighted more heavily than an increase.
309 int TextlineProjection::HorizontalDistance(bool debug, int x1, int x2,
310  int y) const {
311  x1 = ImageXToProjectionX(x1);
312  x2 = ImageXToProjectionX(x2);
313  y = ImageYToProjectionY(y);
314  if (x1 == x2) return 0;
315  int wpl = pixGetWpl(pix_);
316  int step = x1 < x2 ? 1 : -1;
317  uint32_t* data = pixGetData(pix_) + y * wpl;
318  int prev_pixel = GET_DATA_BYTE(data, x1);
319  int distance = 0;
320  int right_way_steps = 0;
321  for (int x = x1; x != x2; x += step) {
322  int pixel = GET_DATA_BYTE(data, x + step);
323  if (debug)
324  tprintf("At (%d,%d), pix = %d, prev=%d\n",
325  x + step, y, pixel, prev_pixel);
326  if (pixel < prev_pixel)
327  distance += kWrongWayPenalty;
328  else if (pixel > prev_pixel)
329  ++right_way_steps;
330  else
331  ++distance;
332  prev_pixel = pixel;
333  }
334  return distance * scale_factor_ +
335  right_way_steps * scale_factor_ / kWrongWayPenalty;
336 }
337 
338 // Returns true if the blob appears to be outside of a textline.
339 // Such blobs are potentially diacritics (even if large in Thai) and should
340 // be kept away from initial textline finding.
342  const DENORM* denorm,
343  bool debug) const {
344  int grad1 = 0;
345  int grad2 = 0;
346  EvaluateBoxInternal(box, denorm, debug, &grad1, &grad2, nullptr, nullptr);
347  int worst_result = std::min(grad1, grad2);
348  int total_result = grad1 + grad2;
349  if (total_result >= 6) return false; // Strongly in textline.
350  // Medium strength: if either gradient is negative, it is likely outside
351  // the body of the textline.
352  if (worst_result < 0)
353  return true;
354  return false;
355 }
356 
357 // Evaluates the textlineiness of a ColPartition. Uses EvaluateBox below,
358 // but uses the median top/bottom for horizontal and median left/right for
359 // vertical instead of the bounding box edges.
360 // Evaluates for both horizontal and vertical and returns the best result,
361 // with a positive value for horizontal and a negative value for vertical.
363  const DENORM* denorm,
364  bool debug) const {
365  if (part.IsSingleton())
366  return EvaluateBox(part.bounding_box(), denorm, debug);
367  // Test vertical orientation.
368  TBOX box = part.bounding_box();
369  // Use the partition median for left/right.
370  box.set_left(part.median_left());
371  box.set_right(part.median_right());
372  int vresult = EvaluateBox(box, denorm, debug);
373 
374  // Test horizontal orientation.
375  box = part.bounding_box();
376  // Use the partition median for top/bottom.
377  box.set_top(part.median_top());
378  box.set_bottom(part.median_bottom());
379  int hresult = EvaluateBox(box, denorm, debug);
380  if (debug) {
381  tprintf("Partition hresult=%d, vresult=%d from:", hresult, vresult);
382  part.bounding_box().print();
383  part.Print();
384  }
385  return hresult >= -vresult ? hresult : vresult;
386 }
387 
388 // Computes the mean projection gradients over the horizontal and vertical
389 // edges of the box:
390 // -h-h-h-h-h-h
391 // |------------| mean=htop -v|+v--------+v|-v
392 // |+h+h+h+h+h+h| -v|+v +v|-v
393 // | | -v|+v +v|-v
394 // | box | -v|+v box +v|-v
395 // | | -v|+v +v|-v
396 // |+h+h+h+h+h+h| -v|+v +v|-v
397 // |------------| mean=hbot -v|+v--------+v|-v
398 // -h-h-h-h-h-h
399 // mean=vleft mean=vright
400 //
401 // Returns MAX(htop,hbot) - MAX(vleft,vright), which is a positive number
402 // for a horizontal textline, a negative number for a vertical textline,
403 // and near zero for undecided. Undecided is most likely non-text.
404 // All the gradients are truncated to remain non-negative, since negative
405 // horizontal gradients don't give any indication of being vertical and
406 // vice versa.
407 // Additional complexity: The coordinates have to be transformed to original
408 // image coordinates with denorm (if not null), scaled to match the projection
409 // pix, and THEN step out 2 pixels each way from the edge to compute the
410 // gradient, and tries 3 positions, each measuring the gradient over a
411 // 4-pixel spread: (+3/-1), (+2/-2), (+1/-3). This complexity is handled by
412 // several layers of helpers below.
413 int TextlineProjection::EvaluateBox(const TBOX& box, const DENORM* denorm,
414  bool debug) const {
415  return EvaluateBoxInternal(box, denorm, debug, nullptr, nullptr, nullptr, nullptr);
416 }
417 
418 // Internal version of EvaluateBox returns the unclipped gradients as well
419 // as the result of EvaluateBox.
420 // hgrad1 and hgrad2 are the gradients for the horizontal textline.
421 int TextlineProjection::EvaluateBoxInternal(const TBOX& box,
422  const DENORM* denorm, bool debug,
423  int* hgrad1, int* hgrad2,
424  int* vgrad1, int* vgrad2) const {
425  int top_gradient = BestMeanGradientInRow(denorm, box.left(), box.right(),
426  box.top(), true);
427  int bottom_gradient = -BestMeanGradientInRow(denorm, box.left(), box.right(),
428  box.bottom(), false);
429  int left_gradient = BestMeanGradientInColumn(denorm, box.left(), box.bottom(),
430  box.top(), true);
431  int right_gradient = -BestMeanGradientInColumn(denorm, box.right(),
432  box.bottom(), box.top(),
433  false);
434  int top_clipped = std::max(top_gradient, 0);
435  int bottom_clipped = std::max(bottom_gradient, 0);
436  int left_clipped = std::max(left_gradient, 0);
437  int right_clipped = std::max(right_gradient, 0);
438  if (debug) {
439  tprintf("Gradients: top = %d, bottom = %d, left= %d, right= %d for box:",
440  top_gradient, bottom_gradient, left_gradient, right_gradient);
441  box.print();
442  }
443  int result = std::max(top_clipped, bottom_clipped) -
444  std::max(left_clipped, right_clipped);
445  if (hgrad1 != nullptr && hgrad2 != nullptr) {
446  *hgrad1 = top_gradient;
447  *hgrad2 = bottom_gradient;
448  }
449  if (vgrad1 != nullptr && vgrad2 != nullptr) {
450  *vgrad1 = left_gradient;
451  *vgrad2 = right_gradient;
452  }
453  return result;
454 }
455 
456 // Helper returns the mean gradient value for the horizontal row at the given
457 // y, (in the external coordinates) by subtracting the mean of the transformed
458 // row 2 pixels above from the mean of the transformed row 2 pixels below.
459 // This gives a positive value for a good top edge and negative for bottom.
460 // Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge.
461 int TextlineProjection::BestMeanGradientInRow(const DENORM* denorm,
462  int16_t min_x, int16_t max_x, int16_t y,
463  bool best_is_max) const {
464  TPOINT start_pt(min_x, y);
465  TPOINT end_pt(max_x, y);
466  int upper = MeanPixelsInLineSegment(denorm, -2, start_pt, end_pt);
467  int lower = MeanPixelsInLineSegment(denorm, 2, start_pt, end_pt);
468  int best_gradient = lower - upper;
469  upper = MeanPixelsInLineSegment(denorm, -1, start_pt, end_pt);
470  lower = MeanPixelsInLineSegment(denorm, 3, start_pt, end_pt);
471  int gradient = lower - upper;
472  if ((gradient > best_gradient) == best_is_max)
473  best_gradient = gradient;
474  upper = MeanPixelsInLineSegment(denorm, -3, start_pt, end_pt);
475  lower = MeanPixelsInLineSegment(denorm, 1, start_pt, end_pt);
476  gradient = lower - upper;
477  if ((gradient > best_gradient) == best_is_max)
478  best_gradient = gradient;
479  return best_gradient;
480 }
481 
482 // Helper returns the mean gradient value for the vertical column at the
483 // given x, (in the external coordinates) by subtracting the mean of the
484 // transformed column 2 pixels left from the mean of the transformed column
485 // 2 pixels to the right.
486 // This gives a positive value for a good left edge and negative for right.
487 // Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge.
488 int TextlineProjection::BestMeanGradientInColumn(const DENORM* denorm, int16_t x,
489  int16_t min_y, int16_t max_y,
490  bool best_is_max) const {
491  TPOINT start_pt(x, min_y);
492  TPOINT end_pt(x, max_y);
493  int left = MeanPixelsInLineSegment(denorm, -2, start_pt, end_pt);
494  int right = MeanPixelsInLineSegment(denorm, 2, start_pt, end_pt);
495  int best_gradient = right - left;
496  left = MeanPixelsInLineSegment(denorm, -1, start_pt, end_pt);
497  right = MeanPixelsInLineSegment(denorm, 3, start_pt, end_pt);
498  int gradient = right - left;
499  if ((gradient > best_gradient) == best_is_max)
500  best_gradient = gradient;
501  left = MeanPixelsInLineSegment(denorm, -3, start_pt, end_pt);
502  right = MeanPixelsInLineSegment(denorm, 1, start_pt, end_pt);
503  gradient = right - left;
504  if ((gradient > best_gradient) == best_is_max)
505  best_gradient = gradient;
506  return best_gradient;
507 }
508 
509 // Helper returns the mean pixel value over the line between the start_pt and
510 // end_pt (inclusive), but shifted perpendicular to the line in the projection
511 // image by offset pixels. For simplicity, it is assumed that the vector is
512 // either nearly horizontal or nearly vertical. It works on skewed textlines!
513 // The end points are in external coordinates, and will be denormalized with
514 // the denorm if not nullptr before further conversion to pix coordinates.
515 // After all the conversions, the offset is added to the direction
516 // perpendicular to the line direction. The offset is thus in projection image
517 // coordinates, which allows the caller to get a guaranteed displacement
518 // between pixels used to calculate gradients.
519 int TextlineProjection::MeanPixelsInLineSegment(const DENORM* denorm,
520  int offset,
521  TPOINT start_pt,
522  TPOINT end_pt) const {
523  TransformToPixCoords(denorm, &start_pt);
524  TransformToPixCoords(denorm, &end_pt);
525  TruncateToImageBounds(&start_pt);
526  TruncateToImageBounds(&end_pt);
527  int wpl = pixGetWpl(pix_);
528  uint32_t* data = pixGetData(pix_);
529  int total = 0;
530  int count = 0;
531  int x_delta = end_pt.x - start_pt.x;
532  int y_delta = end_pt.y - start_pt.y;
533  if (abs(x_delta) >= abs(y_delta)) {
534  if (x_delta == 0)
535  return 0;
536  // Horizontal line. Add the offset vertically.
537  int x_step = x_delta > 0 ? 1 : -1;
538  // Correct offset for rotation, keeping it anti-clockwise of the delta.
539  offset *= x_step;
540  start_pt.y += offset;
541  end_pt.y += offset;
542  TruncateToImageBounds(&start_pt);
543  TruncateToImageBounds(&end_pt);
544  x_delta = end_pt.x - start_pt.x;
545  y_delta = end_pt.y - start_pt.y;
546  count = x_delta * x_step + 1;
547  for (int x = start_pt.x; x != end_pt.x; x += x_step) {
548  int y = start_pt.y + DivRounded(y_delta * (x - start_pt.x), x_delta);
549  total += GET_DATA_BYTE(data + wpl * y, x);
550  }
551  } else {
552  // Vertical line. Add the offset horizontally.
553  int y_step = y_delta > 0 ? 1 : -1;
554  // Correct offset for rotation, keeping it anti-clockwise of the delta.
555  // Pix holds the image with y=0 at the top, so the offset is negated.
556  offset *= -y_step;
557  start_pt.x += offset;
558  end_pt.x += offset;
559  TruncateToImageBounds(&start_pt);
560  TruncateToImageBounds(&end_pt);
561  x_delta = end_pt.x - start_pt.x;
562  y_delta = end_pt.y - start_pt.y;
563  count = y_delta * y_step + 1;
564  for (int y = start_pt.y; y != end_pt.y; y += y_step) {
565  int x = start_pt.x + DivRounded(x_delta * (y - start_pt.y), y_delta);
566  total += GET_DATA_BYTE(data + wpl * y, x);
567  }
568  }
569  return DivRounded(total, count);
570 }
571 
572 // Given an input pix, and a box, the sides of the box are shrunk inwards until
573 // they bound any black pixels found within the original box.
574 // The function converts between tesseract coords and the pix coords assuming
575 // that this pix is full resolution equal in size to the original image.
576 // Returns an empty box if there are no black pixels in the source box.
577 static TBOX BoundsWithinBox(Pix* pix, const TBOX& box) {
578  int im_height = pixGetHeight(pix);
579  Box* input_box = boxCreate(box.left(), im_height - box.top(),
580  box.width(), box.height());
581  Box* output_box = nullptr;
582  pixClipBoxToForeground(pix, input_box, nullptr, &output_box);
583  TBOX result_box;
584  if (output_box != nullptr) {
585  l_int32 x, y, width, height;
586  boxGetGeometry(output_box, &x, &y, &width, &height);
587  result_box.set_left(x);
588  result_box.set_right(x + width);
589  result_box.set_top(im_height - y);
590  result_box.set_bottom(result_box.top() - height);
591  boxDestroy(&output_box);
592  }
593  boxDestroy(&input_box);
594  return result_box;
595 }
596 
597 // Splits the given box in half at x_middle or y_middle according to split_on_x
598 // and checks for nontext_map pixels in each half. Reduces the bbox so that it
599 // still includes the middle point, but does not touch any fg pixels in
600 // nontext_map. An empty box may be returned if there is no such box.
601 static void TruncateBoxToMissNonText(int x_middle, int y_middle,
602  bool split_on_x, Pix* nontext_map,
603  TBOX* bbox) {
604  TBOX box1(*bbox);
605  TBOX box2(*bbox);
606  TBOX im_box;
607  if (split_on_x) {
608  box1.set_right(x_middle);
609  im_box = BoundsWithinBox(nontext_map, box1);
610  if (!im_box.null_box()) box1.set_left(im_box.right());
611  box2.set_left(x_middle);
612  im_box = BoundsWithinBox(nontext_map, box2);
613  if (!im_box.null_box()) box2.set_right(im_box.left());
614  } else {
615  box1.set_bottom(y_middle);
616  im_box = BoundsWithinBox(nontext_map, box1);
617  if (!im_box.null_box()) box1.set_top(im_box.bottom());
618  box2.set_top(y_middle);
619  im_box = BoundsWithinBox(nontext_map, box2);
620  if (!im_box.null_box()) box2.set_bottom(im_box.top());
621  }
622  box1 += box2;
623  *bbox = box1;
624 }
625 
626 
627 // Helper function to add 1 to a rectangle in source image coords to the
628 // internal projection pix_.
629 void TextlineProjection::IncrementRectangle8Bit(const TBOX& box) {
630  int scaled_left = ImageXToProjectionX(box.left());
631  int scaled_top = ImageYToProjectionY(box.top());
632  int scaled_right = ImageXToProjectionX(box.right());
633  int scaled_bottom = ImageYToProjectionY(box.bottom());
634  int wpl = pixGetWpl(pix_);
635  uint32_t* data = pixGetData(pix_) + scaled_top * wpl;
636  for (int y = scaled_top; y <= scaled_bottom; ++y) {
637  for (int x = scaled_left; x <= scaled_right; ++x) {
638  int pixel = GET_DATA_BYTE(data, x);
639  if (pixel < 255)
640  SET_DATA_BYTE(data, x, pixel + 1);
641  }
642  data += wpl;
643  }
644 }
645 
646 // Inserts a list of blobs into the projection.
647 // Rotation is a multiple of 90 degrees to get from blob coords to
648 // nontext_map coords, nontext_map_box is the bounds of the nontext_map.
649 // Blobs are spread horizontally or vertically according to their internal
650 // flags, but the spreading is truncated by set pixels in the nontext_map
651 // and also by the horizontal rule line limits on the blobs.
652 void TextlineProjection::ProjectBlobs(BLOBNBOX_LIST* blobs,
653  const FCOORD& rotation,
654  const TBOX& nontext_map_box,
655  Pix* nontext_map) {
656  BLOBNBOX_IT blob_it(blobs);
657  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
658  BLOBNBOX* blob = blob_it.data();
659  TBOX bbox = blob->bounding_box();
660  ICOORD middle((bbox.left() + bbox.right()) / 2,
661  (bbox.bottom() + bbox.top()) / 2);
662  bool spreading_horizontally = PadBlobBox(blob, &bbox);
663  // Rotate to match the nontext_map.
664  bbox.rotate(rotation);
665  middle.rotate(rotation);
666  if (rotation.x() == 0.0f)
667  spreading_horizontally = !spreading_horizontally;
668  // Clip to the image before applying the increments.
669  bbox &= nontext_map_box; // This is in-place box intersection.
670  // Check for image pixels before spreading.
671  TruncateBoxToMissNonText(middle.x(), middle.y(), spreading_horizontally,
672  nontext_map, &bbox);
673  if (bbox.area() > 0) {
674  IncrementRectangle8Bit(bbox);
675  }
676  }
677 }
678 
679 // Pads the bounding box of the given blob according to whether it is on
680 // a horizontal or vertical text line, taking into account tab-stops near
681 // the blob. Returns true if padding was in the horizontal direction.
682 bool TextlineProjection::PadBlobBox(BLOBNBOX* blob, TBOX* bbox) {
683  // Determine which direction to spread.
684  // If text is well spaced out, it can be useful to pad perpendicular to
685  // the textline direction, so as to ensure diacritics get absorbed
686  // correctly, but if the text is tightly spaced, this will destroy the
687  // blank space between textlines in the projection map, and that would
688  // be very bad.
689  int pad_limit = scale_factor_ * kMinLineSpacingFactor;
690  int xpad = 0;
691  int ypad = 0;
692  bool padding_horizontally = false;
693  if (blob->UniquelyHorizontal()) {
694  xpad = bbox->height() * kOrientedPadFactor;
695  padding_horizontally = true;
696  // If the text appears to be very well spaced, pad the other direction by a
697  // single pixel in the projection profile space to help join diacritics to
698  // the textline.
699  if ((blob->neighbour(BND_ABOVE) == nullptr ||
700  bbox->y_gap(blob->neighbour(BND_ABOVE)->bounding_box()) > pad_limit) &&
701  (blob->neighbour(BND_BELOW) == nullptr ||
702  bbox->y_gap(blob->neighbour(BND_BELOW)->bounding_box()) > pad_limit)) {
703  ypad = scale_factor_;
704  }
705  } else if (blob->UniquelyVertical()) {
706  ypad = bbox->width() * kOrientedPadFactor;
707  if ((blob->neighbour(BND_LEFT) == nullptr ||
708  bbox->x_gap(blob->neighbour(BND_LEFT)->bounding_box()) > pad_limit) &&
709  (blob->neighbour(BND_RIGHT) == nullptr ||
710  bbox->x_gap(blob->neighbour(BND_RIGHT)->bounding_box()) > pad_limit)) {
711  xpad = scale_factor_;
712  }
713  } else {
714  if ((blob->neighbour(BND_ABOVE) != nullptr &&
715  blob->neighbour(BND_ABOVE)->neighbour(BND_BELOW) == blob) ||
716  (blob->neighbour(BND_BELOW) != nullptr &&
717  blob->neighbour(BND_BELOW)->neighbour(BND_ABOVE) == blob)) {
718  ypad = bbox->width() * kDefaultPadFactor;
719  }
720  if ((blob->neighbour(BND_RIGHT) != nullptr &&
721  blob->neighbour(BND_RIGHT)->neighbour(BND_LEFT) == blob) ||
722  (blob->neighbour(BND_LEFT) != nullptr &&
723  blob->neighbour(BND_LEFT)->neighbour(BND_RIGHT) == blob)) {
724  xpad = bbox->height() * kDefaultPadFactor;
725  padding_horizontally = true;
726  }
727  }
728  bbox->pad(xpad, ypad);
729  pad_limit = scale_factor_ * kMaxTabStopOverrun;
730  // Now shrink horizontally to avoid stepping more than pad_limit over a
731  // tab-stop.
732  if (bbox->left() < blob->left_rule() - pad_limit) {
733  bbox->set_left(blob->left_rule() - pad_limit);
734  }
735  if (bbox->right() > blob->right_rule() + pad_limit) {
736  bbox->set_right(blob->right_rule() + pad_limit);
737  }
738  return padding_horizontally;
739 }
740 
741 // Helper denormalizes the TPOINT with the denorm if not nullptr, then
742 // converts to pix_ coordinates.
743 void TextlineProjection::TransformToPixCoords(const DENORM* denorm,
744  TPOINT* pt) const {
745  if (denorm != nullptr) {
746  // Denormalize the point.
747  denorm->DenormTransform(nullptr, *pt, pt);
748  }
749  pt->x = ImageXToProjectionX(pt->x);
750  pt->y = ImageYToProjectionY(pt->y);
751 }
752 
753 #ifdef _MSC_VER
754 #pragma optimize("g", off)
755 #endif // _MSC_VER
756 // Helper truncates the TPOINT to be within the pix_.
757 void TextlineProjection::TruncateToImageBounds(TPOINT* pt) const {
758  pt->x = ClipToRange<int>(pt->x, 0, pixGetWidth(pix_) - 1);
759  pt->y = ClipToRange<int>(pt->y, 0, pixGetHeight(pix_) - 1);
760 }
761 #ifdef _MSC_VER
762 #pragma optimize("", on)
763 #endif // _MSC_VER
764 
765 // Transform tesseract image coordinates to coordinates used in the projection.
766 int TextlineProjection::ImageXToProjectionX(int x) const {
767  x = ClipToRange((x - x_origin_) / scale_factor_, 0, pixGetWidth(pix_) - 1);
768  return x;
769 }
770 int TextlineProjection::ImageYToProjectionY(int y) const {
771  y = ClipToRange((y_origin_ - y) / scale_factor_, 0, pixGetHeight(pix_) - 1);
772  return y;
773 }
774 
775 } // namespace tesseract.
int VerticalDistance(bool debug, int x, int y1, int y2) const
bool IsHorizontalType() const
Definition: colpartition.h:446
const int kParaPerpDistRatio
void rotate(const FCOORD &vec)
Definition: rect.h:197
void set_top(int y)
Definition: rect.h:61
bool IsSingleton() const
Definition: colpartition.h:362
void print() const
Definition: rect.h:278
bool null_box() const
Definition: rect.h:50
int y_gap(const TBOX &box) const
Definition: rect.h:233
void set_bottom(int y)
Definition: rect.h:68
int count(LIST var_list)
Definition: oldlist.cpp:98
Definition: rect.h:34
static bool WithinTestRegion(int detail_level, int x, int y)
int x_gap(const TBOX &box) const
Definition: rect.h:225
void MoveNonTextlineBlobs(BLOBNBOX_LIST *blobs, BLOBNBOX_LIST *small_blobs) const
const int kMaxTabStopOverrun
static void Update()
Definition: scrollview.cpp:711
const int kDefaultPadFactor
void set_right(int x)
Definition: rect.h:82
int16_t width() const
Definition: rect.h:115
int DistanceOfBoxFromBox(const TBOX &from_box, const TBOX &to_box, bool horizontal_textline, const DENORM *denorm, bool debug) const
int16_t left() const
Definition: rect.h:72
int DistanceOfBoxFromPartition(const TBOX &box, const ColPartition &part, const DENORM *denorm, bool debug) const
void DenormTransform(const DENORM *last_denorm, const TPOINT &pt, TPOINT *original) const
Definition: normalis.cpp:390
int16_t top() const
Definition: rect.h:58
bool UniquelyHorizontal() const
Definition: blobbox.h:414
void ConstructProjection(TO_BLOCK *input_block, const FCOORD &rotation, Pix *nontext_map)
integer coordinate
Definition: points.h:32
const int kMinLineSpacingFactor
int EvaluateBox(const TBOX &box, const DENORM *denorm, bool debug) const
bool BoxOutOfHTextline(const TBOX &box, const DENORM *denorm, bool debug) const
int EvaluateColPartition(const ColPartition &part, const DENORM *denorm, bool debug) const
int IntCastRounded(double x)
Definition: helpers.h:168
int HorizontalDistance(bool debug, int x1, int x2, int y) const
const int kWrongWayPenalty
void Image(struct Pix *image, int x_pos, int y_pos)
Definition: scrollview.cpp:768
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
void ClearNeighbours()
Definition: blobbox.h:511
int32_t area() const
Definition: rect.h:122
int16_t x
Definition: blobs.h:78
void set_left(int x)
Definition: rect.h:75
const TBOX & bounding_box() const
Definition: colpartition.h:110
Definition: points.h:189
const TBOX & bounding_box() const
Definition: blobbox.h:231
int left_rule() const
Definition: blobbox.h:314
const int kOrientedPadFactor
int16_t right() const
Definition: rect.h:79
float x() const
Definition: points.h:208
BLOBNBOX_LIST blobs
Definition: blobbox.h:785
void Rectangle(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:602
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:789
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:111
bool UniquelyVertical() const
Definition: blobbox.h:411
int DivRounded(int a, int b)
Definition: helpers.h:162
Definition: blobs.h:57
int16_t y
Definition: blobs.h:79
BLOBNBOX * neighbour(BlobNeighbourDir n) const
Definition: blobbox.h:371
void Pen(Color color)
Definition: scrollview.cpp:722
int16_t bottom() const
Definition: rect.h:65
int right_rule() const
Definition: blobbox.h:320
int16_t height() const
Definition: rect.h:108
void PlotGradedBlobs(BLOBNBOX_LIST *blobs, ScrollView *win)
void pad(int xpad, int ypad)
Definition: rect.h:131