tesseract  4.0.0-1-g2a2b
ccnontextdetect.cpp
Go to the documentation of this file.
1 // File: ccnontextdetect.cpp
3 // Description: Connected-Component-based photo (non-text) detection.
4 // Copyright 2011 Google Inc. All Rights Reserved.
5 // Author: rays@google.com (Ray Smith)
6 // Created: Sat Jun 11 10:12:01 PST 2011
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #ifdef HAVE_CONFIG_H
21 #include "config_auto.h"
22 #endif
23 
24 #include "ccnontextdetect.h"
25 #include "imagefind.h"
26 #include "strokewidth.h"
27 
28 namespace tesseract {
29 
// Max number of neighbour small objects per squared gridsize before a grid
// cell becomes image.
const double kMaxSmallNeighboursPerPix = 1.0 / 32;
// Max number of small blobs a large blob may overlap before it is rejected
// and determined to be image.
// NOTE(review): this definition was missing from the listing; the value is
// restored from the upstream sources and should be confirmed.
const int kMaxLargeOverlapsWithSmall = 3;
// Max number of small blobs a medium blob may overlap before it is rejected
// and determined to be image. Larger than for large blobs as medium blobs
// may be complex Chinese characters. Very large Chinese characters are going
// to overlap more medium blobs than small.
// NOTE(review): this definition was missing from the listing; the value is
// restored from the upstream sources and should be confirmed.
const int kMaxMediumOverlapsWithSmall = 12;
// Max number of normal blobs a large blob may overlap before it is rejected
// and determined to be image. This is set higher to allow for drop caps, which
// may overlap a lot of good text blobs.
// NOTE(review): this definition was missing from the listing; the value is
// restored from the upstream sources and should be confirmed.
const int kMaxLargeOverlapsWithMedium = 12;
// Multiplier of original noise_count used to test for the case of spreading
// noise beyond where it should really be.
const int kOriginalNoiseMultiple = 8;
// Pixel padding for noise blobs when rendering on the image
// mask to encourage them to join together. Make it too big and images
// will fatten out too much and have to be clipped to text.
const int kNoisePadding = 4;
// Fraction of max_noise_count_ to be added to the noise count if there is
// photo mask in the background.
const double kPhotoOffsetFraction = 0.375;
// Min ratio of perimeter^2/16area for a "good" blob in estimating noise
// density. Good blobs are supposed to be highly likely real text.
// We consider a square to have unit ratio, where A=(p/4)^2, hence the factor
// of 16. Digital circles are weird and have a minimum ratio of pi/64, not
// the 1/(4pi) that you would expect.
const double kMinGoodTextPARatio = 1.5;
61 
63  const ICOORD& bleft, const ICOORD& tright)
64  : BlobGrid(gridsize, bleft, tright),
65  max_noise_count_(static_cast<int>(kMaxSmallNeighboursPerPix *
66  gridsize * gridsize)),
67  noise_density_(nullptr) {
68  // TODO(rays) break max_noise_count_ out into an area-proportional
69  // value, as now plus an additive constant for the number of text blobs
70  // in the 3x3 neighbourhood - maybe 9.
71 }
72 
74  delete noise_density_;
75 }
76 
77 // Creates and returns a Pix with the same resolution as the original
78 // in which 1 (black) pixels represent likely non text (photo, line drawing)
79 // areas of the page, deleting from the blob_block the blobs that were
80 // determined to be non-text.
81 // The photo_map is used to bias the decision towards non-text, rather than
82 // supplying definite decision.
83 // The blob_block is the usual result of connected component analysis,
84 // holding the detected blobs.
85 // The returned Pix should be PixDestroyed after use.
86 Pix* CCNonTextDetect::ComputeNonTextMask(bool debug, Pix* photo_map,
87  TO_BLOCK* blob_block) {
88  // Insert the smallest blobs into the grid.
89  InsertBlobList(&blob_block->small_blobs);
90  InsertBlobList(&blob_block->noise_blobs);
91  // Add the medium blobs that don't have a good strokewidth neighbour.
92  // Those that do go into good_grid as an antidote to spreading beyond the
93  // real reaches of a noise region.
94  BlobGrid good_grid(gridsize(), bleft(), tright());
95  BLOBNBOX_IT blob_it(&blob_block->blobs);
96  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
97  BLOBNBOX* blob = blob_it.data();
98  double perimeter_area_ratio = blob->cblob()->perimeter() / 4.0;
99  perimeter_area_ratio *= perimeter_area_ratio / blob->enclosed_area();
100  if (blob->GoodTextBlob() == 0 || perimeter_area_ratio < kMinGoodTextPARatio)
101  InsertBBox(true, true, blob);
102  else
103  good_grid.InsertBBox(true, true, blob);
104  }
105  noise_density_ = ComputeNoiseDensity(debug, photo_map, &good_grid);
106  good_grid.Clear(); // Not needed any more.
107  Pix* pix = noise_density_->ThresholdToPix(max_noise_count_);
108  if (debug) {
109  pixWrite("junknoisemask.png", pix, IFF_PNG);
110  }
111  ScrollView* win = nullptr;
112  #ifndef GRAPHICS_DISABLED
113  if (debug) {
114  win = MakeWindow(0, 400, "Photo Mask Blobs");
115  }
116  #endif // GRAPHICS_DISABLED
117  // Large and medium blobs are not text if they overlap with "a lot" of small
118  // blobs.
119  MarkAndDeleteNonTextBlobs(&blob_block->large_blobs,
121  win, ScrollView::DARK_GREEN, pix);
122  MarkAndDeleteNonTextBlobs(&blob_block->blobs, kMaxMediumOverlapsWithSmall,
123  win, ScrollView::WHITE, pix);
124  // Clear the grid of small blobs and insert the medium blobs.
125  Clear();
126  InsertBlobList(&blob_block->blobs);
127  MarkAndDeleteNonTextBlobs(&blob_block->large_blobs,
129  win, ScrollView::DARK_GREEN, pix);
130  // Clear again before we start deleting the blobs in the grid.
131  Clear();
132  MarkAndDeleteNonTextBlobs(&blob_block->noise_blobs, -1,
133  win, ScrollView::CORAL, pix);
134  MarkAndDeleteNonTextBlobs(&blob_block->small_blobs, -1,
135  win, ScrollView::GOLDENROD, pix);
136  MarkAndDeleteNonTextBlobs(&blob_block->blobs, -1,
137  win, ScrollView::WHITE, pix);
138  if (debug) {
139  #ifndef GRAPHICS_DISABLED
140  win->Update();
141  #endif // GRAPHICS_DISABLED
142  pixWrite("junkccphotomask.png", pix, IFF_PNG);
143  #ifndef GRAPHICS_DISABLED
144  delete win->AwaitEvent(SVET_DESTROY);
145  delete win;
146  #endif // GRAPHICS_DISABLED
147  }
148  return pix;
149 }
150 
151 // Computes and returns the noise_density IntGrid, at the same gridsize as
152 // this by summing the number of small elements in a 3x3 neighbourhood of
153 // each grid cell. good_grid is filled with blobs that are considered most
154 // likely good text, and this is filled with small and medium blobs that are
155 // more likely non-text.
156 // The photo_map is used to bias the decision towards non-text, rather than
157 // supplying definite decision.
158 IntGrid* CCNonTextDetect::ComputeNoiseDensity(bool debug, Pix* photo_map,
159  BlobGrid* good_grid) {
160  IntGrid* noise_counts = CountCellElements();
161  IntGrid* noise_density = noise_counts->NeighbourhoodSum();
162  IntGrid* good_counts = good_grid->CountCellElements();
163  // Now increase noise density in photo areas, to bias the decision and
164  // minimize hallucinated text on image, but trim the noise_density where
165  // there are good blobs and the original count is low in non-photo areas,
166  // indicating that most of the result came from neighbouring cells.
167  int height = pixGetHeight(photo_map);
168  int photo_offset = IntCastRounded(max_noise_count_ * kPhotoOffsetFraction);
169  for (int y = 0; y < gridheight(); ++y) {
170  for (int x = 0; x < gridwidth(); ++x) {
171  int noise = noise_density->GridCellValue(x, y);
172  if (max_noise_count_ < noise + photo_offset &&
173  noise <= max_noise_count_) {
174  // Test for photo.
175  int left = x * gridsize();
176  int right = left + gridsize();
177  int bottom = height - y * gridsize();
178  int top = bottom - gridsize();
179  if (ImageFind::BoundsWithinRect(photo_map, &left, &top, &right,
180  &bottom)) {
181  noise_density->SetGridCell(x, y, noise + photo_offset);
182  }
183  }
184  if (debug && noise > max_noise_count_ &&
185  good_counts->GridCellValue(x, y) > 0) {
186  tprintf("At %d, %d, noise = %d, good=%d, orig=%d, thr=%d\n",
187  x * gridsize(), y * gridsize(),
188  noise_density->GridCellValue(x, y),
189  good_counts->GridCellValue(x, y),
190  noise_counts->GridCellValue(x, y), max_noise_count_);
191  }
192  if (noise > max_noise_count_ &&
193  good_counts->GridCellValue(x, y) > 0 &&
194  noise_counts->GridCellValue(x, y) * kOriginalNoiseMultiple <=
195  max_noise_count_) {
196  noise_density->SetGridCell(x, y, 0);
197  }
198  }
199  }
200  delete noise_counts;
201  delete good_counts;
202  return noise_density;
203 }
204 
205 // Helper to expand a box in one of the 4 directions by the given pad,
206 // provided it does not expand into any cell with a zero noise density.
207 // If that is not possible, try expanding all round by a small constant.
208 static TBOX AttemptBoxExpansion(const TBOX& box, const IntGrid& noise_density,
209  int pad) {
210  TBOX expanded_box(box);
211  expanded_box.set_right(box.right() + pad);
212  if (!noise_density.AnyZeroInRect(expanded_box))
213  return expanded_box;
214  expanded_box = box;
215  expanded_box.set_left(box.left() - pad);
216  if (!noise_density.AnyZeroInRect(expanded_box))
217  return expanded_box;
218  expanded_box = box;
219  expanded_box.set_top(box.top() + pad);
220  if (!noise_density.AnyZeroInRect(expanded_box))
221  return expanded_box;
222  expanded_box = box;
223  expanded_box.set_bottom(box.bottom() + pad);
224  if (!noise_density.AnyZeroInRect(expanded_box))
225  return expanded_box;
226  expanded_box = box;
227  expanded_box.pad(kNoisePadding, kNoisePadding);
228  if (!noise_density.AnyZeroInRect(expanded_box))
229  return expanded_box;
230  return box;
231 }
232 
233 // Tests each blob in the list to see if it is certain non-text using 2
234 // conditions:
235 // 1. blob overlaps a cell with high value in noise_density_ (previously set
236 // by ComputeNoiseDensity).
237 // OR 2. The blob overlaps more than max_blob_overlaps in *this grid. This
238 // condition is disabled with max_blob_overlaps == -1.
239 // If it does, the blob is declared non-text, and is used to mark up the
240 // nontext_mask. Such blobs are fully deleted, and non-noise blobs have their
241 // neighbours reset, as they may now point to deleted data.
242 // WARNING: The blobs list blobs may be in the *this grid, but they are
243 // not removed. If any deleted blobs might be in *this, then this must be
244 // Clear()ed immediately after MarkAndDeleteNonTextBlobs is called.
245 // If the win is not nullptr, deleted blobs are drawn on it in red, and kept
246 // blobs are drawn on it in ok_color.
247 void CCNonTextDetect::MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST* blobs,
248  int max_blob_overlaps,
249  ScrollView* win,
250  ScrollView::Color ok_color,
251  Pix* nontext_mask) {
252  int imageheight = tright().y() - bleft().x();
253  BLOBNBOX_IT blob_it(blobs);
254  BLOBNBOX_LIST dead_blobs;
255  BLOBNBOX_IT dead_it(&dead_blobs);
256  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
257  BLOBNBOX* blob = blob_it.data();
258  TBOX box = blob->bounding_box();
259  if (!noise_density_->RectMostlyOverThreshold(box, max_noise_count_) &&
260  (max_blob_overlaps < 0 ||
261  !BlobOverlapsTooMuch(blob, max_blob_overlaps))) {
262  blob->ClearNeighbours();
263  #ifndef GRAPHICS_DISABLED
264  if (win != nullptr)
265  blob->plot(win, ok_color, ok_color);
266  #endif // GRAPHICS_DISABLED
267  } else {
268  if (noise_density_->AnyZeroInRect(box)) {
269  // There is a danger that the bounding box may overlap real text, so
270  // we need to render the outline.
271  Pix* blob_pix = blob->cblob()->render_outline();
272  pixRasterop(nontext_mask, box.left(), imageheight - box.top(),
273  box.width(), box.height(), PIX_SRC | PIX_DST,
274  blob_pix, 0, 0);
275  pixDestroy(&blob_pix);
276  } else {
277  if (box.area() < gridsize() * gridsize()) {
278  // It is a really bad idea to make lots of small components in the
279  // photo mask, so try to join it to a bigger area by expanding the
280  // box in a way that does not touch any zero noise density cell.
281  box = AttemptBoxExpansion(box, *noise_density_, gridsize());
282  }
283  // All overlapped cells are non-zero, so just mark the rectangle.
284  pixRasterop(nontext_mask, box.left(), imageheight - box.top(),
285  box.width(), box.height(), PIX_SET, nullptr, 0, 0);
286  }
287  #ifndef GRAPHICS_DISABLED
288  if (win != nullptr)
289  blob->plot(win, ScrollView::RED, ScrollView::RED);
290  #endif // GRAPHICS_DISABLED
291  // It is safe to delete the cblob now, as it isn't used by the grid
292  // or BlobOverlapsTooMuch, and the BLOBNBOXes will go away with the
293  // dead_blobs list.
294  // TODO(rays) delete the delete when the BLOBNBOX destructor deletes
295  // the cblob.
296  delete blob->cblob();
297  dead_it.add_to_end(blob_it.extract());
298  }
299  }
300 }
301 
302 // Returns true if the given blob overlaps more than max_overlaps blobs
303 // in the current grid.
304 bool CCNonTextDetect::BlobOverlapsTooMuch(BLOBNBOX* blob, int max_overlaps) {
305  // Search the grid to see what intersects it.
306  // Setup a Rectangle search for overlapping this blob.
307  BlobGridSearch rsearch(this);
308  const TBOX& box = blob->bounding_box();
309  rsearch.StartRectSearch(box);
310  rsearch.SetUniqueMode(true);
311  BLOBNBOX* neighbour;
312  int overlap_count = 0;
313  while (overlap_count <= max_overlaps &&
314  (neighbour = rsearch.NextRectSearch()) != nullptr) {
315  if (box.major_overlap(neighbour->bounding_box())) {
316  ++overlap_count;
317  if (overlap_count > max_overlaps)
318  return true;
319  }
320  }
321  return false;
322 }
323 
324 } // namespace tesseract.
const int kMaxLargeOverlapsWithMedium
Pix * render_outline()
Definition: stepblob.cpp:524
void InsertBlobList(BLOBNBOX_LIST *blobs)
Definition: blobgrid.cpp:36
int GoodTextBlob() const
Definition: blobbox.cpp:227
ScrollView * MakeWindow(int x, int y, const char *window_name)
Definition: bbgrid.h:591
int gridsize() const
Definition: bbgrid.h:64
void set_top(int y)
Definition: rect.h:61
void set_bottom(int y)
Definition: rect.h:68
const ICOORD & bleft() const
Definition: bbgrid.h:73
CCNonTextDetect(int gridsize, const ICOORD &bleft, const ICOORD &tright)
int16_t y() const
access function
Definition: points.h:57
const int kMaxMediumOverlapsWithSmall
int32_t enclosed_area() const
Definition: blobbox.h:254
Definition: rect.h:34
const int kNoisePadding
void plot(ScrollView *window, ScrollView::Color blob_colour, ScrollView::Color child_colour)
Definition: blobbox.cpp:486
const double kMinGoodTextPARatio
int GridCellValue(int grid_x, int grid_y) const
Definition: bbgrid.h:121
static void Update()
Definition: scrollview.cpp:711
void Clear()
Definition: bbgrid.h:457
int32_t perimeter()
Definition: stepblob.cpp:294
int16_t width() const
Definition: rect.h:115
int16_t left() const
Definition: rect.h:72
int16_t top() const
Definition: rect.h:58
SVEvent * AwaitEvent(SVEventType type)
Definition: scrollview.cpp:445
bool AnyZeroInRect(const TBOX &rect) const
Definition: bbgrid.cpp:175
const int kMaxLargeOverlapsWithSmall
integer coordinate
Definition: points.h:32
int16_t x() const
access function
Definition: points.h:53
const int kOriginalNoiseMultiple
bool RectMostlyOverThreshold(const TBOX &rect, int threshold) const
Definition: bbgrid.cpp:155
bool major_overlap(const TBOX &box) const
Definition: rect.h:368
int IntCastRounded(double x)
Definition: helpers.h:168
void InsertBBox(bool h_spread, bool v_spread, BLOBNBOX *bbox)
Definition: bbgrid.h:488
GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > BlobGridSearch
Definition: blobgrid.h:31
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
void SetGridCell(int grid_x, int grid_y, int value)
Definition: bbgrid.h:125
void ClearNeighbours()
Definition: blobbox.h:511
int32_t area() const
Definition: rect.h:122
const double kPhotoOffsetFraction
void set_left(int x)
Definition: rect.h:75
IntGrid * NeighbourhoodSum() const
Definition: bbgrid.cpp:133
const TBOX & bounding_box() const
Definition: blobbox.h:231
int16_t right() const
Definition: rect.h:79
BLOBNBOX_LIST blobs
Definition: blobbox.h:785
static bool BoundsWithinRect(Pix *pix, int *x_start, int *y_start, int *x_end, int *y_end)
Definition: imagefind.cpp:333
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:789
Pix * ComputeNonTextMask(bool debug, Pix *photo_map, TO_BLOCK *blob_block)
int gridwidth() const
Definition: bbgrid.h:67
int16_t bottom() const
Definition: rect.h:65
int16_t height() const
Definition: rect.h:108
C_BLOB * cblob() const
Definition: blobbox.h:269
void pad(int xpad, int ypad)
Definition: rect.h:131
int gridheight() const
Definition: bbgrid.h:70
const double kMaxSmallNeighboursPerPix
Pix * ThresholdToPix(int threshold) const
Definition: bbgrid.cpp:191
const ICOORD & tright() const
Definition: bbgrid.h:76
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:788
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:787