tesseract  5.0.0-alpha-619-ge9db
ccnontextdetect.cpp
Go to the documentation of this file.
1 // File: ccnontextdetect.cpp
3 // Description: Connected-Component-based photo (non-text) detection.
4 // Author: rays@google.com (Ray Smith)
5 //
6 // Copyright 2011 Google Inc. All Rights Reserved.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
18 
19 #ifdef HAVE_CONFIG_H
20 #include "config_auto.h"
21 #endif
22 
23 #include "ccnontextdetect.h"
24 #include "imagefind.h"
25 #include "strokewidth.h"
26 
27 namespace tesseract {
28 
29 // Max number of neighbour small objects per squared gridsize before a grid
30 // cell becomes image. The constructor multiplies this by gridsize^2 to get
// max_noise_count_, the threshold applied to the noise density grid.
31 const double kMaxSmallNeighboursPerPix = 1.0 / 32;
32 // Max number of small blobs a large blob may overlap before it is rejected
33 // and determined to be image.
// NOTE(review): the definition this comment documents is not visible in this
// listing (presumably kMaxLargeOverlapsWithSmall) - confirm against the
// original file.
35 // Max number of small blobs a medium blob may overlap before it is rejected
36 // and determined to be image. Larger than for large blobs as medium blobs
37 // may be complex Chinese characters. Very large Chinese characters are going
38 // to overlap more medium blobs than small.
// NOTE(review): the definition this comment documents is not visible in this
// listing (presumably kMaxMediumOverlapsWithSmall, which ComputeNonTextMask
// passes for the medium blob list) - confirm against the original file.
40 // Max number of normal blobs a large blob may overlap before it is rejected
41 // and determined to be image. This is set higher to allow for drop caps, which
42 // may overlap a lot of good text blobs.
// NOTE(review): the definition this comment documents is not visible in this
// listing (presumably kMaxLargeOverlapsWithMedium) - confirm against the
// original file.
44 // Multiplier of original noise_count used to test for the case of spreading
45 // noise beyond where it should really be. ComputeNoiseDensity zeroes a
// high-density cell that holds good blobs when the cell's own count times this
// multiplier is still no more than max_noise_count_.
46 const int kOriginalNoiseMultiple = 8;
47 // Pixel padding for noise blobs when rendering on the image
48 // mask to encourage them to join together. Make it too big and images
49 // will fatten out too much and have to be clipped to text.
// Used by AttemptBoxExpansion as the all-round fallback padding.
50 const int kNoisePadding = 4;
51 // Fraction of max_noise_count_ to be added to the noise count if there is
52 // photo mask in the background.
53 const double kPhotoOffsetFraction = 0.375;
54 // Min ratio of perimeter^2/16area for a "good" blob in estimating noise
55 // density. Good blobs are supposed to be highly likely real text.
56 // We consider a square to have unit ratio, where A=(p/4)^2, hence the factor
57 // of 16. Digital circles are weird and have a minimum ratio of pi/64, not
58 // the 1/(4pi) that you would expect.
// NOTE(review): with ratio = p^2/(16A), a Euclidean circle (p = 2*pi*r,
// A = pi*r^2) gives pi/4, so the two figures quoted above look wrong - confirm.
59 const double kMinGoodTextPARatio = 1.5;
60 
62  const ICOORD& bleft, const ICOORD& tright)
63  : BlobGrid(gridsize, bleft, tright),
64  max_noise_count_(static_cast<int>(kMaxSmallNeighboursPerPix *
65  gridsize * gridsize)),
66  noise_density_(nullptr) {
67  // TODO(rays) break max_noise_count_ out into an area-proportional
68  // value, as now plus an additive constant for the number of text blobs
69  // in the 3x3 neighbourhood - maybe 9.
70 }
71 
73  delete noise_density_;
74 }
75 
76 // Creates and returns a Pix with the same resolution as the original
77 // in which 1 (black) pixels represent likely non text (photo, line drawing)
78 // areas of the page, deleting from the blob_block the blobs that were
79 // determined to be non-text.
80 // The photo_map is used to bias the decision towards non-text, rather than
81 // supplying definite decision.
82 // The blob_block is the usual result of connected component analysis,
83 // holding the detected blobs.
84 // The returned Pix should be PixDestroyed after use.
85 Pix* CCNonTextDetect::ComputeNonTextMask(bool debug, Pix* photo_map,
86  TO_BLOCK* blob_block) {
87  // Insert the smallest blobs into the grid.
88  InsertBlobList(&blob_block->small_blobs);
89  InsertBlobList(&blob_block->noise_blobs);
90  // Add the medium blobs that don't have a good strokewidth neighbour.
91  // Those that do go into good_grid as an antidote to spreading beyond the
92  // real reaches of a noise region.
93  BlobGrid good_grid(gridsize(), bleft(), tright());
94  BLOBNBOX_IT blob_it(&blob_block->blobs);
95  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
96  BLOBNBOX* blob = blob_it.data();
97  double perimeter_area_ratio = blob->cblob()->perimeter() / 4.0;
98  perimeter_area_ratio *= perimeter_area_ratio / blob->enclosed_area();
99  if (blob->GoodTextBlob() == 0 || perimeter_area_ratio < kMinGoodTextPARatio)
100  InsertBBox(true, true, blob);
101  else
102  good_grid.InsertBBox(true, true, blob);
103  }
104  noise_density_ = ComputeNoiseDensity(debug, photo_map, &good_grid);
105  good_grid.Clear(); // Not needed any more.
106  Pix* pix = noise_density_->ThresholdToPix(max_noise_count_);
107  if (debug) {
108  pixWrite("junknoisemask.png", pix, IFF_PNG);
109  }
110  ScrollView* win = nullptr;
111  #ifndef GRAPHICS_DISABLED
112  if (debug) {
113  win = MakeWindow(0, 400, "Photo Mask Blobs");
114  }
115  #endif // GRAPHICS_DISABLED
116  // Large and medium blobs are not text if they overlap with "a lot" of small
117  // blobs.
118  MarkAndDeleteNonTextBlobs(&blob_block->large_blobs,
120  win, ScrollView::DARK_GREEN, pix);
121  MarkAndDeleteNonTextBlobs(&blob_block->blobs, kMaxMediumOverlapsWithSmall,
122  win, ScrollView::WHITE, pix);
123  // Clear the grid of small blobs and insert the medium blobs.
124  Clear();
125  InsertBlobList(&blob_block->blobs);
126  MarkAndDeleteNonTextBlobs(&blob_block->large_blobs,
128  win, ScrollView::DARK_GREEN, pix);
129  // Clear again before we start deleting the blobs in the grid.
130  Clear();
131  MarkAndDeleteNonTextBlobs(&blob_block->noise_blobs, -1,
132  win, ScrollView::CORAL, pix);
133  MarkAndDeleteNonTextBlobs(&blob_block->small_blobs, -1,
134  win, ScrollView::GOLDENROD, pix);
135  MarkAndDeleteNonTextBlobs(&blob_block->blobs, -1,
136  win, ScrollView::WHITE, pix);
137  if (debug) {
138  #ifndef GRAPHICS_DISABLED
139  win->Update();
140  #endif // GRAPHICS_DISABLED
141  pixWrite("junkccphotomask.png", pix, IFF_PNG);
142  #ifndef GRAPHICS_DISABLED
143  delete win->AwaitEvent(SVET_DESTROY);
144  delete win;
145  #endif // GRAPHICS_DISABLED
146  }
147  return pix;
148 }
149 
150 // Computes and returns the noise_density IntGrid, at the same gridsize as
151 // this by summing the number of small elements in a 3x3 neighbourhood of
152 // each grid cell. good_grid is filled with blobs that are considered most
153 // likely good text, and this is filled with small and medium blobs that are
154 // more likely non-text.
155 // The photo_map is used to bias the decision towards non-text, rather than
156 // supplying definite decision.
157 IntGrid* CCNonTextDetect::ComputeNoiseDensity(bool debug, Pix* photo_map,
158  BlobGrid* good_grid) {
159  IntGrid* noise_counts = CountCellElements();
160  IntGrid* noise_density = noise_counts->NeighbourhoodSum();
161  IntGrid* good_counts = good_grid->CountCellElements();
162  // Now increase noise density in photo areas, to bias the decision and
163  // minimize hallucinated text on image, but trim the noise_density where
164  // there are good blobs and the original count is low in non-photo areas,
165  // indicating that most of the result came from neighbouring cells.
166  int height = pixGetHeight(photo_map);
167  int photo_offset = IntCastRounded(max_noise_count_ * kPhotoOffsetFraction);
168  for (int y = 0; y < gridheight(); ++y) {
169  for (int x = 0; x < gridwidth(); ++x) {
170  int noise = noise_density->GridCellValue(x, y);
171  if (max_noise_count_ < noise + photo_offset &&
172  noise <= max_noise_count_) {
173  // Test for photo.
174  int left = x * gridsize();
175  int right = left + gridsize();
176  int bottom = height - y * gridsize();
177  int top = bottom - gridsize();
178  if (ImageFind::BoundsWithinRect(photo_map, &left, &top, &right,
179  &bottom)) {
180  noise_density->SetGridCell(x, y, noise + photo_offset);
181  }
182  }
183  if (debug && noise > max_noise_count_ &&
184  good_counts->GridCellValue(x, y) > 0) {
185  tprintf("At %d, %d, noise = %d, good=%d, orig=%d, thr=%d\n",
186  x * gridsize(), y * gridsize(),
187  noise_density->GridCellValue(x, y),
188  good_counts->GridCellValue(x, y),
189  noise_counts->GridCellValue(x, y), max_noise_count_);
190  }
191  if (noise > max_noise_count_ &&
192  good_counts->GridCellValue(x, y) > 0 &&
193  noise_counts->GridCellValue(x, y) * kOriginalNoiseMultiple <=
194  max_noise_count_) {
195  noise_density->SetGridCell(x, y, 0);
196  }
197  }
198  }
199  delete noise_counts;
200  delete good_counts;
201  return noise_density;
202 }
203 
204 // Helper to expand a box in one of the 4 directions by the given pad,
205 // provided it does not expand into any cell with a zero noise density.
206 // If that is not possible, try expanding all round by a small constant.
207 static TBOX AttemptBoxExpansion(const TBOX& box, const IntGrid& noise_density,
208  int pad) {
209  TBOX expanded_box(box);
210  expanded_box.set_right(box.right() + pad);
211  if (!noise_density.AnyZeroInRect(expanded_box))
212  return expanded_box;
213  expanded_box = box;
214  expanded_box.set_left(box.left() - pad);
215  if (!noise_density.AnyZeroInRect(expanded_box))
216  return expanded_box;
217  expanded_box = box;
218  expanded_box.set_top(box.top() + pad);
219  if (!noise_density.AnyZeroInRect(expanded_box))
220  return expanded_box;
221  expanded_box = box;
222  expanded_box.set_bottom(box.bottom() + pad);
223  if (!noise_density.AnyZeroInRect(expanded_box))
224  return expanded_box;
225  expanded_box = box;
226  expanded_box.pad(kNoisePadding, kNoisePadding);
227  if (!noise_density.AnyZeroInRect(expanded_box))
228  return expanded_box;
229  return box;
230 }
231 
232 // Tests each blob in the list to see if it is certain non-text using 2
233 // conditions:
234 // 1. blob overlaps a cell with high value in noise_density_ (previously set
235 // by ComputeNoiseDensity).
236 // OR 2. The blob overlaps more than max_blob_overlaps in *this grid. This
237 // condition is disabled with max_blob_overlaps == -1.
238 // If it does, the blob is declared non-text, and is used to mark up the
239 // nontext_mask. Such blobs are fully deleted, and non-noise blobs have their
240 // neighbours reset, as they may now point to deleted data.
241 // WARNING: The blobs list blobs may be in the *this grid, but they are
242 // not removed. If any deleted blobs might be in *this, then this must be
243 // Clear()ed immediately after MarkAndDeleteNonTextBlobs is called.
244 // If the win is not nullptr, deleted blobs are drawn on it in red, and kept
245 // blobs are drawn on it in ok_color.
246 void CCNonTextDetect::MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST* blobs,
247  int max_blob_overlaps,
248  ScrollView* win,
249  ScrollView::Color ok_color,
250  Pix* nontext_mask) {
251  int imageheight = tright().y() - bleft().x();
252  BLOBNBOX_IT blob_it(blobs);
253  BLOBNBOX_LIST dead_blobs;
254  BLOBNBOX_IT dead_it(&dead_blobs);
255  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
256  BLOBNBOX* blob = blob_it.data();
257  TBOX box = blob->bounding_box();
258  if (!noise_density_->RectMostlyOverThreshold(box, max_noise_count_) &&
259  (max_blob_overlaps < 0 ||
260  !BlobOverlapsTooMuch(blob, max_blob_overlaps))) {
261  blob->ClearNeighbours();
262  #ifndef GRAPHICS_DISABLED
263  if (win != nullptr)
264  blob->plot(win, ok_color, ok_color);
265  #endif // GRAPHICS_DISABLED
266  } else {
267  if (noise_density_->AnyZeroInRect(box)) {
268  // There is a danger that the bounding box may overlap real text, so
269  // we need to render the outline.
270  Pix* blob_pix = blob->cblob()->render_outline();
271  pixRasterop(nontext_mask, box.left(), imageheight - box.top(),
272  box.width(), box.height(), PIX_SRC | PIX_DST,
273  blob_pix, 0, 0);
274  pixDestroy(&blob_pix);
275  } else {
276  if (box.area() < gridsize() * gridsize()) {
277  // It is a really bad idea to make lots of small components in the
278  // photo mask, so try to join it to a bigger area by expanding the
279  // box in a way that does not touch any zero noise density cell.
280  box = AttemptBoxExpansion(box, *noise_density_, gridsize());
281  }
282  // All overlapped cells are non-zero, so just mark the rectangle.
283  pixRasterop(nontext_mask, box.left(), imageheight - box.top(),
284  box.width(), box.height(), PIX_SET, nullptr, 0, 0);
285  }
286  #ifndef GRAPHICS_DISABLED
287  if (win != nullptr)
288  blob->plot(win, ScrollView::RED, ScrollView::RED);
289  #endif // GRAPHICS_DISABLED
290  // It is safe to delete the cblob now, as it isn't used by the grid
291  // or BlobOverlapsTooMuch, and the BLOBNBOXes will go away with the
292  // dead_blobs list.
293  // TODO(rays) delete the delete when the BLOBNBOX destructor deletes
294  // the cblob.
295  delete blob->cblob();
296  dead_it.add_to_end(blob_it.extract());
297  }
298  }
299 }
300 
301 // Returns true if the given blob overlaps more than max_overlaps blobs
302 // in the current grid.
303 bool CCNonTextDetect::BlobOverlapsTooMuch(BLOBNBOX* blob, int max_overlaps) {
304  // Search the grid to see what intersects it.
305  // Setup a Rectangle search for overlapping this blob.
306  BlobGridSearch rsearch(this);
307  const TBOX& box = blob->bounding_box();
308  rsearch.StartRectSearch(box);
309  rsearch.SetUniqueMode(true);
310  BLOBNBOX* neighbour;
311  int overlap_count = 0;
312  while (overlap_count <= max_overlaps &&
313  (neighbour = rsearch.NextRectSearch()) != nullptr) {
314  if (box.major_overlap(neighbour->bounding_box())) {
315  ++overlap_count;
316  if (overlap_count > max_overlaps)
317  return true;
318  }
319  }
320  return false;
321 }
322 
323 } // namespace tesseract.
BLOBNBOX::ClearNeighbours
void ClearNeighbours()
Definition: blobbox.h:498
tesseract::ImageFind::BoundsWithinRect
static bool BoundsWithinRect(Pix *pix, int *x_start, int *y_start, int *x_end, int *y_end)
Definition: imagefind.cpp:332
TO_BLOCK::small_blobs
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:774
ScrollView
Definition: scrollview.h:97
SVET_DESTROY
Definition: scrollview.h:45
C_BLOB::perimeter
int32_t perimeter()
Definition: stepblob.cpp:284
ScrollView::DARK_GREEN
Definition: scrollview.h:124
tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >::InsertBBox
void InsertBBox(bool h_spread, bool v_spread, BLOBNBOX *bbox)
Definition: bbgrid.h:486
tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >::CountCellElements
IntGrid * CountCellElements()
Definition: bbgrid.h:561
tesseract::kNoisePadding
const int kNoisePadding
Definition: ccnontextdetect.cpp:50
tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >::MakeWindow
ScrollView * MakeWindow(int x, int y, const char *window_name)
Definition: bbgrid.h:589
TO_BLOCK::noise_blobs
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:773
BLOBNBOX::plot
void plot(ScrollView *window, ScrollView::Color blob_colour, ScrollView::Color child_colour)
Definition: blobbox.cpp:483
tesseract::kMaxSmallNeighboursPerPix
const double kMaxSmallNeighboursPerPix
Definition: ccnontextdetect.cpp:31
tesseract::IntGrid
Definition: bbgrid.h:97
ICOORD
integer coordinate
Definition: points.h:30
tesseract::GridBase::gridwidth
int gridwidth() const
Definition: bbgrid.h:66
tesseract::CCNonTextDetect::CCNonTextDetect
CCNonTextDetect(int gridsize, const ICOORD &bleft, const ICOORD &tright)
Definition: ccnontextdetect.cpp:61
tesseract::IntGrid::NeighbourhoodSum
IntGrid * NeighbourhoodSum() const
Definition: bbgrid.cpp:132
tesseract::kMaxLargeOverlapsWithMedium
const int kMaxLargeOverlapsWithMedium
Definition: ccnontextdetect.cpp:43
TBOX::top
int16_t top() const
Definition: rect.h:57
tesseract::BBGrid::Clear
void Clear()
Definition: bbgrid.h:455
TBOX::area
int32_t area() const
Definition: rect.h:121
TO_BLOCK::blobs
BLOBNBOX_LIST blobs
Definition: blobbox.h:771
tesseract::BlobGrid
Definition: blobgrid.h:33
TO_BLOCK
Definition: blobbox.h:691
TBOX::set_top
void set_top(int y)
Definition: rect.h:60
tesseract::IntGrid::RectMostlyOverThreshold
bool RectMostlyOverThreshold(const TBOX &rect, int threshold) const
Definition: bbgrid.cpp:154
IntCastRounded
int IntCastRounded(double x)
Definition: helpers.h:173
tesseract::CCNonTextDetect::~CCNonTextDetect
~CCNonTextDetect() override
Definition: ccnontextdetect.cpp:72
tesseract::kMaxLargeOverlapsWithSmall
const int kMaxLargeOverlapsWithSmall
Definition: ccnontextdetect.cpp:34
ICOORD::x
int16_t x() const
access function
Definition: points.h:51
BLOBNBOX
Definition: blobbox.h:142
tesseract::GridBase::tright
const ICOORD & tright() const
Definition: bbgrid.h:75
TBOX::height
int16_t height() const
Definition: rect.h:107
tesseract::kMinGoodTextPARatio
const double kMinGoodTextPARatio
Definition: ccnontextdetect.cpp:59
BLOBNBOX::GoodTextBlob
int GoodTextBlob() const
Definition: blobbox.cpp:224
strokewidth.h
TO_BLOCK::large_blobs
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:775
tesseract::kOriginalNoiseMultiple
const int kOriginalNoiseMultiple
Definition: ccnontextdetect.cpp:46
tesstrain_utils.int
int
Definition: tesstrain_utils.py:154
TBOX::width
int16_t width() const
Definition: rect.h:114
TBOX::bottom
int16_t bottom() const
Definition: rect.h:64
ScrollView::WHITE
Definition: scrollview.h:103
tesseract
Definition: baseapi.h:65
ScrollView::RED
Definition: scrollview.h:104
BLOBNBOX::bounding_box
const TBOX & bounding_box() const
Definition: blobbox.h:229
tesseract::IntGrid::GridCellValue
int GridCellValue(int grid_x, int grid_y) const
Definition: bbgrid.h:120
ScrollView::AwaitEvent
SVEvent * AwaitEvent(SVEventType type)
Definition: scrollview.cpp:443
tesseract::GridBase::gridsize
int gridsize() const
Definition: bbgrid.h:63
ScrollView::GOLDENROD
Definition: scrollview.h:123
TBOX::pad
void pad(int xpad, int ypad)
Definition: rect.h:130
BLOBNBOX::enclosed_area
int32_t enclosed_area() const
Definition: blobbox.h:252
TBOX::major_overlap
bool major_overlap(const TBOX &box) const
Definition: rect.h:362
imagefind.h
TBOX::left
int16_t left() const
Definition: rect.h:71
tesseract::CCNonTextDetect::ComputeNonTextMask
Pix * ComputeNonTextMask(bool debug, Pix *photo_map, TO_BLOCK *blob_block)
Definition: ccnontextdetect.cpp:85
TBOX::right
int16_t right() const
Definition: rect.h:78
ccnontextdetect.h
tesseract::BlobGrid::InsertBlobList
void InsertBlobList(BLOBNBOX_LIST *blobs)
Definition: blobgrid.cpp:36
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
ScrollView::Update
static void Update()
Definition: scrollview.cpp:708
tesseract::IntGrid::SetGridCell
void SetGridCell(int grid_x, int grid_y, int value)
Definition: bbgrid.h:124
tesseract::kPhotoOffsetFraction
const double kPhotoOffsetFraction
Definition: ccnontextdetect.cpp:53
C_BLOB::render_outline
Pix * render_outline()
Definition: stepblob.cpp:510
ScrollView::CORAL
Definition: scrollview.h:119
TBOX::set_bottom
void set_bottom(int y)
Definition: rect.h:67
ScrollView::Color
Color
Definition: scrollview.h:100
BLOBNBOX::cblob
C_BLOB * cblob() const
Definition: blobbox.h:267
tesseract::BlobGridSearch
GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > BlobGridSearch
Definition: blobgrid.h:31
tesseract::kMaxMediumOverlapsWithSmall
const int kMaxMediumOverlapsWithSmall
Definition: ccnontextdetect.cpp:39
tesseract::GridBase::gridheight
int gridheight() const
Definition: bbgrid.h:69
tesseract::IntGrid::AnyZeroInRect
bool AnyZeroInRect(const TBOX &rect) const
Definition: bbgrid.cpp:174
tesseract::GridBase::bleft
const ICOORD & bleft() const
Definition: bbgrid.h:72
TBOX::set_left
void set_left(int x)
Definition: rect.h:74
ICOORD::y
int16_t y() const
access_function
Definition: points.h:55
tesseract::IntGrid::ThresholdToPix
Pix * ThresholdToPix(int threshold) const
Definition: bbgrid.cpp:190
TBOX
Definition: rect.h:33