tesseract  5.0.0-alpha-619-ge9db
thresholder.cpp
Go to the documentation of this file.
1 // File: thresholder.cpp
3 // Description: Base API for thresholding images in tesseract.
4 // Author: Ray Smith
5 //
6 // (C) Copyright 2008, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
18 
19 #include "allheaders.h"
20 
21 #include <tesseract/thresholder.h>
22 
23 #include <cstdint> // for uint32_t
24 #include <cstring>
25 
26 #include "otsuthr.h"
27 #include "tprintf.h" // for tprintf
28 
29 #if defined(USE_OPENCL)
30 #include "openclwrapper.h" // for OpenclDevice
31 #endif
32 
33 namespace tesseract {
34 
36  : pix_(nullptr),
37  image_width_(0), image_height_(0),
38  pix_channels_(0), pix_wpl_(0),
39  scale_(1), yres_(300), estimated_res_(300) {
40  SetRectangle(0, 0, 0, 0);
41 }
42 
44  Clear();
45 }
46 
47 // Destroy the Pix if there is one, freeing memory.
49  pixDestroy(&pix_);
50 }
51 
52 // Return true if no image has been set.
54  return pix_ == nullptr;
55 }
56 
57 // SetImage makes a copy of all the image data, so it may be deleted
58 // immediately after this call.
59 // Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
60 // Palette color images will not work properly and must be converted to
61 // 24 bit.
62 // Binary images of 1 bit per pixel may also be given but they must be
63 // byte packed with the MSB of the first byte being the first pixel, and a
64 // one pixel is WHITE. For binary images set bytes_per_pixel=0.
65 void ImageThresholder::SetImage(const unsigned char* imagedata,
66  int width, int height,
67  int bytes_per_pixel, int bytes_per_line) {
68  int bpp = bytes_per_pixel * 8;
69  if (bpp == 0) bpp = 1;
70  Pix* pix = pixCreate(width, height, bpp == 24 ? 32 : bpp);
71  l_uint32* data = pixGetData(pix);
72  int wpl = pixGetWpl(pix);
73  switch (bpp) {
74  case 1:
75  for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) {
76  for (int x = 0; x < width; ++x) {
77  if (imagedata[x / 8] & (0x80 >> (x % 8)))
78  CLEAR_DATA_BIT(data, x);
79  else
80  SET_DATA_BIT(data, x);
81  }
82  }
83  break;
84 
85  case 8:
86  // Greyscale just copies the bytes in the right order.
87  for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) {
88  for (int x = 0; x < width; ++x)
89  SET_DATA_BYTE(data, x, imagedata[x]);
90  }
91  break;
92 
93  case 24:
94  // Put the colors in the correct places in the line buffer.
95  for (int y = 0; y < height; ++y, imagedata += bytes_per_line) {
96  for (int x = 0; x < width; ++x, ++data) {
97  SET_DATA_BYTE(data, COLOR_RED, imagedata[3 * x]);
98  SET_DATA_BYTE(data, COLOR_GREEN, imagedata[3 * x + 1]);
99  SET_DATA_BYTE(data, COLOR_BLUE, imagedata[3 * x + 2]);
100  }
101  }
102  break;
103 
104  case 32:
105  // Maintain byte order consistency across different endianness.
106  for (int y = 0; y < height; ++y, imagedata += bytes_per_line, data += wpl) {
107  for (int x = 0; x < width; ++x) {
108  data[x] = (imagedata[x * 4] << 24) | (imagedata[x * 4 + 1] << 16) |
109  (imagedata[x * 4 + 2] << 8) | imagedata[x * 4 + 3];
110  }
111  }
112  break;
113 
114  default:
115  tprintf("Cannot convert RAW image to Pix with bpp = %d\n", bpp);
116  }
117  SetImage(pix);
118  pixDestroy(&pix);
119 }
120 
121 // Store the coordinates of the rectangle to process for later use.
122 // Doesn't actually do any thresholding.
123 void ImageThresholder::SetRectangle(int left, int top, int width, int height) {
124  rect_left_ = left;
125  rect_top_ = top;
126  rect_width_ = width;
127  rect_height_ = height;
128 }
129 
130 // Get enough parameters to be able to rebuild bounding boxes in the
131 // original image (not just within the rectangle).
132 // Left and top are enough with top-down coordinates, but
133 // the height of the rectangle and the image are needed for bottom-up.
134 void ImageThresholder::GetImageSizes(int* left, int* top,
135  int* width, int* height,
136  int* imagewidth, int* imageheight) {
137  *left = rect_left_;
138  *top = rect_top_;
139  *width = rect_width_;
140  *height = rect_height_;
141  *imagewidth = image_width_;
142  *imageheight = image_height_;
143 }
144 
145 // Pix vs raw, which to use? Pix is the preferred input for efficiency,
146 // since raw buffers are copied.
147 // SetImage for Pix clones its input, so the source pix may be pixDestroyed
148 // immediately after, but may not go away until after the Thresholder has
149 // finished with it.
150 void ImageThresholder::SetImage(const Pix* pix) {
151  if (pix_ != nullptr)
152  pixDestroy(&pix_);
153  Pix* src = const_cast<Pix*>(pix);
154  int depth;
155  pixGetDimensions(src, &image_width_, &image_height_, &depth);
156  // Convert the image as necessary so it is one of binary, plain RGB, or
157  // 8 bit with no colormap. Guarantee that we always end up with our own copy,
158  // not just a clone of the input.
159  if (pixGetColormap(src)) {
160  Pix* tmp = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC);
161  depth = pixGetDepth(tmp);
162  if (depth > 1 && depth < 8) {
163  pix_ = pixConvertTo8(tmp, false);
164  pixDestroy(&tmp);
165  } else {
166  pix_ = tmp;
167  }
168  } else if (depth > 1 && depth < 8) {
169  pix_ = pixConvertTo8(src, false);
170  } else {
171  pix_ = pixCopy(nullptr, src);
172  }
173  depth = pixGetDepth(pix_);
174  pix_channels_ = depth / 8;
175  pix_wpl_ = pixGetWpl(pix_);
176  scale_ = 1;
177  estimated_res_ = yres_ = pixGetYRes(pix_);
178  Init();
179 }
180 
181 // Threshold the source image as efficiently as possible to the output Pix.
182 // Creates a Pix and sets pix to point to the resulting pointer.
183 // Caller must use pixDestroy to free the created Pix.
185 bool ImageThresholder::ThresholdToPix(PageSegMode pageseg_mode, Pix** pix) {
186  if (image_width_ > INT16_MAX || image_height_ > INT16_MAX) {
187  tprintf("Image too large: (%d, %d)\n", image_width_, image_height_);
188  return false;
189  }
190  if (pix_channels_ == 0) {
191  // We have a binary image, but it still has to be copied, as this API
192  // allows the caller to modify the output.
193  Pix* original = GetPixRect();
194  *pix = pixCopy(nullptr, original);
195  pixDestroy(&original);
196  } else {
198  }
199  return true;
200 }
201 
202 // Gets a pix that contains an 8 bit threshold value at each pixel. The
203 // returned pix may be an integer reduction of the binary image such that
204 // the scale factor may be inferred from the ratio of the sizes, even down
205 // to the extreme of a 1x1 pixel thresholds image.
206 // Ideally the 8 bit threshold should be the exact threshold used to generate
207 // the binary image in ThresholdToPix, but this is not a hard constraint.
208 // Returns nullptr if the input is binary. PixDestroy after use.
210  if (IsBinary()) return nullptr;
211  Pix* pix_grey = GetPixRectGrey();
212  int width = pixGetWidth(pix_grey);
213  int height = pixGetHeight(pix_grey);
214  int* thresholds;
215  int* hi_values;
216  OtsuThreshold(pix_grey, 0, 0, width, height, &thresholds, &hi_values);
217  pixDestroy(&pix_grey);
218  Pix* pix_thresholds = pixCreate(width, height, 8);
219  int threshold = thresholds[0] > 0 ? thresholds[0] : 128;
220  pixSetAllArbitrary(pix_thresholds, threshold);
221  delete [] thresholds;
222  delete [] hi_values;
223  return pix_thresholds;
224 }
225 
226 // Common initialization shared between SetImage methods.
229 }
230 
231 // Get a clone/copy of the source image rectangle.
232 // The returned Pix must be pixDestroyed.
233 // This function will be used in the future by the page layout analysis, and
234 // the layout analysis that uses it will only be available with Leptonica,
235 // so there is no raw equivalent.
237  if (IsFullImage()) {
238  // Just clone the whole thing.
239  return pixClone(pix_);
240  } else {
241  // Crop to the given rectangle.
242  Box* box = boxCreate(rect_left_, rect_top_, rect_width_, rect_height_);
243  Pix* cropped = pixClipRectangle(pix_, box, nullptr);
244  boxDestroy(&box);
245  return cropped;
246  }
247 }
248 
249 // Get a clone/copy of the source image rectangle, reduced to greyscale,
250 // and at the same resolution as the output binary.
251 // The returned Pix must be pixDestroyed.
252 // Provided to the classifier to extract features from the greyscale image.
254  Pix* pix = GetPixRect(); // May have to be reduced to grey.
255  int depth = pixGetDepth(pix);
256  if (depth != 8) {
257  Pix* result = depth < 8 ? pixConvertTo8(pix, false)
258  : pixConvertRGBToLuminance(pix);
259  pixDestroy(&pix);
260  return result;
261  }
262  return pix;
263 }
264 
265 // Otsu thresholds the rectangle, taking the rectangle from *this.
267  Pix** out_pix) const {
268  int* thresholds;
269  int* hi_values;
270 
271  int num_channels = OtsuThreshold(src_pix, rect_left_, rect_top_, rect_width_,
272  rect_height_, &thresholds, &hi_values);
273  // only use opencl if compiled w/ OpenCL and selected device is opencl
274 #ifdef USE_OPENCL
275  OpenclDevice od;
276  if (num_channels == 4 &&
277  od.selectedDeviceIsOpenCL() && rect_top_ == 0 && rect_left_ == 0) {
278  od.ThresholdRectToPixOCL((unsigned char*)pixGetData(src_pix), num_channels,
279  pixGetWpl(src_pix) * 4, thresholds, hi_values,
280  out_pix /*pix_OCL*/, rect_height_, rect_width_,
282  } else {
283 #endif
284  ThresholdRectToPix(src_pix, num_channels, thresholds, hi_values, out_pix);
285 #ifdef USE_OPENCL
286  }
287 #endif
288  delete [] thresholds;
289  delete [] hi_values;
290 }
291 
295 // arrays and also the bytes per pixel in src_pix.
297  int num_channels,
298  const int* thresholds,
299  const int* hi_values,
300  Pix** pix) const {
301  *pix = pixCreate(rect_width_, rect_height_, 1);
302  uint32_t* pixdata = pixGetData(*pix);
303  int wpl = pixGetWpl(*pix);
304  int src_wpl = pixGetWpl(src_pix);
305  uint32_t* srcdata = pixGetData(src_pix);
306  pixSetXRes(*pix, pixGetXRes(src_pix));
307  pixSetYRes(*pix, pixGetYRes(src_pix));
308  for (int y = 0; y < rect_height_; ++y) {
309  const uint32_t* linedata = srcdata + (y + rect_top_) * src_wpl;
310  uint32_t* pixline = pixdata + y * wpl;
311  for (int x = 0; x < rect_width_; ++x) {
312  bool white_result = true;
313  for (int ch = 0; ch < num_channels; ++ch) {
314  int pixel =
315  GET_DATA_BYTE(linedata, (x + rect_left_) * num_channels + ch);
316  if (hi_values[ch] >= 0 &&
317  (pixel > thresholds[ch]) == (hi_values[ch] == 0)) {
318  white_result = false;
319  break;
320  }
321  }
322  if (white_result)
323  CLEAR_DATA_BIT(pixline, x);
324  else
325  SET_DATA_BIT(pixline, x);
326  }
327  }
328 }
329 
330 } // namespace tesseract.
tesseract::ImageThresholder::GetImageSizes
virtual void GetImageSizes(int *left, int *top, int *width, int *height, int *imagewidth, int *imageheight)
Definition: thresholder.cpp:134
tesseract::ImageThresholder::pix_wpl_
int pix_wpl_
Words per line of pix_.
Definition: thresholder.h:175
tesseract::ImageThresholder::Init
virtual void Init()
Common initialization shared between SetImage methods.
Definition: thresholder.cpp:227
tesseract::ImageThresholder::rect_left_
int rect_left_
Definition: thresholder.h:180
tesseract::ImageThresholder::rect_height_
int rect_height_
Definition: thresholder.h:183
tesseract::ImageThresholder::IsFullImage
bool IsFullImage() const
Return true if we are processing the full image.
Definition: thresholder.h:152
tesseract::ImageThresholder::IsBinary
bool IsBinary() const
Returns true if the source image is binary.
Definition: thresholder.h:74
tesseract::ImageThresholder::GetPixRectThresholds
virtual Pix * GetPixRectThresholds()
Definition: thresholder.cpp:209
tesseract::ImageThresholder::image_height_
int image_height_
Height of source pix_.
Definition: thresholder.h:173
tesseract::ImageThresholder::image_width_
int image_width_
Width of source pix_.
Definition: thresholder.h:172
tesseract::ImageThresholder::ThresholdRectToPix
void ThresholdRectToPix(Pix *src_pix, int num_channels, const int *thresholds, const int *hi_values, Pix **pix) const
Definition: thresholder.cpp:296
tesseract::ImageThresholder::estimated_res_
int estimated_res_
Resolution estimate from text size.
Definition: thresholder.h:179
tesseract::ImageThresholder::pix_
Pix * pix_
Definition: thresholder.h:170
tesseract::ImageThresholder::rect_top_
int rect_top_
Definition: thresholder.h:181
tesseract::ImageThresholder::Clear
virtual void Clear()
Destroy the Pix if there is one, freeing memory.
Definition: thresholder.cpp:48
tesseract::ImageThresholder::pix_channels_
int pix_channels_
Number of 8-bit channels in pix_.
Definition: thresholder.h:174
openclwrapper.h
otsuthr.h
tesseract::ImageThresholder::yres_
int yres_
y pixels/inch in source image.
Definition: thresholder.h:178
tesseract
Definition: baseapi.h:65
tprintf.h
tesseract::ImageThresholder::GetPixRectGrey
virtual Pix * GetPixRectGrey()
Definition: thresholder.cpp:253
tesseract::ImageThresholder::IsEmpty
bool IsEmpty() const
Return true if no image has been set.
Definition: thresholder.cpp:53
tesseract::OtsuThreshold
int OtsuThreshold(Pix *src_pix, int left, int top, int width, int height, int **thresholds, int **hi_values)
Definition: otsuthr.cpp:56
thresholder.h
tesseract::ImageThresholder::SetImage
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: thresholder.cpp:65
tesseract::PageSegMode
PageSegMode
Definition: publictypes.h:159
tesseract::ImageThresholder::ThresholdToPix
virtual bool ThresholdToPix(PageSegMode pageseg_mode, Pix **pix)
Returns false on error.
Definition: thresholder.cpp:185
tesseract::ImageThresholder::GetPixRect
Pix * GetPixRect()
Definition: thresholder.cpp:236
tesseract::ImageThresholder::OtsuThresholdRectToPix
void OtsuThresholdRectToPix(Pix *src_pix, Pix **out_pix) const
Definition: thresholder.cpp:266
tesseract::ImageThresholder::~ImageThresholder
virtual ~ImageThresholder()
Definition: thresholder.cpp:43
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
tesseract::ImageThresholder::scale_
int scale_
Scale factor from original image.
Definition: thresholder.h:177
tesseract::ImageThresholder::SetRectangle
void SetRectangle(int left, int top, int width, int height)
Definition: thresholder.cpp:123
tesseract::ImageThresholder::ImageThresholder
ImageThresholder()
Definition: thresholder.cpp:35
tesseract::ImageThresholder::rect_width_
int rect_width_
Definition: thresholder.h:182