tesseract  5.0.0-alpha-619-ge9db
degradeimage.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: degradeimage.cpp
3  * Description: Function to degrade an image (usually of text) as if it
4  * has been printed and then scanned.
5  * Authors: Ray Smith
6  * Created: Tue Nov 19 2013
7  *
8  * (C) Copyright 2013, Google Inc.
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  * http://www.apache.org/licenses/LICENSE-2.0
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  **********************************************************************/
20 
21 #include "degradeimage.h"
22 
23 #include <cstdlib>
24 #include "allheaders.h" // from leptonica
26 #include <tesseract/helpers.h> // For TRand.
27 #include "rect.h"
28 
29 namespace tesseract {
30 
31 // A randomized perspective distortion can be applied to synthetic input.
32 // The perspective distortion comes from leptonica, which uses 2 sets of 4
33 // corners to determine the distortion. There are random values for each of
34 // the x numbers x0..x3 and y0..y3, except for x2 and x3 which are instead
35 // defined in terms of a single shear value. This reduces the degrees of
36 // freedom enough to make the distortion more realistic than it would otherwise
37 // be if all 8 coordinates could move independently.
38 // One additional factor is used for the color of the pixels that don't exist
39 // in the source image.
40 // Name for each of the randomizing factors.
// Index of each randomizing factor in the factors array built by
// ProjectiveCoeffs. The enumerator order fixes the order in which the
// factors are processed there, so FN_SHEAR must come after FN_X0 and FN_X1,
// which are used to clamp it.
enum FactorNames {
  FN_INCOLOR,  // Chooses the colour brought in for pixels outside the source.
  FN_Y0,
  FN_Y1,
  FN_Y2,
  FN_Y3,
  FN_X0,
  FN_X1,
  FN_SHEAR,
  // x2 = x1 - shear
  // x3 = x0 + shear
  FN_NUM_FACTORS  // Number of factors; must remain the last enumerator.
};
54 
// Bound, in radians, on the magnitude of the random rotation applied by
// DegradeImage (the angle is drawn from +/- kRotationRange).
constexpr float kRotationRange = 0.02f;
// Grey-level shift applied per unit of exposure.
constexpr int kExposureFactor = 16;
// Amplitude of the salt and pepper noise: each pixel moves by a value in
// [-kSaltnPepper, kSaltnPepper].
constexpr int kSaltnPepper = 5;
// The grey ramp is only applied when width + height exceeds this value.
constexpr int kMinRampSize = 1000;
63 
64 // Degrade the pix as if by a print/copy/scan cycle with exposure > 0
65 // corresponding to darkening on the copier and <0 lighter and 0 not copied.
66 // Exposures in [-2,2] are most useful, with -3 and 3 being extreme.
67 // If rotation is nullptr, rotation is skipped. If *rotation is non-zero, the
68 // pix is rotated by *rotation else it is randomly rotated and *rotation is
69 // modified.
70 //
71 // HOW IT WORKS:
72 // Most of the process is really dictated by the fact that the minimum
73 // available convolution is 3X3, which is too big really to simulate a
74 // good quality print/scan process. (2X2 would be better.)
75 // 1 pixel wide inputs are heavily smeared by the 3X3 convolution, making the
76 // images generally biased to being too light, so most of the work is to make
77 // them darker. 3 levels of thickening/darkening are achieved with 2 dilations,
78 // (using a greyscale erosion) one heavy (by being before convolution) and one
79 // light (after convolution).
80 // With no dilation, after convolution, the images are so light that a heavy
81 // constant offset is required to make the 0 image look reasonable. A simple
82 // constant offset multiple of exposure to undo this value is enough to achieve
83 // all the required lightening. This gives the advantage that exposure level 1
84 // with a single dilation gives a good impression of the broken-yet-too-dark
85 // problem that is often seen in scans.
86 // A small random rotation gives some varying greyscale values on the edges,
87 // and some random salt and pepper noise on top helps to realistically jaggy-up
88 // the edges.
89 // Finally a greyscale ramp provides a continuum of effects between exposure
90 // levels.
91 Pix* DegradeImage(Pix* input, int exposure, TRand* randomizer,
92  float* rotation) {
93  Pix* pix = pixConvertTo8(input, false);
94  pixDestroy(&input);
95  input = pix;
96  int width = pixGetWidth(input);
97  int height = pixGetHeight(input);
98 
99  if (exposure >= 2) {
100  // An erosion simulates the spreading darkening of a dark copy.
101  // This is backwards to binary morphology,
102  // see http://www.leptonica.com/grayscale-morphology.html
103  pix = input;
104  input = pixErodeGray(pix, 3, 3);
105  pixDestroy(&pix);
106  }
107  // A convolution is essential to any mode as no scanner produces an
108  // image as sharp as the electronic image.
109  pix = pixBlockconv(input, 1, 1);
110  pixDestroy(&input);
111  // A small random rotation helps to make the edges jaggy in a realistic way.
112  if (rotation != nullptr) {
113  float radians_clockwise = 0.0f;
114  if (*rotation) {
115  radians_clockwise = *rotation;
116  } else if (randomizer != nullptr) {
117  radians_clockwise = randomizer->SignedRand(kRotationRange);
118  }
119 
120  input = pixRotate(pix, radians_clockwise,
121  L_ROTATE_AREA_MAP, L_BRING_IN_WHITE,
122  0, 0);
123  // Rotate the boxes to match.
124  *rotation = radians_clockwise;
125  pixDestroy(&pix);
126  } else {
127  input = pix;
128  }
129 
130  if (exposure >= 3 || exposure == 1) {
131  // Erosion after the convolution is not as heavy as before, so it is
132  // good for level 1 and in addition as a level 3.
133  // This is backwards to binary morphology,
134  // see http://www.leptonica.com/grayscale-morphology.html
135  pix = input;
136  input = pixErodeGray(pix, 3, 3);
137  pixDestroy(&pix);
138  }
139  // The convolution really needed to be 2x2 to be realistic enough, but
140  // we only have 3x3, so we have to bias the image darker or lose thin
141  // strokes.
142  int erosion_offset = 0;
143  // For light and 0 exposure, there is no dilation, so compensate for the
144  // convolution with a big darkening bias which is undone for lighter
145  // exposures.
146  if (exposure <= 0)
147  erosion_offset = -3 * kExposureFactor;
148  // Add in a general offset of the greyscales for the exposure level so
149  // a threshold of 128 gives a reasonable binary result.
150  erosion_offset -= exposure * kExposureFactor;
151  // Add a gradual fade over the page and a small amount of salt and pepper
152  // noise to simulate noise in the sensor/paper fibres and varying
153  // illumination.
154  l_uint32* data = pixGetData(input);
155  for (int y = 0; y < height; ++y) {
156  for (int x = 0; x < width; ++x) {
157  int pixel = GET_DATA_BYTE(data, x);
158  if (randomizer != nullptr)
159  pixel += randomizer->IntRand() % (kSaltnPepper*2 + 1) - kSaltnPepper;
160  if (height + width > kMinRampSize)
161  pixel -= (2*x + y) * 32 / (height + width);
162  pixel += erosion_offset;
163  if (pixel < 0)
164  pixel = 0;
165  if (pixel > 255)
166  pixel = 255;
167  SET_DATA_BYTE(data, x, pixel);
168  }
169  data += input->wpl;
170  }
171  return input;
172 }
173 
174 // Creates and returns a Pix distorted by various means according to the bool
175 // flags. If boxes is not nullptr, the boxes are resized/positioned according to
176 // any spatial distortion and also by the integer reduction factor box_scale
177 // so they will match what the network will output.
178 // Returns nullptr on error. The returned Pix must be pixDestroyed.
179 Pix* PrepareDistortedPix(const Pix* pix, bool perspective, bool invert,
180  bool white_noise, bool smooth_noise, bool blur,
181  int box_reduction, TRand* randomizer,
182  GenericVector<TBOX>* boxes) {
183  Pix* distorted = pixCopy(nullptr, const_cast<Pix*>(pix));
184  // Things to do to synthetic training data.
185  if ((white_noise || smooth_noise) && randomizer->SignedRand(1.0) > 0.0) {
186  // TODO(rays) Cook noise in a more thread-safe manner than rand().
187  // Attempt to make the sequences reproducible.
188  srand(randomizer->IntRand());
189  Pix* pixn = pixAddGaussianNoise(distorted, 8.0);
190  pixDestroy(&distorted);
191  if (smooth_noise) {
192  distorted = pixBlockconv(pixn, 1, 1);
193  pixDestroy(&pixn);
194  } else {
195  distorted = pixn;
196  }
197  }
198  if (blur && randomizer->SignedRand(1.0) > 0.0) {
199  Pix* blurred = pixBlockconv(distorted, 1, 1);
200  pixDestroy(&distorted);
201  distorted = blurred;
202  }
203  if (perspective)
204  GeneratePerspectiveDistortion(0, 0, randomizer, &distorted, boxes);
205  if (boxes != nullptr) {
206  for (int b = 0; b < boxes->size(); ++b) {
207  (*boxes)[b].scale(1.0f / box_reduction);
208  if ((*boxes)[b].width() <= 0)
209  (*boxes)[b].set_right((*boxes)[b].left() + 1);
210  }
211  }
212  if (invert && randomizer->SignedRand(1.0) < -0)
213  pixInvert(distorted, distorted);
214  return distorted;
215 }
216 
217 // Distorts anything that has a non-null pointer with the same pseudo-random
218 // perspective distortion. Width and height only need to be set if there
219 // is no pix. If there is a pix, then they will be taken from there.
220 void GeneratePerspectiveDistortion(int width, int height, TRand* randomizer,
221  Pix** pix, GenericVector<TBOX>* boxes) {
222  if (pix != nullptr && *pix != nullptr) {
223  width = pixGetWidth(*pix);
224  height = pixGetHeight(*pix);
225  }
226  float* im_coeffs = nullptr;
227  float* box_coeffs = nullptr;
228  l_int32 incolor =
229  ProjectiveCoeffs(width, height, randomizer, &im_coeffs, &box_coeffs);
230  if (pix != nullptr && *pix != nullptr) {
231  // Transform the image.
232  Pix* transformed = pixProjective(*pix, im_coeffs, incolor);
233  if (transformed == nullptr) {
234  tprintf("Projective transformation failed!!\n");
235  return;
236  }
237  pixDestroy(pix);
238  *pix = transformed;
239  }
240  if (boxes != nullptr) {
241  // Transform the boxes.
242  for (int b = 0; b < boxes->size(); ++b) {
243  int x1, y1, x2, y2;
244  const TBOX& box = (*boxes)[b];
245  projectiveXformSampledPt(box_coeffs, box.left(), height - box.top(), &x1,
246  &y1);
247  projectiveXformSampledPt(box_coeffs, box.right(), height - box.bottom(),
248  &x2, &y2);
249  TBOX new_box1(x1, height - y2, x2, height - y1);
250  projectiveXformSampledPt(box_coeffs, box.left(), height - box.bottom(),
251  &x1, &y1);
252  projectiveXformSampledPt(box_coeffs, box.right(), height - box.top(), &x2,
253  &y2);
254  TBOX new_box2(x1, height - y1, x2, height - y2);
255  (*boxes)[b] = new_box1.bounding_union(new_box2);
256  }
257  }
258  free(im_coeffs);
259  free(box_coeffs);
260 }
261 
262 // Computes the coefficients of a randomized projective transformation.
263 // The image transform requires backward transformation coefficient, and the
264 // box transform the forward coefficients.
265 // Returns the incolor arg to pixProjective.
266 int ProjectiveCoeffs(int width, int height, TRand* randomizer,
267  float** im_coeffs, float** box_coeffs) {
268  // Setup "from" points.
269  Pta* src_pts = ptaCreate(4);
270  ptaAddPt(src_pts, 0.0f, 0.0f);
271  ptaAddPt(src_pts, width, 0.0f);
272  ptaAddPt(src_pts, width, height);
273  ptaAddPt(src_pts, 0.0f, height);
274  // Extract factors from pseudo-random sequence.
275  float factors[FN_NUM_FACTORS];
276  float shear = 0.0f; // Shear is signed.
277  for (int i = 0; i < FN_NUM_FACTORS; ++i) {
278  // Everything is squared to make wild values rarer.
279  if (i == FN_SHEAR) {
280  // Shear is signed.
281  shear = randomizer->SignedRand(0.5 / 3.0);
282  shear = shear >= 0.0 ? shear * shear : -shear * shear;
283  // Keep the sheared points within the original rectangle.
284  if (shear < -factors[FN_X0]) shear = -factors[FN_X0];
285  if (shear > factors[FN_X1]) shear = factors[FN_X1];
286  factors[i] = shear;
287  } else if (i != FN_INCOLOR) {
288  factors[i] = fabs(randomizer->SignedRand(1.0));
289  if (i <= FN_Y3)
290  factors[i] *= 5.0 / 8.0;
291  else
292  factors[i] *= 0.5;
293  factors[i] *= factors[i];
294  }
295  }
296  // Setup "to" points.
297  Pta* dest_pts = ptaCreate(4);
298  ptaAddPt(dest_pts, factors[FN_X0] * width, factors[FN_Y0] * height);
299  ptaAddPt(dest_pts, (1.0f - factors[FN_X1]) * width, factors[FN_Y1] * height);
300  ptaAddPt(dest_pts, (1.0f - factors[FN_X1] + shear) * width,
301  (1 - factors[FN_Y2]) * height);
302  ptaAddPt(dest_pts, (factors[FN_X0] + shear) * width,
303  (1 - factors[FN_Y3]) * height);
304  getProjectiveXformCoeffs(dest_pts, src_pts, im_coeffs);
305  getProjectiveXformCoeffs(src_pts, dest_pts, box_coeffs);
306  ptaDestroy(&src_pts);
307  ptaDestroy(&dest_pts);
308  return factors[FN_INCOLOR] > 0.5f ? L_BRING_IN_WHITE : L_BRING_IN_BLACK;
309 }
310 
311 } // namespace tesseract
tesseract::FN_INCOLOR
Definition: degradeimage.cpp:76
tesseract::FactorNames
FactorNames
Definition: degradeimage.cpp:58
tesseract::FN_Y1
Definition: degradeimage.cpp:78
tesseract::FN_X1
Definition: degradeimage.cpp:82
tesseract::FN_Y2
Definition: degradeimage.cpp:79
TBOX::top
int16_t top() const
Definition: rect.h:57
tesseract::TRand::IntRand
int32_t IntRand()
Definition: helpers.h:80
tesseract::FN_X0
Definition: degradeimage.cpp:81
rect.h
tesseract::kSaltnPepper
const int kSaltnPepper
Definition: degradeimage.cpp:77
genericvector.h
tesseract::FN_Y0
Definition: degradeimage.cpp:77
tesseract::FN_Y3
Definition: degradeimage.cpp:80
tesseract::DegradeImage
Pix * DegradeImage(Pix *input, int exposure, TRand *randomizer, float *rotation)
Definition: degradeimage.cpp:108
TBOX::bottom
int16_t bottom() const
Definition: rect.h:64
tesseract::PrepareDistortedPix
Pix * PrepareDistortedPix(const Pix *pix, bool perspective, bool invert, bool white_noise, bool smooth_noise, bool blur, int box_reduction, TRand *randomizer, GenericVector< TBOX > *boxes)
Definition: degradeimage.cpp:196
helpers.h
tesseract
Definition: baseapi.h:65
tesseract::TRand::SignedRand
double SignedRand(double range)
Definition: helpers.h:85
GenericVector< TBOX >
tesseract::GeneratePerspectiveDistortion
void GeneratePerspectiveDistortion(int width, int height, TRand *randomizer, Pix **pix, GenericVector< TBOX > *boxes)
Definition: degradeimage.cpp:237
tesseract::kRotationRange
const float kRotationRange
Definition: degradeimage.cpp:73
degradeimage.h
TBOX::left
int16_t left() const
Definition: rect.h:71
tesseract::ProjectiveCoeffs
int ProjectiveCoeffs(int width, int height, TRand *randomizer, float **im_coeffs, float **box_coeffs)
Definition: degradeimage.cpp:283
TBOX::right
int16_t right() const
Definition: rect.h:78
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
tesseract::FN_NUM_FACTORS
Definition: degradeimage.cpp:86
GenericVector::size
int size() const
Definition: genericvector.h:71
tesseract::TRand
Definition: helpers.h:50
tesseract::kExposureFactor
const int kExposureFactor
Definition: degradeimage.cpp:75
tesseract::FN_SHEAR
Definition: degradeimage.cpp:83
tesseract::kMinRampSize
const int kMinRampSize
Definition: degradeimage.cpp:79
TBOX
Definition: rect.h:33