tesseract  4.0.0-1-g2a2b
networkio.cpp
Go to the documentation of this file.
1 // File: networkio.cpp
3 // Description: Network input/output data, allowing float/int implementations.
4 // Author: Ray Smith
5 // Created: Thu Jun 19 13:01:31 PST 2014
6 //
7 // (C) Copyright 2014, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
18 
19 #include "networkio.h"
20 #include <cfloat> // for FLT_MAX
21 
22 #include "allheaders.h"
23 #include "functions.h"
24 #include "statistc.h"
25 #include "tprintf.h"
26 
27 namespace tesseract {
28 
29 // Minimum value to output for certainty.
// ProbToCertainty() returns this value for any probability that is not
// strictly greater than kMinProb.
30 const float kMinCertainty = -20.0f;
31 // Probability corresponding to kMinCertainty.
// Derived from kMinCertainty with exp(), so the two constants stay in step.
32 const float kMinProb = exp(kMinCertainty);
33 
34 // Holds the optimal integer multiplier for this machine.
35 // This is a leaked, lazily initialized singleton, and is used for computing
36 // padding to apply to i_ for SIMD use.
// It starts out null and is filled in by the first call to GetPadding().
37 IntSimdMatrix* NetworkIO::multiplier_ = nullptr;
38 
39 // Resizes to a specific size as a 2-d temp buffer. No batches, no y-dim.
40 void NetworkIO::Resize2d(bool int_mode, int width, int num_features) {
41  stride_map_ = StrideMap();
42  int_mode_ = int_mode;
43  if (int_mode_) {
44  i_.ResizeNoInit(width, num_features, GetPadding(num_features));
45  } else {
46  f_.ResizeNoInit(width, num_features);
47  }
48 }
49 
50 // Resizes to a specific stride_map.
// Stores stride_map and int_mode in *this, then resizes the buffer selected
// by int_mode to stride_map.Width() timesteps of num_features values each.
// The int buffer is additionally given GetPadding(num_features) of padding.
// NOTE(review): the embedded listing numbers jump from 62 to 64 just before
// the closing brace, so one source line of this function appears to be
// missing from this extract -- confirm against the upstream file.
51 void NetworkIO::ResizeToMap(bool int_mode, const StrideMap& stride_map,
52  int num_features) {
53  // If this method crashes with this == nullptr,
54  // it most likely got here through an uninitialized scratch element,
55  // ie call NetworkScratch::IO::Resizexxx() not NetworkIO::Resizexxx()!!
56  stride_map_ = stride_map;
57  int_mode_ = int_mode;
58  if (int_mode_) {
59  i_.ResizeNoInit(stride_map.Width(), num_features, GetPadding(num_features));
60  } else {
61  f_.ResizeNoInit(stride_map.Width(), num_features);
62  }
64 }
65 
66 // Shrinks image size by x_scale,y_scale, and use given number of features.
68  int x_scale, int y_scale, int num_features) {
69  StrideMap stride_map = src.stride_map_;
70  stride_map.ScaleXY(x_scale, y_scale);
71  ResizeToMap(src.int_mode_, stride_map, num_features);
72 }
73 
74 // Resizes to just 1 x-coord, whatever the input.
// Takes a copy of src's stride map and resizes *this to it with src's
// int/float mode and the given num_features.
// NOTE(review): as shown here the copied map is passed on unchanged, and the
// embedded listing numbers jump from 76 to 78. A source line (presumably the
// one that narrows the width to 1) appears to be missing from this extract --
// confirm against the upstream file.
75 void NetworkIO::ResizeXTo1(const NetworkIO& src, int num_features) {
76  StrideMap stride_map = src.stride_map_;
78  ResizeToMap(src.int_mode_, stride_map, num_features);
79 }
80 
81 // Initialize all the array to zero.
83  int width = Width();
84  // Zero out the everything. Column-by-column in case it is aligned.
85  for (int t = 0; t < width; ++t) {
86  ZeroTimeStep(t);
87  }
88 }
89 
90 // Initializes to zero all elements of the array that do not correspond to
91 // valid image positions. (If a batch of different-sized images are packed
92 // together, then there will be padding pixels.)
94  int num_features = NumFeatures();
95  int full_width = stride_map_.Size(FD_WIDTH);
96  int full_height = stride_map_.Size(FD_HEIGHT);
97  StrideMap::Index b_index(stride_map_);
98  do {
99  int end_x = b_index.MaxIndexOfDim(FD_WIDTH) + 1;
100  if (end_x < full_width) {
101  // The width is small, so fill for every valid y.
102  StrideMap::Index y_index(b_index);
103  int fill_size = num_features * (full_width - end_x);
104  do {
105  StrideMap::Index z_index(y_index);
106  z_index.AddOffset(end_x, FD_WIDTH);
107  if (int_mode_) {
108  ZeroVector(fill_size, i_[z_index.t()]);
109  } else {
110  ZeroVector(fill_size, f_[z_index.t()]);
111  }
112  } while (y_index.AddOffset(1, FD_HEIGHT));
113  }
114  int end_y = b_index.MaxIndexOfDim(FD_HEIGHT) + 1;
115  if (end_y < full_height) {
116  // The height is small, so fill in the space in one go.
117  StrideMap::Index y_index(b_index);
118  y_index.AddOffset(end_y, FD_HEIGHT);
119  int fill_size = num_features * full_width * (full_height - end_y);
120  if (int_mode_) {
121  ZeroVector(fill_size, i_[y_index.t()]);
122  } else {
123  ZeroVector(fill_size, f_[y_index.t()]);
124  }
125  }
126  } while (b_index.AddOffset(1, FD_BATCH));
127 }
128 
129 // Helper computes a black point and white point to contrast-enhance an image.
130 // The computation is based on the assumption that the image is of a single line
131 // of text, so a horizontal line through the middle of the image passes through
132 // at least some of it, so local minima and maxima are a good proxy for black
133 // and white pixel samples.
134 static void ComputeBlackWhite(Pix* pix, float* black, float* white) {
135  int width = pixGetWidth(pix);
136  int height = pixGetHeight(pix);
137  STATS mins(0, 256), maxes(0, 256);
138  if (width >= 3) {
139  int y = height / 2;
140  l_uint32* line = pixGetData(pix) + pixGetWpl(pix) * y;
141  int prev = GET_DATA_BYTE(line, 0);
142  int curr = GET_DATA_BYTE(line, 1);
143  for (int x = 1; x + 1 < width; ++x) {
144  int next = GET_DATA_BYTE(line, x + 1);
145  if ((curr < prev && curr <= next) || (curr <= prev && curr < next)) {
146  // Local minimum.
147  mins.add(curr, 1);
148  }
149  if ((curr > prev && curr >= next) || (curr >= prev && curr > next)) {
150  // Local maximum.
151  maxes.add(curr, 1);
152  }
153  prev = curr;
154  curr = next;
155  }
156  }
157  if (mins.get_total() == 0) mins.add(0, 1);
158  if (maxes.get_total() == 0) maxes.add(255, 1);
159  *black = mins.ile(0.25);
160  *white = maxes.ile(0.75);
161 }
162 
163 // Sets up the array from the given image, using the currently set int_mode_.
164 // If the image width doesn't match the shape, the image is truncated or padded
165 // with noise to match.
166 void NetworkIO::FromPix(const StaticShape& shape, const Pix* pix,
167  TRand* randomizer) {
168  std::vector<const Pix*> pixes(1, pix);
169  FromPixes(shape, pixes, randomizer);
170 }
171 
172 // Sets up the array from the given set of images, using the currently set
173 // int_mode_. If the image width doesn't match the shape, the images are
174 // truncated or padded with noise to match.
176  const std::vector<const Pix*>& pixes,
177  TRand* randomizer) {
178  int target_height = shape.height();
179  int target_width = shape.width();
180  std::vector<std::pair<int, int>> h_w_pairs;
181  for (auto pix : pixes) {
182  Pix* var_pix = const_cast<Pix*>(pix);
183  int width = pixGetWidth(var_pix);
184  if (target_width != 0) width = target_width;
185  int height = pixGetHeight(var_pix);
186  if (target_height != 0) height = target_height;
187  h_w_pairs.emplace_back(height, width);
188  }
189  stride_map_.SetStride(h_w_pairs);
190  ResizeToMap(int_mode(), stride_map_, shape.depth());
191  // Iterate over the images again to copy the data.
192  for (size_t b = 0; b < pixes.size(); ++b) {
193  Pix* pix = const_cast<Pix*>(pixes[b]);
194  float black = 0.0f, white = 255.0f;
195  if (shape.depth() != 3) ComputeBlackWhite(pix, &black, &white);
196  float contrast = (white - black) / 2.0f;
197  if (contrast <= 0.0f) contrast = 1.0f;
198  if (shape.height() == 1) {
199  Copy1DGreyImage(b, pix, black, contrast, randomizer);
200  } else {
201  Copy2DImage(b, pix, black, contrast, randomizer);
202  }
203  }
204 }
205 
206 // Copies the given pix to *this at the given batch index, stretching and
207 // clipping the pixel values so that [black, black + 2*contrast] maps to the
208 // dynamic range of *this, ie [-1,1] for a float and (-127,127) for int.
209 // This is a 2-d operation in the sense that the output depth is the number
210 // of input channels, the height is the height of the image, and the width
211 // is the width of the image, or truncated/padded with noise if the width
212 // is a fixed size.
213 void NetworkIO::Copy2DImage(int batch, Pix* pix, float black, float contrast,
214  TRand* randomizer) {
215  int width = pixGetWidth(pix);
216  int height = pixGetHeight(pix);
217  int wpl = pixGetWpl(pix);
218  StrideMap::Index index(stride_map_);
219  index.AddOffset(batch, FD_BATCH);
220  int t = index.t();
221  int target_height = stride_map_.Size(FD_HEIGHT);
222  int target_width = stride_map_.Size(FD_WIDTH);
223  int num_features = NumFeatures();
224  bool color = num_features == 3;
225  if (width > target_width) width = target_width;
226  uint32_t* line = pixGetData(pix);
227  for (int y = 0; y < target_height; ++y, line += wpl) {
228  int x = 0;
229  if (y < height) {
230  for (x = 0; x < width; ++x, ++t) {
231  if (color) {
232  int f = 0;
233  for (int c = COLOR_RED; c <= COLOR_BLUE; ++c) {
234  int pixel = GET_DATA_BYTE(line + x, c);
235  SetPixel(t, f++, pixel, black, contrast);
236  }
237  } else {
238  int pixel = GET_DATA_BYTE(line, x);
239  SetPixel(t, 0, pixel, black, contrast);
240  }
241  }
242  }
243  for (; x < target_width; ++x) Randomize(t++, 0, num_features, randomizer);
244  }
245 }
246 
247 // Copies the given pix to *this at the given batch index, as Copy2DImage
248 // above, except that the output depth is the height of the input image, the
249 // output height is 1, and the output width as for Copy2DImage.
250 // The image is thus treated as a 1-d set of vertical pixel strips.
251 void NetworkIO::Copy1DGreyImage(int batch, Pix* pix, float black,
252  float contrast, TRand* randomizer) {
253  int width = pixGetWidth(pix);
254  int height = pixGetHeight(pix);
255  ASSERT_HOST(height == NumFeatures());
256  int wpl = pixGetWpl(pix);
257  StrideMap::Index index(stride_map_);
258  index.AddOffset(batch, FD_BATCH);
259  int t = index.t();
260  int target_width = stride_map_.Size(FD_WIDTH);
261  if (width > target_width) width = target_width;
262  int x;
263  for (x = 0; x < width; ++x, ++t) {
264  for (int y = 0; y < height; ++y) {
265  uint32_t* line = pixGetData(pix) + wpl * y;
266  int pixel = GET_DATA_BYTE(line, x);
267  SetPixel(t, y, pixel, black, contrast);
268  }
269  }
270  for (; x < target_width; ++x) Randomize(t++, 0, height, randomizer);
271 }
272 
273 // Helper stores the pixel value in i_ or f_ according to int_mode_.
274 // t: is the index from the StrideMap corresponding to the current
275 // [batch,y,x] position
276 // f: is the index into the depth/channel
277 // pixel: the value of the pixel from the image (in one channel)
278 // black: the pixel value to map to the lowest of the range of *this
279 // contrast: the range of pixel values to stretch to half the range of *this.
280 void NetworkIO::SetPixel(int t, int f, int pixel, float black, float contrast) {
281  float float_pixel = (pixel - black) / contrast - 1.0f;
282  if (int_mode_) {
283  i_[t][f] = ClipToRange<int>(IntCastRounded((INT8_MAX + 1) * float_pixel),
284  -INT8_MAX, INT8_MAX);
285  } else {
286  f_[t][f] = float_pixel;
287  }
288 }
289 
290 // Converts the array to a Pix. Must be pixDestroyed after use.
291 Pix* NetworkIO::ToPix() const {
292  // Count the width of the image, and find the max multiplication factor.
293  int im_width = stride_map_.Size(FD_WIDTH);
294  int im_height = stride_map_.Size(FD_HEIGHT);
295  int num_features = NumFeatures();
296  int feature_factor = 1;
297  if (num_features == 3) {
298  // Special hack for color.
299  num_features = 1;
300  feature_factor = 3;
301  }
302  Pix* pix = pixCreate(im_width, im_height * num_features, 32);
303  StrideMap::Index index(stride_map_);
304  do {
305  int im_x = index.index(FD_WIDTH);
306  int top_im_y = index.index(FD_HEIGHT);
307  int im_y = top_im_y;
308  int t = index.t();
309  if (int_mode_) {
310  const int8_t* features = i_[t];
311  for (int y = 0; y < num_features; ++y, im_y += im_height) {
312  int pixel = features[y * feature_factor];
313  // 1 or 2 features use greyscale.
314  int red = ClipToRange<int>(pixel + 128, 0, 255);
315  int green = red, blue = red;
316  if (feature_factor == 3) {
317  // With 3 features assume RGB color.
318  green = ClipToRange<int>(features[y * feature_factor + 1] + 128, 0, 255);
319  blue = ClipToRange<int>(features[y * feature_factor + 2] + 128, 0, 255);
320  } else if (num_features > 3) {
321  // More than 3 features use false yellow/blue color, assuming a signed
322  // input in the range [-1,1].
323  red = abs(pixel) * 2;
324  if (pixel >= 0) {
325  green = red;
326  blue = 0;
327  } else {
328  blue = red;
329  green = red = 0;
330  }
331  }
332  pixSetPixel(pix, im_x, im_y, (red << L_RED_SHIFT) |
333  (green << L_GREEN_SHIFT) |
334  (blue << L_BLUE_SHIFT));
335  }
336  } else {
337  const float* features = f_[t];
338  for (int y = 0; y < num_features; ++y, im_y += im_height) {
339  float pixel = features[y * feature_factor];
340  // 1 or 2 features use greyscale.
341  int red = ClipToRange<int>(IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255);
342  int green = red, blue = red;
343  if (feature_factor == 3) {
344  // With 3 features assume RGB color.
345  pixel = features[y * feature_factor + 1];
346  green = ClipToRange<int>(IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255);
347  pixel = features[y * feature_factor + 2];
348  blue = ClipToRange<int>(IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255);
349  } else if (num_features > 3) {
350  // More than 3 features use false yellow/blue color, assuming a signed
351  // input in the range [-1,1].
352  red = ClipToRange<int>(IntCastRounded(fabs(pixel) * 255), 0, 255);
353  if (pixel >= 0) {
354  green = red;
355  blue = 0;
356  } else {
357  blue = red;
358  green = red = 0;
359  }
360  }
361  pixSetPixel(pix, im_x, im_y, (red << L_RED_SHIFT) |
362  (green << L_GREEN_SHIFT) |
363  (blue << L_BLUE_SHIFT));
364  }
365  }
366  } while (index.Increment());
367  return pix;
368 }
369 
370 // Prints the first and last num timesteps of the array for each feature.
371 void NetworkIO::Print(int num) const {
372  int num_features = NumFeatures();
373  for (int y = 0; y < num_features; ++y) {
374  for (int t = 0; t < Width(); ++t) {
375  if (num == 0 || t < num || t + num >= Width()) {
376  if (int_mode_) {
377  tprintf(" %g", static_cast<float>(i_[t][y]) / INT8_MAX);
378  } else {
379  tprintf(" %g", f_[t][y]);
380  }
381  }
382  }
383  tprintf("\n");
384  }
385 }
386 
387 // Copies a single time step from src.
388 void NetworkIO::CopyTimeStepFrom(int dest_t, const NetworkIO& src, int src_t) {
389  ASSERT_HOST(int_mode_ == src.int_mode_);
390  if (int_mode_) {
391  memcpy(i_[dest_t], src.i_[src_t], i_.dim2() * sizeof(i_[0][0]));
392  } else {
393  memcpy(f_[dest_t], src.f_[src_t], f_.dim2() * sizeof(f_[0][0]));
394  }
395 }
396 
397 // Copies a part of single time step from src.
398 void NetworkIO::CopyTimeStepGeneral(int dest_t, int dest_offset,
399  int num_features, const NetworkIO& src,
400  int src_t, int src_offset) {
401  ASSERT_HOST(int_mode_ == src.int_mode_);
402  if (int_mode_) {
403  memcpy(i_[dest_t] + dest_offset, src.i_[src_t] + src_offset,
404  num_features * sizeof(i_[0][0]));
405  } else {
406  memcpy(f_[dest_t] + dest_offset, src.f_[src_t] + src_offset,
407  num_features * sizeof(f_[0][0]));
408  }
409 }
410 
411 // Zeroes a single time step.
412 void NetworkIO::ZeroTimeStepGeneral(int t, int offset, int num_features) {
413  if (int_mode_) {
414  ZeroVector(num_features, i_[t] + offset);
415  } else {
416  ZeroVector(num_features, f_[t] + offset);
417  }
418 }
419 
420 // Sets the given range to random values.
421 void NetworkIO::Randomize(int t, int offset, int num_features,
422  TRand* randomizer) {
423  if (int_mode_) {
424  int8_t* line = i_[t] + offset;
425  for (int i = 0; i < num_features; ++i)
426  line[i] = IntCastRounded(randomizer->SignedRand(INT8_MAX));
427  } else {
428  // float mode.
429  float* line = f_[t] + offset;
430  for (int i = 0; i < num_features; ++i)
431  line[i] = randomizer->SignedRand(1.0);
432  }
433 }
434 
435 // Helper returns the label and score of the best choice over a range.
436 int NetworkIO::BestChoiceOverRange(int t_start, int t_end, int not_this,
437  int null_ch, float* rating,
438  float* certainty) const {
439  if (t_end <= t_start) return -1;
440  int max_char = -1;
441  float min_score = 0.0f;
442  for (int c = 0; c < NumFeatures(); ++c) {
443  if (c == not_this || c == null_ch) continue;
444  ScoresOverRange(t_start, t_end, c, null_ch, rating, certainty);
445  if (max_char < 0 || *rating < min_score) {
446  min_score = *rating;
447  max_char = c;
448  }
449  }
450  ScoresOverRange(t_start, t_end, max_char, null_ch, rating, certainty);
451  return max_char;
452 }
453 
454 // Helper returns the rating and certainty of the choice over a range in output.
// Computes *rating and *certainty for label `choice` over the timesteps
// [t_start, t_end) of the float outputs (int mode is not allowed).
// Both outputs are first set to 0, and stay 0 if the range is empty or
// t_end <= 0.
// Three alternative paths through the range are tracked in parallel:
//   [0] every timestep so far is scored as null_ch,
//   [1] the current timestep is scored as choice,
//   [2] the current timestep is scored as null_ch, after choice was used.
// For each path, ratings[] accumulates the negated ProbToCertainty() of the
// label used at each timestep, and certs[] keeps the lowest ProbToCertainty()
// value seen (starting from 0).
455 void NetworkIO::ScoresOverRange(int t_start, int t_end, int choice, int null_ch,
456  float* rating, float* certainty) const {
457  ASSERT_HOST(!int_mode_);
458  *rating = 0.0f;
459  *certainty = 0.0f;
460  if (t_end <= t_start || t_end <= 0) return;
461  float ratings[3] = {0.0f, 0.0f, 0.0f};
462  float certs[3] = {0.0f, 0.0f, 0.0f};
463  for (int t = t_start; t < t_end; ++t) {
464  const float* line = f_[t];
465  float score = ProbToCertainty(line[choice]);
466  float zero = ProbToCertainty(line[null_ch]);
467  if (t == t_start) {
// First timestep: path [2] is impossible (FLT_MAX), path [1] starts with
// choice.
468  ratings[2] = FLT_MAX;
469  ratings[1] = -score;
470  certs[1] = score;
471  } else {
// Let each path be entered from the one before it when that is cheaper.
// Going from i = 2 down to 1 means path [2] sees path [1] as it was at the
// previous timestep.
472  for (int i = 2; i >= 1; --i) {
473  if (ratings[i] > ratings[i - 1]) {
474  ratings[i] = ratings[i - 1];
475  certs[i] = certs[i - 1];
476  }
477  }
478  ratings[2] -= zero;
479  if (zero < certs[2]) certs[2] = zero;
480  ratings[1] -= score;
481  if (score < certs[1]) certs[1] = score;
482  }
// Path [0] is extended with null_ch at every timestep, including the first.
483  ratings[0] -= zero;
484  if (zero < certs[0]) certs[0] = zero;
485  }
// Only paths that used choice ([1] and [2]) can be the answer. The length of
// the range is added to the winning rating.
486  int best_i = ratings[2] < ratings[1] ? 2 : 1;
487  *rating = ratings[best_i] + t_end - t_start;
488  *certainty = certs[best_i];
489 }
490 
491 // Returns the index (label) of the best value at the given timestep,
492 // excluding not_this and not_that, and if not null, sets the score to the
493 // log of the corresponding value.
494 int NetworkIO::BestLabel(int t, int not_this, int not_that,
495  float* score) const {
496  ASSERT_HOST(!int_mode_);
497  int best_index = -1;
498  float best_score = -FLT_MAX;
499  const float* line = f_[t];
500  for (int i = 0; i < f_.dim2(); ++i) {
501  if (line[i] > best_score && i != not_this && i != not_that) {
502  best_score = line[i];
503  best_index = i;
504  }
505  }
506  if (score != nullptr) *score = ProbToCertainty(best_score);
507  return best_index;
508 }
509 
510 // Returns the best start position out of [start, end) (into which all labels
511 // must fit) to obtain the highest cumulative score for the given labels.
513  int end) const {
514  int length = labels.size();
515  int last_start = end - length;
516  int best_start = -1;
517  double best_score = 0.0;
518  for (int s = start; s <= last_start; ++s) {
519  double score = ScoreOfLabels(labels, s);
520  if (score > best_score || best_start < 0) {
521  best_score = score;
522  best_start = s;
523  }
524  }
525  return best_start;
526 }
527 
528 // Returns the cumulative score of the given labels starting at start, and
529 // using one label per time-step.
531  int start) const {
532  int length = labels.size();
533  double score = 0.0;
534  for (int i = 0; i < length; ++i) {
535  score += f_(start + i, labels[i]);
536  }
537  return score;
538 }
539 
540 // Helper function sets all the outputs for a single timestep, such that
541 // label has value ok_score, and the other labels share 1 - ok_score.
542 void NetworkIO::SetActivations(int t, int label, float ok_score) {
543  ASSERT_HOST(!int_mode_);
544  int num_classes = NumFeatures();
545  float bad_score = (1.0f - ok_score) / (num_classes - 1);
546  float* targets = f_[t];
547  for (int i = 0; i < num_classes; ++i)
548  targets[i] = bad_score;
549  targets[label] = ok_score;
550 }
551 
552 // Modifies the values, only if needed, so that the given label is
553 // the winner at the given time step t.
554 void NetworkIO::EnsureBestLabel(int t, int label) {
555  ASSERT_HOST(!int_mode_);
556  if (BestLabel(t, nullptr) != label) {
557  // Output value needs enhancing. Third all the other elements and add the
558  // remainder to best_label.
559  int num_classes = NumFeatures();
560  float* targets = f_[t];
561  for (int c = 0; c < num_classes; ++c) {
562  if (c == label) {
563  targets[c] += (1.0 - targets[c]) * (2 / 3.0);
564  } else {
565  targets[c] /= 3.0;
566  }
567  }
568  }
569 }
570 
571 // Helper function converts prob to certainty taking the minimum into account.
572 /* static */
573 float NetworkIO::ProbToCertainty(float prob) {
574  return prob > kMinProb ? log(prob) : kMinCertainty;
575 }
576 
577 // Returns true if there is any bad value that is suspiciously like a GT
578 // error. Assuming that *this is the difference(gradient) between target
579 // and forward output, returns true if there is a large negative value
580 // (correcting a very confident output) for which there is no corresponding
581 // positive value in an adjacent timestep for the same feature index. This
582 // allows the box-truthed samples to make fine adjustments to position while
583 // stopping other disagreements of confident output with ground truth.
584 bool NetworkIO::AnySuspiciousTruth(float confidence_thr) const {
585  int num_features = NumFeatures();
586  for (int t = 0; t < Width(); ++t) {
587  const float* features = f_[t];
588  for (int y = 0; y < num_features; ++y) {
589  float grad = features[y];
590  if (grad < -confidence_thr) {
591  // Correcting strong output. Check for movement.
592  if ((t == 0 || f_[t - 1][y] < confidence_thr / 2) &&
593  (t + 1 == Width() || f_[t + 1][y] < confidence_thr / 2)) {
594  return true; // No strong positive on either side.
595  }
596  }
597  }
598  }
599  return false;
600 }
601 
602 // Reads a single timestep to floats in the range [-1, 1].
603 void NetworkIO::ReadTimeStep(int t, double* output) const {
604  if (int_mode_) {
605  const int8_t* line = i_[t];
606  for (int i = 0; i < i_.dim2(); ++i) {
607  output[i] = static_cast<double>(line[i]) / INT8_MAX;
608  }
609  } else {
610  const float* line = f_[t];
611  for (int i = 0; i < f_.dim2(); ++i) {
612  output[i] = static_cast<double>(line[i]);
613  }
614  }
615 }
616 
617 // Adds a single timestep to floats.
618 void NetworkIO::AddTimeStep(int t, double* inout) const {
619  int num_features = NumFeatures();
620  if (int_mode_) {
621  const int8_t* line = i_[t];
622  for (int i = 0; i < num_features; ++i) {
623  inout[i] += static_cast<double>(line[i]) / INT8_MAX;
624  }
625  } else {
626  const float* line = f_[t];
627  for (int i = 0; i < num_features; ++i) {
628  inout[i] += line[i];
629  }
630  }
631 }
632 
633 // Adds part of a single timestep to floats.
634 void NetworkIO::AddTimeStepPart(int t, int offset, int num_features,
635  float* inout) const {
636  if (int_mode_) {
637  const int8_t* line = i_[t] + offset;
638  for (int i = 0; i < num_features; ++i) {
639  inout[i] += static_cast<float>(line[i]) / INT8_MAX;
640  }
641  } else {
642  const float* line = f_[t] + offset;
643  for (int i = 0; i < num_features; ++i) {
644  inout[i] += line[i];
645  }
646  }
647 }
648 
649 // Writes a single timestep from floats in the range [-1, 1].
650 void NetworkIO::WriteTimeStep(int t, const double* input) {
651  WriteTimeStepPart(t, 0, NumFeatures(), input);
652 }
653 
654 // Writes a single timestep from floats in the range [-1, 1] writing only
655 // num_features elements of input to (*this)[t], starting at offset.
656 void NetworkIO::WriteTimeStepPart(int t, int offset, int num_features,
657  const double* input) {
658  if (int_mode_) {
659  int8_t* line = i_[t] + offset;
660  for (int i = 0; i < num_features; ++i) {
661  line[i] = ClipToRange<int>(IntCastRounded(input[i] * INT8_MAX),
662  -INT8_MAX, INT8_MAX);
663  }
664  } else {
665  float* line = f_[t] + offset;
666  for (int i = 0; i < num_features; ++i) {
667  line[i] = static_cast<float>(input[i]);
668  }
669  }
670 }
671 
672 // Maxpools a single time step from src.
673 void NetworkIO::MaxpoolTimeStep(int dest_t, const NetworkIO& src, int src_t,
674  int* max_line) {
675  ASSERT_HOST(int_mode_ == src.int_mode_);
676  if (int_mode_) {
677  int dim = i_.dim2();
678  int8_t* dest_line = i_[dest_t];
679  const int8_t* src_line = src.i_[src_t];
680  for (int i = 0; i < dim; ++i) {
681  if (dest_line[i] < src_line[i]) {
682  dest_line[i] = src_line[i];
683  max_line[i] = src_t;
684  }
685  }
686  } else {
687  int dim = f_.dim2();
688  float* dest_line = f_[dest_t];
689  const float* src_line = src.f_[src_t];
690  for (int i = 0; i < dim; ++i) {
691  if (dest_line[i] < src_line[i]) {
692  dest_line[i] = src_line[i];
693  max_line[i] = src_t;
694  }
695  }
696  }
697 }
698 
699 // Runs maxpool backward, using maxes to index timesteps in *this.
701  const GENERIC_2D_ARRAY<int>& maxes) {
702  ASSERT_HOST(!int_mode_);
703  Zero();
704  StrideMap::Index index(fwd.stride_map_);
705  do {
706  int t = index.t();
707  const int* max_line = maxes[t];
708  const float* fwd_line = fwd.f_[t];
709  int num_features = fwd.f_.dim2();
710  for (int i = 0; i < num_features; ++i) {
711  f_[max_line[i]][i] = fwd_line[i];
712  }
713  } while (index.Increment());
714 }
715 
716 // Returns the min over time of the maxes over features of the outputs.
717 float NetworkIO::MinOfMaxes() const {
718  float min_max = 0.0f;
719  int width = Width();
720  int num_features = NumFeatures();
721  for (int t = 0; t < width; ++t) {
722  float max_value = -FLT_MAX;
723  if (int_mode_) {
724  const int8_t* column = i_[t];
725  for (int i = 0; i < num_features; ++i) {
726  if (column[i] > max_value) max_value = column[i];
727  }
728  } else {
729  const float* column = f_[t];
730  for (int i = 0; i < num_features; ++i) {
731  if (column[i] > max_value) max_value = column[i];
732  }
733  }
734  if (t == 0 || max_value < min_max) min_max = max_value;
735  }
736  return min_max;
737 }
738 
739 // Computes combined results for a combiner that chooses between an existing
740 // input and itself, with an additional output to indicate the choice.
741 void NetworkIO::CombineOutputs(const NetworkIO& base_output,
742  const NetworkIO& combiner_output) {
743  int no = base_output.NumFeatures();
744  ASSERT_HOST(combiner_output.NumFeatures() == no + 1);
745  Resize(base_output, no);
746  int width = Width();
747  if (int_mode_) {
748  // Number of outputs from base and final result.
749  for (int t = 0; t < width; ++t) {
750  int8_t* out_line = i_[t];
751  const int8_t* base_line = base_output.i_[t];
752  const int8_t* comb_line = combiner_output.i_[t];
753  float base_weight = static_cast<float>(comb_line[no]) / INT8_MAX;
754  float boost_weight = 1.0f - base_weight;
755  for (int i = 0; i < no; ++i) {
756  out_line[i] = IntCastRounded(base_line[i] * base_weight +
757  comb_line[i] * boost_weight);
758  }
759  }
760  } else {
761  for (int t = 0; t < width; ++t) {
762  float* out_line = f_[t];
763  const float* base_line = base_output.f_[t];
764  const float* comb_line = combiner_output.f_[t];
765  float base_weight = comb_line[no];
766  float boost_weight = 1.0f - base_weight;
767  for (int i = 0; i < no; ++i) {
768  out_line[i] = base_line[i] * base_weight + comb_line[i] * boost_weight;
769  }
770  }
771  }
772 }
773 
774 // Computes deltas for a combiner that chooses between 2 sets of inputs.
776  const NetworkIO& base_output) {
777  ASSERT_HOST(!int_mode_);
778  // Compute the deltas for the combiner.
779  int width = Width();
780  int no = NumFeatures() - 1;
781  ASSERT_HOST(fwd_deltas.NumFeatures() == no);
782  ASSERT_HOST(base_output.NumFeatures() == no);
783  // Number of outputs from base and final result.
784  for (int t = 0; t < width; ++t) {
785  const float* delta_line = fwd_deltas.f_[t];
786  const float* base_line = base_output.f_[t];
787  float* comb_line = f_[t];
788  float base_weight = comb_line[no];
789  float boost_weight = 1.0f - base_weight;
790  float max_base_delta = 0.0;
791  for (int i = 0; i < no; ++i) {
792  // What did the combiner actually produce?
793  float output = base_line[i] * base_weight + comb_line[i] * boost_weight;
794  // Reconstruct the target from the delta.
795  float comb_target = delta_line[i] + output;
796  comb_line[i] = comb_target - comb_line[i];
797  float base_delta = fabs(comb_target - base_line[i]);
798  if (base_delta > max_base_delta) max_base_delta = base_delta;
799  }
800  if (max_base_delta >= 0.5) {
801  // The base network got it wrong. The combiner should output the right
802  // answer and 0 for the base network.
803  comb_line[no] = 0.0 - base_weight;
804  } else {
805  // The base network was right. The combiner should flag that.
806  for (int i = 0; i < no; ++i) {
807  // All other targets are 0.
808  if (comb_line[i] > 0.0) comb_line[i] -= 1.0;
809  }
810  comb_line[no] = 1.0 - base_weight;
811  }
812  }
813 }
814 
815 // Copies the array checking that the types match.
// Asserts that src and *this are in the same int/float mode, then assigns
// src's float array to f_.
// NOTE(review): only f_ is assigned here; i_ and stride_map_ are not touched,
// so when both objects are in int mode no sample data is copied. Confirm
// whether callers only ever use this in float mode.
816 void NetworkIO::CopyAll(const NetworkIO& src) {
817  ASSERT_HOST(src.int_mode_ == int_mode_);
818  f_ = src.f_;
819 }
820 
821 // Checks that both are floats and adds the src array to *this.
823  ASSERT_HOST(!int_mode_);
824  ASSERT_HOST(!src.int_mode_);
825  f_ += src.f_;
826 }
827 
828 // Subtracts the array from a float array. src must also be float.
830  ASSERT_HOST(!int_mode_);
831  ASSERT_HOST(!src.int_mode_);
832  f_ -= src.f_;
833 }
834 
835 // Copies src to *this, with maxabs normalization to match scale.
837  const NetworkIO& scale) {
838  ASSERT_HOST(!int_mode_);
839  ASSERT_HOST(!src.int_mode_);
840  ASSERT_HOST(!scale.int_mode_);
841  float src_max = src.f_.MaxAbs();
842  ASSERT_HOST(std::isfinite(src_max));
843  float scale_max = scale.f_.MaxAbs();
844  ASSERT_HOST(std::isfinite(scale_max));
845  if (src_max > 0.0f) {
846  float factor = scale_max / src_max;
847  for (int t = 0; t < src.Width(); ++t) {
848  const float* src_ptr = src.f_[t];
849  float* dest_ptr = f_[t];
850  for (int i = 0; i < src.f_.dim2(); ++i) dest_ptr[i] = src_ptr[i] * factor;
851  }
852  } else {
853  f_.Clear();
854  }
855 }
856 
857 // Copies src to *this with independent reversal of the y dimension.
859  int num_features = src.NumFeatures();
860  Resize(src, num_features);
861  StrideMap::Index b_index(src.stride_map_);
862  do {
863  int width = b_index.MaxIndexOfDim(FD_WIDTH) + 1;
864  StrideMap::Index fwd_index(b_index);
865  StrideMap::Index rev_index(b_index);
866  rev_index.AddOffset(rev_index.MaxIndexOfDim(FD_HEIGHT), FD_HEIGHT);
867  do {
868  int fwd_t = fwd_index.t();
869  int rev_t = rev_index.t();
870  for (int x = 0; x < width; ++x) CopyTimeStepFrom(rev_t++, src, fwd_t++);
871  } while (fwd_index.AddOffset(1, FD_HEIGHT) &&
872  rev_index.AddOffset(-1, FD_HEIGHT));
873  } while (b_index.AddOffset(1, FD_BATCH));
874 }
875 
876 // Copies src to *this with independent reversal of the x dimension.
878  int num_features = src.NumFeatures();
879  Resize(src, num_features);
880  StrideMap::Index b_index(src.stride_map_);
881  do {
882  StrideMap::Index y_index(b_index);
883  do {
884  StrideMap::Index fwd_index(y_index);
885  StrideMap::Index rev_index(y_index);
886  rev_index.AddOffset(rev_index.MaxIndexOfDim(FD_WIDTH), FD_WIDTH);
887  do {
888  CopyTimeStepFrom(rev_index.t(), src, fwd_index.t());
889  } while (fwd_index.AddOffset(1, FD_WIDTH) &&
890  rev_index.AddOffset(-1, FD_WIDTH));
891  } while (y_index.AddOffset(1, FD_HEIGHT));
892  } while (b_index.AddOffset(1, FD_BATCH));
893 }
894 
895 // Copies src to *this with independent transpose of the x and y dimensions.
897  int num_features = src.NumFeatures();
898  stride_map_ = src.stride_map_;
899  stride_map_.TransposeXY();
900  ResizeToMap(src.int_mode(), stride_map_, num_features);
901  StrideMap::Index src_b_index(src.stride_map_);
902  StrideMap::Index dest_b_index(stride_map_);
903  do {
904  StrideMap::Index src_y_index(src_b_index);
905  StrideMap::Index dest_x_index(dest_b_index);
906  do {
907  StrideMap::Index src_x_index(src_y_index);
908  StrideMap::Index dest_y_index(dest_x_index);
909  do {
910  CopyTimeStepFrom(dest_y_index.t(), src, src_x_index.t());
911  } while (src_x_index.AddOffset(1, FD_WIDTH) &&
912  dest_y_index.AddOffset(1, FD_HEIGHT));
913  } while (src_y_index.AddOffset(1, FD_HEIGHT) &&
914  dest_x_index.AddOffset(1, FD_WIDTH));
915  } while (src_b_index.AddOffset(1, FD_BATCH) &&
916  dest_b_index.AddOffset(1, FD_BATCH));
917 }
918 
919 // Copies src to *this, at the given feature_offset, returning the total
920 // feature offset after the copy. Multiple calls will stack outputs from
921 // multiple sources in feature space.
922 int NetworkIO::CopyPacking(const NetworkIO& src, int feature_offset) {
923  ASSERT_HOST(int_mode_ == src.int_mode_);
924  int width = src.Width();
925  ASSERT_HOST(width <= Width());
926  int num_features = src.NumFeatures();
927  ASSERT_HOST(num_features + feature_offset <= NumFeatures());
928  if (int_mode_) {
929  for (int t = 0; t < width; ++t) {
930  memcpy(i_[t] + feature_offset, src.i_[t],
931  num_features * sizeof(i_[t][0]));
932  }
933  for (int t = width; t < i_.dim1(); ++t) {
934  memset(i_[t], 0, num_features * sizeof(i_[t][0]));
935  }
936  } else {
937  for (int t = 0; t < width; ++t) {
938  memcpy(f_[t] + feature_offset, src.f_[t],
939  num_features * sizeof(f_[t][0]));
940  }
941  for (int t = width; t < f_.dim1(); ++t) {
942  memset(f_[t], 0, num_features * sizeof(f_[t][0]));
943  }
944  }
945  return num_features + feature_offset;
946 }
947 
948 // Opposite of CopyPacking, fills *this with a part of src, starting at
949 // feature_offset, and picking num_features.
950 void NetworkIO::CopyUnpacking(const NetworkIO& src, int feature_offset,
951  int num_features) {
952  Resize(src, num_features);
953  int width = src.Width();
954  ASSERT_HOST(num_features + feature_offset <= src.NumFeatures());
955  if (int_mode_) {
956  for (int t = 0; t < width; ++t) {
957  memcpy(i_[t], src.i_[t] + feature_offset,
958  num_features * sizeof(i_[t][0]));
959  }
960  } else {
961  for (int t = 0; t < width; ++t) {
962  memcpy(f_[t], src.f_[t] + feature_offset,
963  num_features * sizeof(f_[t][0]));
964  }
965  }
966 }
967 
968 // Transposes the float part of *this into dest.
970  int width = Width();
971  dest->ResizeNoInit(NumFeatures(), width);
972  for (int t = 0; t < width; ++t) dest->WriteStrided(t, f_[t]);
973 }
974 
975 // Clips the content of a single time-step to +/-range.
976 void NetworkIO::ClipVector(int t, float range) {
977  ASSERT_HOST(!int_mode_);
978  float* v = f_[t];
979  int dim = f_.dim2();
980  for (int i = 0; i < dim; ++i)
981  v[i] = ClipToRange<float>(v[i], -range, range);
982 }
983 
984 // Returns the padding required for the given number of features in order
985 // for the SIMD operations to be safe.
986 /* static */
987 int NetworkIO::GetPadding(int num_features) {
988  if (multiplier_ == nullptr)
989  multiplier_ = IntSimdMatrix::GetFastestMultiplier();
990  int pad = 0;
991  if (multiplier_ != nullptr) {
992  pad = multiplier_->RoundInputs(num_features) - num_features;
993  }
994  return pad;
995 }
996 
997 } // namespace tesseract.
int BestLabel(int t, float *score) const
Definition: networkio.h:161
void ZeroTimeStepGeneral(int t, int offset, int num_features)
Definition: networkio.cpp:412
int size() const
Definition: genericvector.h:71
void CopyWithXReversal(const NetworkIO &src)
Definition: networkio.cpp:877
void CombineOutputs(const NetworkIO &base_output, const NetworkIO &combiner_output)
Definition: networkio.cpp:741
void Print(int num) const
Definition: networkio.cpp:371
void SetStride(const std::vector< std::pair< int, int >> &h_w_pairs)
Definition: stridemap.cpp:127
void AddTimeStepPart(int t, int offset, int num_features, float *inout) const
Definition: networkio.cpp:634
void SetActivations(int t, int label, float ok_score)
Definition: networkio.cpp:542
void MaxpoolBackward(const NetworkIO &fwd, const GENERIC_2D_ARRAY< int > &maxes)
Definition: networkio.cpp:700
bool AddOffset(int offset, FlexDimensions dimension)
Definition: stridemap.cpp:63
void ClipVector(int t, float range)
Definition: networkio.cpp:976
void CopyTimeStepFrom(int dest_t, const NetworkIO &src, int src_t)
Definition: networkio.cpp:388
static IntSimdMatrix * GetFastestMultiplier()
int CopyPacking(const NetworkIO &src, int feature_offset)
Definition: networkio.cpp:922
int BestChoiceOverRange(int t_start, int t_end, int not_this, int null_ch, float *rating, float *certainty) const
Definition: networkio.cpp:436
void MaxpoolTimeStep(int dest_t, const NetworkIO &src, int src_t, int *max_line)
Definition: networkio.cpp:673
void AddAllToFloat(const NetworkIO &src)
Definition: networkio.cpp:822
int RoundInputs(int size) const
Definition: intsimdmatrix.h:81
void WriteTimeStep(int t, const double *input)
Definition: networkio.cpp:650
Definition: statistc.h:33
void SubtractAllFromFloat(const NetworkIO &src)
Definition: networkio.cpp:829
T MaxAbs() const
Definition: matrix.h:355
void Copy2DImage(int batch, Pix *pix, float black, float contrast, TRand *randomizer)
Definition: networkio.cpp:213
const int8_t * i(int t) const
Definition: networkio.h:123
void Copy1DGreyImage(int batch, Pix *pix, float black, float contrast, TRand *randomizer)
Definition: networkio.cpp:251
void ResizeNoInit(int size1, int size2, int pad=0)
Definition: matrix.h:91
void ResizeScaled(const NetworkIO &src, int x_scale, int y_scale, int num_features)
Definition: networkio.cpp:67
int MaxIndexOfDim(FlexDimensions dim) const
Definition: stridemap.cpp:44
int Size(FlexDimensions dimension) const
Definition: stridemap.h:116
void Resize(const NetworkIO &src, int num_features)
Definition: networkio.h:45
const float kMinProb
Definition: networkio.cpp:32
void ZeroInvalidElements()
Definition: networkio.cpp:93
void Transpose(TransposedArray *dest) const
Definition: networkio.cpp:969
int Width() const
Definition: stridemap.h:118
void CopyWithXYTranspose(const NetworkIO &src)
Definition: networkio.cpp:896
void Clear()
Definition: matrix.h:136
int index(FlexDimensions dimension) const
Definition: stridemap.h:60
void CopyWithNormalization(const NetworkIO &src, const NetworkIO &scale)
Definition: networkio.cpp:836
void ZeroTimeStep(int t)
Definition: networkio.h:148
int IntCastRounded(double x)
Definition: helpers.h:168
void FromPixes(const StaticShape &shape, const std::vector< const Pix *> &pixes, TRand *randomizer)
Definition: networkio.cpp:175
Pix * ToPix() const
Definition: networkio.cpp:291
void ComputeCombinerDeltas(const NetworkIO &fwd_deltas, const NetworkIO &base_output)
Definition: networkio.cpp:775
const StrideMap & stride_map() const
Definition: networkio.h:133
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
static float ProbToCertainty(float prob)
Definition: networkio.cpp:573
void ScoresOverRange(int t_start, int t_end, int choice, int null_ch, float *rating, float *certainty) const
Definition: networkio.cpp:455
void AddTimeStep(int t, double *inout) const
Definition: networkio.cpp:618
void WriteTimeStepPart(int t, int offset, int num_features, const double *input)
Definition: networkio.cpp:656
int dim1() const
Definition: matrix.h:206
float * f(int t)
Definition: networkio.h:115
void CopyAll(const NetworkIO &src)
Definition: networkio.cpp:816
void FromPix(const StaticShape &shape, const Pix *pix, TRand *randomizer)
Definition: networkio.cpp:166
void CopyTimeStepGeneral(int dest_t, int dest_offset, int num_features, const NetworkIO &src, int src_t, int src_offset)
Definition: networkio.cpp:398
bool int_mode() const
Definition: networkio.h:127
int PositionOfBestMatch(const GenericVector< int > &labels, int start, int end) const
Definition: networkio.cpp:512
void SetPixel(int t, int f, int pixel, float black, float contrast)
Definition: networkio.cpp:280
double SignedRand(double range)
Definition: helpers.h:61
void CopyUnpacking(const NetworkIO &src, int feature_offset, int num_features)
Definition: networkio.cpp:950
void ScaleXY(int x_factor, int y_factor)
Definition: stridemap.cpp:145
double ScoreOfLabels(const GenericVector< int > &labels, int start) const
Definition: networkio.cpp:530
int dim2() const
Definition: matrix.h:207
const float kMinCertainty
Definition: networkio.cpp:30
void ResizeToMap(bool int_mode, const StrideMap &stride_map, int num_features)
Definition: networkio.cpp:51
void ReadTimeStep(int t, double *output) const
Definition: networkio.cpp:603
void CopyWithYReversal(const NetworkIO &src)
Definition: networkio.cpp:858
void WriteStrided(int t, const float *data)
Definition: weightmatrix.h:40
void ZeroVector(int n, T *vec)
Definition: functions.h:219
void EnsureBestLabel(int t, int label)
Definition: networkio.cpp:554
void ResizeXTo1(const NetworkIO &src, int num_features)
Definition: networkio.cpp:75
float MinOfMaxes() const
Definition: networkio.cpp:717
int Width() const
Definition: networkio.h:107
void Resize2d(bool int_mode, int width, int num_features)
Definition: networkio.cpp:40
bool AnySuspiciousTruth(float confidence_thr) const
Definition: networkio.cpp:584
int NumFeatures() const
Definition: networkio.h:111
#define ASSERT_HOST(x)
Definition: errcode.h:84
void Randomize(int t, int offset, int num_features, TRand *randomizer)
Definition: networkio.cpp:421