tesseract  4.0.0-1-g2a2b
trainingsample.cpp
Go to the documentation of this file.
1 // Copyright 2010 Google Inc. All Rights Reserved.
2 // Author: rays@google.com (Ray Smith)
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13 //
15 
16 // Include automatically generated configuration file if running autoconf.
17 #ifdef HAVE_CONFIG_H
18 #include "config_auto.h"
19 #endif
20 
21 #include "trainingsample.h"
22 
23 #include <cmath>
24 #include "allheaders.h"
25 #include "helpers.h"
26 #include "intfeaturemap.h"
27 #include "normfeat.h"
28 #include "shapetable.h"
29 
30 namespace tesseract {
31 
// Expands the ELISTIZE macro (declared in elst.h) for TrainingSample.
32 ELISTIZE(TrainingSample)
33 
34 // Center of randomizing operations.
// The randomizing code in this file subtracts this value from each feature
// X/Y before scaling and adds it back afterwards, so the scaling is applied
// about this coordinate.
35 const int kRandomizingCenter = 128;
36 
37 // Randomizing factors.
// The randomizing code selects the Y shift as
// kYShiftValues[index / kSampleScaleSize] and the scale factor as
// kScaleValues[index % kSampleScaleSize], after first incrementing index.
38 const int TrainingSample::kYShiftValues[kSampleYShiftSize] = {
39  6, 3, -3, -6, 0
40 };
41 const double TrainingSample::kScaleValues[kSampleScaleSize] = {
42  1.0625, 0.9375, 1.0
43 };
44 
// NOTE(review): the header of the function that owns this body is not present
// in this listing (presumably the TrainingSample destructor — confirm).
// Releases the two arrays that the rest of this file allocates with new[].
46  delete [] features_;
47  delete [] micro_features_;
48 }
49 
50 // WARNING! Serialize/DeSerialize do not save/restore the "cache" data
51 // members, which is mostly the mapped features, and the weight.
52 // It is assumed these can all be reconstructed from what is saved.
53 // Writes to the given file. Returns false in case of error.
54 bool TrainingSample::Serialize(FILE* fp) const {
55  if (fwrite(&class_id_, sizeof(class_id_), 1, fp) != 1) return false;
56  if (fwrite(&font_id_, sizeof(font_id_), 1, fp) != 1) return false;
57  if (fwrite(&page_num_, sizeof(page_num_), 1, fp) != 1) return false;
58  if (!bounding_box_.Serialize(fp)) return false;
59  if (fwrite(&num_features_, sizeof(num_features_), 1, fp) != 1) return false;
60  if (fwrite(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1)
61  return false;
62  if (fwrite(&outline_length_, sizeof(outline_length_), 1, fp) != 1)
63  return false;
64  if (fwrite(features_, sizeof(*features_), num_features_, fp) != num_features_)
65  return false;
66  if (fwrite(micro_features_, sizeof(*micro_features_), num_micro_features_,
67  fp) != num_micro_features_)
68  return false;
69  if (fwrite(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) !=
70  kNumCNParams) return false;
71  if (fwrite(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) != GeoCount)
72  return false;
73  return true;
74 }
75 
76 // Creates from the given file. Returns nullptr in case of error.
77 // If swap is true, assumes a big/little-endian swap is needed.
// NOTE(review): the function header and the statement that creates `sample`
// are not present in this listing.
// The sample is handed to the caller only when DeSerialize succeeds;
// otherwise it is destroyed here and nullptr is returned.
80  if (sample->DeSerialize(swap, fp)) return sample;
81  delete sample;
82  return nullptr;
83 }
84 
85 // Reads from the given file. Returns false in case of error.
86 // If swap is true, assumes a big/little-endian swap is needed.
87 bool TrainingSample::DeSerialize(bool swap, FILE* fp) {
88  if (fread(&class_id_, sizeof(class_id_), 1, fp) != 1) return false;
89  if (fread(&font_id_, sizeof(font_id_), 1, fp) != 1) return false;
90  if (fread(&page_num_, sizeof(page_num_), 1, fp) != 1) return false;
91  if (!bounding_box_.DeSerialize(swap, fp)) return false;
92  if (fread(&num_features_, sizeof(num_features_), 1, fp) != 1) return false;
93  if (fread(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1)
94  return false;
95  if (fread(&outline_length_, sizeof(outline_length_), 1, fp) != 1)
96  return false;
97  if (swap) {
98  ReverseN(&class_id_, sizeof(class_id_));
99  ReverseN(&num_features_, sizeof(num_features_));
100  ReverseN(&num_micro_features_, sizeof(num_micro_features_));
101  ReverseN(&outline_length_, sizeof(outline_length_));
102  }
103  // Arbitrarily limit the number of elements to protect against bad data.
104  if (num_features_ > UINT16_MAX) return false;
105  if (num_micro_features_ > UINT16_MAX) return false;
106  delete [] features_;
107  features_ = new INT_FEATURE_STRUCT[num_features_];
108  if (fread(features_, sizeof(*features_), num_features_, fp)
109  != num_features_)
110  return false;
111  delete [] micro_features_;
112  micro_features_ = new MicroFeature[num_micro_features_];
113  if (fread(micro_features_, sizeof(*micro_features_), num_micro_features_,
114  fp) != num_micro_features_)
115  return false;
116  if (fread(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) !=
117  kNumCNParams) return false;
118  if (fread(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) != GeoCount)
119  return false;
120  return true;
121 }
122 
123 // Saves the given features into a TrainingSample.
// NOTE(review): the first line of the signature, the statement that creates
// `sample`, and the right-hand sides of the CharNormY and CharNormLength
// assignments are not present in this listing.
125  const INT_FX_RESULT_STRUCT& fx_info,
126  const TBOX& bounding_box,
127  const INT_FEATURE_STRUCT* features,
128  int num_features) {
// The sample gets its own array and the caller's features are copied into it,
// so it does not alias `features`.
130  sample->num_features_ = num_features;
131  sample->features_ = new INT_FEATURE_STRUCT[num_features];
132  sample->outline_length_ = fx_info.Length;
133  memcpy(sample->features_, features, num_features * sizeof(features[0]));
// The Geo feature is taken directly from the bounding box.
134  sample->geo_feature_[GeoBottom] = bounding_box.bottom();
135  sample->geo_feature_[GeoTop] = bounding_box.top();
136  sample->geo_feature_[GeoWidth] = bounding_box.width();
137 
138  // Generate the cn_feature_ from the fx_info.
139  sample->cn_feature_[CharNormY] =
141  sample->cn_feature_[CharNormLength] =
143  sample->cn_feature_[CharNormRx] = MF_SCALE_FACTOR * fx_info.Rx;
144  sample->cn_feature_[CharNormRy] = MF_SCALE_FACTOR * fx_info.Ry;
145 
// Mark the new sample as neither indexed nor mapped.
146  sample->features_are_indexed_ = false;
147  sample->features_are_mapped_ = false;
148  return sample;
149 }
150 
151 // Returns the cn_feature as a FEATURE_STRUCT* needed by cntraining.
// NOTE(review): the function header is not present in this listing.
// Creates a feature via NewFeature(&CharNormDesc) and copies the kNumCNParams
// values of cn_feature_ into its Params.
// NOTE(review): the caller presumably takes ownership of the result — confirm.
153  FEATURE feature = NewFeature(&CharNormDesc);
154  for (int i = 0; i < kNumCNParams; ++i)
155  feature->Params[i] = cn_feature_[i];
156  return feature;
157 }
158 
159 // Constructs and returns a copy randomized by the method given by
160 // the randomizer index. If index is out of [0, kSampleRandomSize) then
161 // an exact copy is returned.
// NOTE(review): the function header and the statement that creates `sample`
// are not present in this listing.
// Only an index inside [0, kSampleRandomSize) changes the features; any other
// index returns `sample` without modification.
164  if (index >= 0 && index < kSampleRandomSize) {
165  ++index; // Remove the first combination.
// The quotient of the incremented index selects the Y shift and the remainder
// selects the scale factor.
166  const int yshift = kYShiftValues[index / kSampleScaleSize];
167  double scaling = kScaleValues[index % kSampleScaleSize];
168  for (uint32_t i = 0; i < num_features_; ++i) {
// X: scale about kRandomizingCenter, add 0.5 before conversion to int, then
// clip to [0, UINT8_MAX].
169  double result = (features_[i].X - kRandomizingCenter) * scaling;
170  result += kRandomizingCenter;
171  sample->features_[i].X = ClipToRange<int>(result + 0.5, 0, UINT8_MAX);
// Y: same scaling, plus the selected vertical shift.
172  result = (features_[i].Y - kRandomizingCenter) * scaling;
173  result += kRandomizingCenter + yshift;
174  sample->features_[i].Y = ClipToRange<int>(result + 0.5, 0, UINT8_MAX);
175  }
176  }
177  return sample;
178 }
179 
180 // Constructs and returns an exact copy.
// NOTE(review): the function header and the statement that creates `sample`
// are not present in this listing.
// NOTE(review): page_num_, bounding_box_ and outline_length_ are not assigned
// in the lines visible here — confirm whether omitting them is intended.
183  sample->class_id_ = class_id_;
184  sample->font_id_ = font_id_;
185  sample->weight_ = weight_;
186  sample->sample_index_ = sample_index_;
187  sample->num_features_ = num_features_;
// The arrays are duplicated only when non-empty; with a zero count the copy's
// pointer keeps whatever value it had when `sample` was created.
188  if (num_features_ > 0) {
189  sample->features_ = new INT_FEATURE_STRUCT[num_features_];
190  memcpy(sample->features_, features_, num_features_ * sizeof(features_[0]));
191  }
192  sample->num_micro_features_ = num_micro_features_;
193  if (num_micro_features_ > 0) {
194  sample->micro_features_ = new MicroFeature[num_micro_features_];
195  memcpy(sample->micro_features_, micro_features_,
196  num_micro_features_ * sizeof(micro_features_[0]));
197  }
// The CN and Geo arrays have fixed lengths and are always copied.
198  memcpy(sample->cn_feature_, cn_feature_, sizeof(*cn_feature_) * kNumCNParams);
199  memcpy(sample->geo_feature_, geo_feature_, sizeof(*geo_feature_) * GeoCount);
200  return sample;
201 }
202 
203 // Extracts the needed information from the CHAR_DESC_STRUCT.
// int_feature_type, micro_type, cn_type and geo_type index
// char_desc->FeatureSets to select the INT, micro, CN and Geo feature sets.
// A nullptr set is reported via tprintf; the INT and micro arrays are then
// left empty, while the CN and Geo values keep their previous contents.
// Both features_are_indexed_ and features_are_mapped_ are cleared on exit.
// NOTE(review): the argument lines of the two "%s" tprintf calls are not
// present in this listing.
204 void TrainingSample::ExtractCharDesc(int int_feature_type,
205  int micro_type,
206  int cn_type,
207  int geo_type,
208  CHAR_DESC_STRUCT* char_desc) {
209  // Extract the INT features.
// Drop any previously held INT features before rebuilding the array.
210  delete[] features_;
211  FEATURE_SET_STRUCT* char_features = char_desc->FeatureSets[int_feature_type];
212  if (char_features == nullptr) {
213  tprintf("Error: no features to train on of type %s\n",
215  num_features_ = 0;
216  features_ = nullptr;
217  } else {
218  num_features_ = char_features->NumFeatures;
219  features_ = new INT_FEATURE_STRUCT[num_features_];
220  for (uint32_t f = 0; f < num_features_; ++f) {
// The parameter values are narrowed to uint8_t by the casts below.
221  features_[f].X =
222  static_cast<uint8_t>(char_features->Features[f]->Params[IntX]);
223  features_[f].Y =
224  static_cast<uint8_t>(char_features->Features[f]->Params[IntY]);
225  features_[f].Theta =
226  static_cast<uint8_t>(char_features->Features[f]->Params[IntDir]);
227  features_[f].CP_misses = 0;
228  }
229  }
230  // Extract the Micro features.
// Drop any previously held micro features before rebuilding the array.
231  delete[] micro_features_;
232  char_features = char_desc->FeatureSets[micro_type];
233  if (char_features == nullptr) {
234  tprintf("Error: no features to train on of type %s\n",
236  num_micro_features_ = 0;
237  micro_features_ = nullptr;
238  } else {
239  num_micro_features_ = char_features->NumFeatures;
240  micro_features_ = new MicroFeature[num_micro_features_];
241  for (uint32_t f = 0; f < num_micro_features_; ++f) {
// Copy all MFCount parameters of each micro feature.
242  for (int d = 0; d < MFCount; ++d) {
243  micro_features_[f][d] = char_features->Features[f]->Params[d];
244  }
245  }
246  }
247  // Extract the CN feature.
248  char_features = char_desc->FeatureSets[cn_type];
249  if (char_features == nullptr) {
250  tprintf("Error: no CN feature to train on.\n");
251  } else {
// Exactly one CN feature is expected; its four parameters are copied.
252  ASSERT_HOST(char_features->NumFeatures == 1);
253  cn_feature_[CharNormY] = char_features->Features[0]->Params[CharNormY];
254  cn_feature_[CharNormLength] =
255  char_features->Features[0]->Params[CharNormLength];
256  cn_feature_[CharNormRx] = char_features->Features[0]->Params[CharNormRx];
257  cn_feature_[CharNormRy] = char_features->Features[0]->Params[CharNormRy];
258  }
259  // Extract the Geo feature.
260  char_features = char_desc->FeatureSets[geo_type];
261  if (char_features == nullptr) {
262  tprintf("Error: no Geo feature to train on.\n");
263  } else {
// Exactly one Geo feature is expected; its three parameters are copied.
264  ASSERT_HOST(char_features->NumFeatures == 1);
265  geo_feature_[GeoBottom] = char_features->Features[0]->Params[GeoBottom];
266  geo_feature_[GeoTop] = char_features->Features[0]->Params[GeoTop];
267  geo_feature_[GeoWidth] = char_features->Features[0]->Params[GeoWidth];
268  }
// The features have just been replaced, so the sample is marked as neither
// indexed nor mapped.
269  features_are_indexed_ = false;
270  features_are_mapped_ = false;
271 }
272 
273 // Sets the mapped_features_ from the features_ using the provided
274 // feature_space to the indexed versions of the features.
// NOTE(review): listing line 276 is not present here.
// The result of IndexAndSortFeatures is written straight into
// mapped_features_; the flags then record that it holds indexed rather than
// mapped features.
275 void TrainingSample::IndexFeatures(const IntFeatureSpace& feature_space) {
277  feature_space.IndexAndSortFeatures(features_, num_features_,
278  &mapped_features_);
279  features_are_indexed_ = true;
280  features_are_mapped_ = false;
281 }
282 
283 // Sets the mapped_features_ from the features using the provided
284 // feature_map.
// NOTE(review): listing lines 286 and 288 are not present here, so the
// declaration of indexed_features and the end of the IndexAndSortFeatures
// call are not visible.
// The features are indexed through the map's feature space, then
// MapIndexedFeatures converts indexed_features into mapped_features_; the
// flags record that mapped_features_ now holds mapped features.
285 void TrainingSample::MapFeatures(const IntFeatureMap& feature_map) {
287  feature_map.feature_space().IndexAndSortFeatures(features_, num_features_,
289  feature_map.MapIndexedFeatures(indexed_features, &mapped_features_);
290  features_are_indexed_ = false;
291  features_are_mapped_ = true;
292 }
293 
294 // Returns a pix representing the sample. (Int features only.)
295 Pix* TrainingSample::RenderToPix(const UNICHARSET* unicharset) const {
296  Pix* pix = pixCreate(kIntFeatureExtent, kIntFeatureExtent, 1);
297  for (uint32_t f = 0; f < num_features_; ++f) {
298  int start_x = features_[f].X;
299  int start_y = kIntFeatureExtent - features_[f].Y;
300  double dx = cos((features_[f].Theta / 256.0) * 2.0 * M_PI - M_PI);
301  double dy = -sin((features_[f].Theta / 256.0) * 2.0 * M_PI - M_PI);
302  for (int i = 0; i <= 5; ++i) {
303  int x = static_cast<int>(start_x + dx * i);
304  int y = static_cast<int>(start_y + dy * i);
305  if (x >= 0 && x < 256 && y >= 0 && y < 256)
306  pixSetPixel(pix, x, y, 1);
307  }
308  }
309  if (unicharset != nullptr)
310  pixSetText(pix, unicharset->id_to_unichar(class_id_));
311  return pix;
312 }
313 
314 // Displays the features in the given window with the given color.
// NOTE(review): the first line of the signature is not present in this
// listing.
316  ScrollView* window) const {
// When GRAPHICS_DISABLED is defined the body is compiled out and the function
// does nothing; otherwise every INT feature is passed to RenderIntFeature with
// the given window and color.
317  #ifndef GRAPHICS_DISABLED
318  for (uint32_t f = 0; f < num_features_; ++f) {
319  RenderIntFeature(window, &features_[f], color);
320  }
321  #endif // GRAPHICS_DISABLED
322 }
323 
324 // Returns a pix of the original sample image. The pix is padded all round
325 // by padding wherever possible.
326 // The returned Pix must be pixDestroyed after use.
327 // If the input page_pix is nullptr, nullptr is returned.
328 Pix* TrainingSample::GetSamplePix(int padding, Pix* page_pix) const {
329  if (page_pix == nullptr)
330  return nullptr;
331  int page_width = pixGetWidth(page_pix);
332  int page_height = pixGetHeight(page_pix);
333  TBOX padded_box = bounding_box();
334  padded_box.pad(padding, padding);
335  // Clip the padded_box to the limits of the page
336  TBOX page_box(0, 0, page_width, page_height);
337  padded_box &= page_box;
338  Box* box = boxCreate(page_box.left(), page_height - page_box.top(),
339  page_box.width(), page_box.height());
340  Pix* sample_pix = pixClipRectangle(page_pix, box, nullptr);
341  boxDestroy(&box);
342  return sample_pix;
343 }
344 
345 } // namespace tesseract
Definition: picofeat.h:31
void DisplayFeatures(ScrollView::Color color, ScrollView *window) const
const IntFeatureSpace & feature_space() const
Definition: intfeaturemap.h:60
const int kIntFeatureExtent
Definition: cluster.h:32
#define MF_SCALE_FACTOR
Definition: mfoutline.h:64
const int kRandomizingCenter
Definition: rect.h:34
static TrainingSample * CopyFromFeatures(const INT_FX_RESULT_STRUCT &fx_info, const TBOX &bounding_box, const INT_FEATURE_STRUCT *features, int num_features)
const int kBlnBaselineOffset
Definition: normalis.h:25
TrainingSample * Copy() const
bool DeSerialize(bool swap, FILE *fp)
Definition: rect.cpp:192
float Params[1]
Definition: ocrfeatures.h:62
void ReverseN(void *ptr, int num_bytes)
Definition: helpers.h:178
int16_t width() const
Definition: rect.h:115
FEATURE Features[1]
Definition: ocrfeatures.h:69
int16_t left() const
Definition: rect.h:72
void ExtractCharDesc(int feature_type, int micro_type, int cn_type, int geo_type, CHAR_DESC_STRUCT *char_desc)
int16_t top() const
Definition: rect.h:58
static TrainingSample * DeSerializeCreate(bool swap, FILE *fp)
Pix * GetSamplePix(int padding, Pix *page_pix) const
uint16_t NumFeatures
Definition: ocrfeatures.h:67
Definition: picofeat.h:30
#define LENGTH_COMPRESSION
Definition: normfeat.h:27
const TBOX & bounding_box() const
void RenderIntFeature(ScrollView *window, const INT_FEATURE_STRUCT *Feature, ScrollView::Color color)
Definition: intproto.cpp:1628
const INT_FEATURE_STRUCT * features() const
#define ELISTIZE(CLASSNAME)
Definition: elst.h:961
FEATURE_SET FeatureSets[NUM_FEATURE_TYPES]
Definition: featdefs.h:42
bool Serialize(FILE *fp) const
Definition: rect.cpp:185
uint32_t num_features() const
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
float MicroFeature[MFCount]
Definition: mf.h:33
const char * kMicroFeatureType
Definition: featdefs.cpp:32
bool Serialize(FILE *fp) const
FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc)
Definition: ocrfeatures.cpp:81
FEATURE_STRUCT * GetCNFeature() const
int32_t Length
Definition: intfx.h:36
void MapFeatures(const IntFeatureMap &feature_map)
Definition: mf.h:30
const FEATURE_DESC_STRUCT CharNormDesc
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:290
Pix * RenderToPix(const UNICHARSET *unicharset) const
bool DeSerialize(bool swap, FILE *fp)
int16_t Ymean
Definition: intfx.h:37
void IndexAndSortFeatures(const INT_FEATURE_STRUCT *features, int num_features, GenericVector< int > *sorted_features) const
int16_t bottom() const
Definition: rect.h:65
void IndexFeatures(const IntFeatureSpace &feature_space)
int MapIndexedFeatures(const GenericVector< int > &index_features, GenericVector< int > *map_features) const
const char * kIntFeatureType
Definition: featdefs.cpp:34
int16_t height() const
Definition: rect.h:108
const GenericVector< int > & indexed_features() const
void pad(int xpad, int ypad)
Definition: rect.h:131
TrainingSample * RandomizedCopy(int index) const
#define ASSERT_HOST(x)
Definition: errcode.h:84