tesseract  5.0.0-alpha-619-ge9db
trainingsample.cpp
Go to the documentation of this file.
1 // Copyright 2010 Google Inc. All Rights Reserved.
2 // Author: rays@google.com (Ray Smith)
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13 //
15 
16 #define _USE_MATH_DEFINES // for M_PI
17 // Include automatically generated configuration file if running autoconf.
18 #ifdef HAVE_CONFIG_H
19 #include "config_auto.h"
20 #endif
21 
22 #include "trainingsample.h"
23 
24 #include <cmath> // for M_PI
25 #include "allheaders.h"
26 #include <tesseract/helpers.h>
27 #include "intfeaturemap.h"
28 #include "normfeat.h"
29 #include "shapetable.h"
30 
31 namespace tesseract {
32 
33 ELISTIZE(TrainingSample)
34 
35 // Center of randomizing operations.
// The randomized-copy code later in this file subtracts this value from a
// feature's X/Y coordinate, applies the scale factor, and then adds it back,
// so scaling happens about this point rather than about the origin.
36 const int kRandomizingCenter = 128;
37 
38 // Randomizing factors.
// The randomized-copy code later in this file picks the Y shift with
// kYShiftValues[index / kSampleScaleSize] and the scale factor with
// kScaleValues[index % kSampleScaleSize].
39 const int TrainingSample::kYShiftValues[kSampleYShiftSize] = {
40  6, 3, -3, -6, 0
41 };
// Multiplicative scale factors applied to feature X/Y offsets from
// kRandomizingCenter.
42 const double TrainingSample::kScaleValues[kSampleScaleSize] = {
43  1.0625, 0.9375, 1.0
44 };
45 
// Body of the TrainingSample destructor: releases the two arrays that other
// code in this file allocates with new[].
// NOTE(review): the line that opens this definition is absent from this
// listing; only the body is shown.
47  delete [] features_;
48  delete [] micro_features_;
49 }
50 
51 // WARNING! Serialize/DeSerialize do not save/restore the "cache" data
52 // members, which is mostly the mapped features, and the weight.
53 // It is assumed these can all be reconstructed from what is saved.
54 // Writes to the given file. Returns false in case of error.
55 bool TrainingSample::Serialize(FILE* fp) const {
56  if (fwrite(&class_id_, sizeof(class_id_), 1, fp) != 1) return false;
57  if (fwrite(&font_id_, sizeof(font_id_), 1, fp) != 1) return false;
58  if (fwrite(&page_num_, sizeof(page_num_), 1, fp) != 1) return false;
59  if (!bounding_box_.Serialize(fp)) return false;
60  if (fwrite(&num_features_, sizeof(num_features_), 1, fp) != 1) return false;
61  if (fwrite(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1)
62  return false;
63  if (fwrite(&outline_length_, sizeof(outline_length_), 1, fp) != 1)
64  return false;
65  if (fwrite(features_, sizeof(*features_), num_features_, fp) != num_features_)
66  return false;
67  if (fwrite(micro_features_, sizeof(*micro_features_), num_micro_features_,
68  fp) != num_micro_features_)
69  return false;
70  if (fwrite(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) !=
71  kNumCNParams) return false;
72  if (fwrite(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) != GeoCount)
73  return false;
74  return true;
75 }
76 
77 // Creates from the given file. Returns nullptr in case of error.
78 // If swap is true, assumes a big/little-endian swap is needed.
// On success the caller receives the newly allocated sample; on failure the
// partially read sample is deleted here before nullptr is returned.
// NOTE(review): the signature line of this definition is absent from this
// listing; only the body is shown.
80  auto* sample = new TrainingSample;
81  if (sample->DeSerialize(swap, fp)) return sample;
82  delete sample;  // Read failed: do not leak the half-filled object.
83  return nullptr;
84 }
85 
86 // Reads from the given file. Returns false in case of error.
87 // If swap is true, assumes a big/little-endian swap is needed.
88 bool TrainingSample::DeSerialize(bool swap, FILE* fp) {
89  if (fread(&class_id_, sizeof(class_id_), 1, fp) != 1) return false;
90  if (fread(&font_id_, sizeof(font_id_), 1, fp) != 1) return false;
91  if (fread(&page_num_, sizeof(page_num_), 1, fp) != 1) return false;
92  if (!bounding_box_.DeSerialize(swap, fp)) return false;
93  if (fread(&num_features_, sizeof(num_features_), 1, fp) != 1) return false;
94  if (fread(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1)
95  return false;
96  if (fread(&outline_length_, sizeof(outline_length_), 1, fp) != 1)
97  return false;
98  if (swap) {
99  ReverseN(&class_id_, sizeof(class_id_));
100  ReverseN(&num_features_, sizeof(num_features_));
101  ReverseN(&num_micro_features_, sizeof(num_micro_features_));
102  ReverseN(&outline_length_, sizeof(outline_length_));
103  }
104  // Arbitrarily limit the number of elements to protect against bad data.
105  if (num_features_ > UINT16_MAX) return false;
106  if (num_micro_features_ > UINT16_MAX) return false;
107  delete [] features_;
108  features_ = new INT_FEATURE_STRUCT[num_features_];
109  if (fread(features_, sizeof(*features_), num_features_, fp)
110  != num_features_)
111  return false;
112  delete [] micro_features_;
113  micro_features_ = new MicroFeature[num_micro_features_];
114  if (fread(micro_features_, sizeof(*micro_features_), num_micro_features_,
115  fp) != num_micro_features_)
116  return false;
117  if (fread(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) !=
118  kNumCNParams) return false;
119  if (fread(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) != GeoCount)
120  return false;
121  return true;
122 }
123 
124 // Saves the given features into a TrainingSample.
// Returns a newly allocated sample holding a private copy of the
// num_features entries of features, the outline length from fx_info, geo
// features taken from bounding_box and CN features derived from fx_info.
// NOTE(review): the first line of this definition (return type and function
// name) is absent from this listing.
126  const INT_FX_RESULT_STRUCT& fx_info,
127  const TBOX& bounding_box,
128  const INT_FEATURE_STRUCT* features,
129  int num_features) {
130  auto* sample = new TrainingSample;
131  sample->num_features_ = num_features;
132  sample->features_ = new INT_FEATURE_STRUCT[num_features];
133  sample->outline_length_ = fx_info.Length;
// Deep copy: the sample does not keep a pointer to the caller's array.
134  memcpy(sample->features_, features, num_features * sizeof(features[0]));
// Geo features come straight from the bounding box.
135  sample->geo_feature_[GeoBottom] = bounding_box.bottom();
136  sample->geo_feature_[GeoTop] = bounding_box.top();
137  sample->geo_feature_[GeoWidth] = bounding_box.width();
138
139  // Generate the cn_feature_ from the fx_info.
// NOTE(review): the right-hand sides of the next two assignments (listing
// lines 141 and 143) are absent from this listing.
140  sample->cn_feature_[CharNormY] =
142  sample->cn_feature_[CharNormLength] =
144  sample->cn_feature_[CharNormRx] = MF_SCALE_FACTOR * fx_info.Rx;
145  sample->cn_feature_[CharNormRy] = MF_SCALE_FACTOR * fx_info.Ry;
146
// No indexed or mapped features have been computed for the new sample yet.
147  sample->features_are_indexed_ = false;
148  sample->features_are_mapped_ = false;
149  return sample;
150 }
151 
152 // Returns the cn_feature as a FEATURE_STRUCT* needed by cntraining.
// The feature is obtained from NewFeature(&CharNormDesc) and its first
// kNumCNParams parameters are filled from cn_feature_.
// NOTE(review): the signature line of this definition is absent from this
// listing; ownership of the returned feature presumably passes to the
// caller - confirm against NewFeature's contract.
154  FEATURE feature = NewFeature(&CharNormDesc);
155  for (int i = 0; i < kNumCNParams; ++i)
156  feature->Params[i] = cn_feature_[i];
157  return feature;
158 }
159 
160 // Constructs and returns a copy randomized by the method given by
161 // the randomizer index. If index is out of [0, kSampleRandomSize) then
162 // an exact copy is returned.
// For an in-range index, every feature's X and Y are scaled about
// kRandomizingCenter, Y is additionally shifted, and both are rounded and
// clipped to [0, UINT8_MAX].
// NOTE(review): the signature line and the statement that creates `sample`
// (listing lines 163-164) are absent from this listing.
165  if (index >= 0 && index < kSampleRandomSize) {
166  ++index; // Remove the first combination.
// The quotient selects the Y shift and the remainder selects the scale.
167  const int yshift = kYShiftValues[index / kSampleScaleSize];
168  double scaling = kScaleValues[index % kSampleScaleSize];
169  for (uint32_t i = 0; i < num_features_; ++i) {
// Scale X about the center; adding 0.5 rounds before the clip to int.
170  double result = (features_[i].X - kRandomizingCenter) * scaling;
171  result += kRandomizingCenter;
172  sample->features_[i].X = ClipToRange<int>(result + 0.5, 0, UINT8_MAX);
// Same for Y, with the selected vertical shift applied as well.
173  result = (features_[i].Y - kRandomizingCenter) * scaling;
174  result += kRandomizingCenter + yshift;
175  sample->features_[i].Y = ClipToRange<int>(result + 0.5, 0, UINT8_MAX);
176  }
177  }
178  return sample;
179 }
180 
181 // Constructs and returns an exact copy.
// The new sample gets its own deep copies of the feature arrays.
// NOTE(review): the signature line of this definition is absent from this
// listing. Also, the body below does not assign page_num_, bounding_box_ or
// outline_length_, so those keep whatever the default constructor gives
// them - confirm whether that is intended for an "exact" copy.
183  auto* sample = new TrainingSample;
184  sample->class_id_ = class_id_;
185  sample->font_id_ = font_id_;
186  sample->weight_ = weight_;
187  sample->sample_index_ = sample_index_;
188  sample->num_features_ = num_features_;
// Arrays are only allocated when there is something to copy.
189  if (num_features_ > 0) {
190  sample->features_ = new INT_FEATURE_STRUCT[num_features_];
191  memcpy(sample->features_, features_, num_features_ * sizeof(features_[0]));
192  }
193  sample->num_micro_features_ = num_micro_features_;
194  if (num_micro_features_ > 0) {
195  sample->micro_features_ = new MicroFeature[num_micro_features_];
196  memcpy(sample->micro_features_, micro_features_,
197  num_micro_features_ * sizeof(micro_features_[0]));
198  }
// The CN and geo features are fixed-size arrays, copied wholesale.
199  memcpy(sample->cn_feature_, cn_feature_, sizeof(*cn_feature_) * kNumCNParams);
200  memcpy(sample->geo_feature_, geo_feature_, sizeof(*geo_feature_) * GeoCount);
201  return sample;
202 }
203 
204 // Extracts the needed information from the CHAR_DESC_STRUCT.
// Each *_type argument is used as an index into char_desc->FeatureSets to
// select, in turn, the int, micro, CN and geo feature sets.
// A null int or micro feature set is reported and leaves that array empty
// (count 0, null pointer). A null CN or geo feature set is reported and
// leaves the currently stored values untouched. Any previously held
// features_ / micro_features_ arrays are freed, and both cache flags are
// cleared before returning.
205 void TrainingSample::ExtractCharDesc(int int_feature_type,
206  int micro_type,
207  int cn_type,
208  int geo_type,
209  CHAR_DESC_STRUCT* char_desc) {
210  // Extract the INT features.
211  delete[] features_;
212  FEATURE_SET_STRUCT* char_features = char_desc->FeatureSets[int_feature_type];
213  if (char_features == nullptr) {
// NOTE(review): the argument line that completes this tprintf call (listing
// line 215) is absent from this listing.
214  tprintf("Error: no features to train on of type %s\n",
216  num_features_ = 0;
217  features_ = nullptr;
218  } else {
219  num_features_ = char_features->NumFeatures;
220  features_ = new INT_FEATURE_STRUCT[num_features_];
221  for (uint32_t f = 0; f < num_features_; ++f) {
// The source parameters are narrowed to 8-bit values by these casts.
222  features_[f].X =
223  static_cast<uint8_t>(char_features->Features[f]->Params[IntX]);
224  features_[f].Y =
225  static_cast<uint8_t>(char_features->Features[f]->Params[IntY]);
226  features_[f].Theta =
227  static_cast<uint8_t>(char_features->Features[f]->Params[IntDir]);
228  features_[f].CP_misses = 0;
229  }
230  }
231  // Extract the Micro features.
232  delete[] micro_features_;
233  char_features = char_desc->FeatureSets[micro_type];
234  if (char_features == nullptr) {
// NOTE(review): the argument line that completes this tprintf call (listing
// line 236) is absent from this listing.
235  tprintf("Error: no features to train on of type %s\n",
237  num_micro_features_ = 0;
238  micro_features_ = nullptr;
239  } else {
240  num_micro_features_ = char_features->NumFeatures;
241  micro_features_ = new MicroFeature[num_micro_features_];
242  for (uint32_t f = 0; f < num_micro_features_; ++f) {
// Copy all MFCount parameters of each micro feature unchanged.
243  for (int d = 0; d < MFCount; ++d) {
244  micro_features_[f][d] = char_features->Features[f]->Params[d];
245  }
246  }
247  }
248  // Extract the CN feature.
249  char_features = char_desc->FeatureSets[cn_type];
250  if (char_features == nullptr) {
251  tprintf("Error: no CN feature to train on.\n");
252  } else {
// Exactly one CN feature is expected per character.
253  ASSERT_HOST(char_features->NumFeatures == 1);
254  cn_feature_[CharNormY] = char_features->Features[0]->Params[CharNormY];
255  cn_feature_[CharNormLength] =
256  char_features->Features[0]->Params[CharNormLength];
257  cn_feature_[CharNormRx] = char_features->Features[0]->Params[CharNormRx];
258  cn_feature_[CharNormRy] = char_features->Features[0]->Params[CharNormRy];
259  }
260  // Extract the Geo feature.
261  char_features = char_desc->FeatureSets[geo_type];
262  if (char_features == nullptr) {
263  tprintf("Error: no Geo feature to train on.\n");
264  } else {
// Exactly one geo feature is expected per character.
265  ASSERT_HOST(char_features->NumFeatures == 1);
266  geo_feature_[GeoBottom] = char_features->Features[0]->Params[GeoBottom];
267  geo_feature_[GeoTop] = char_features->Features[0]->Params[GeoTop];
268  geo_feature_[GeoWidth] = char_features->Features[0]->Params[GeoWidth];
269  }
// The raw features changed, so any indexed/mapped form is no longer valid.
270  features_are_indexed_ = false;
271  features_are_mapped_ = false;
272 }
273 
274 // Sets the mapped_features_ from the features_ using the provided
275 // feature_space to the indexed versions of the features.
// Afterwards mapped_features_ holds indexed (not mapped) features, which is
// what the two flags record.
276 void TrainingSample::IndexFeatures(const IntFeatureSpace& feature_space) {
// NOTE(review): listing line 277 is absent from this listing.
278  feature_space.IndexAndSortFeatures(features_, num_features_,
279  &mapped_features_);
280  features_are_indexed_ = true;
281  features_are_mapped_ = false;
282 }
283 
284 // Sets the mapped_features_ from the features using the provided
285 // feature_map.
// Two steps: the features are first indexed and sorted via the map's feature
// space, then the indexed features are passed through MapIndexedFeatures to
// fill mapped_features_. The flags record that the result is mapped.
286 void TrainingSample::MapFeatures(const IntFeatureMap& feature_map) {
// NOTE(review): listing lines 287 and 289 (presumably the declaration of
// indexed_features and the final argument of the call below) are absent
// from this listing.
288  feature_map.feature_space().IndexAndSortFeatures(features_, num_features_,
290  feature_map.MapIndexedFeatures(indexed_features, &mapped_features_);
291  features_are_indexed_ = false;
292  features_are_mapped_ = true;
293 }
294 
295 // Returns a pix representing the sample. (Int features only.)
296 Pix* TrainingSample::RenderToPix(const UNICHARSET* unicharset) const {
297  Pix* pix = pixCreate(kIntFeatureExtent, kIntFeatureExtent, 1);
298  for (uint32_t f = 0; f < num_features_; ++f) {
299  int start_x = features_[f].X;
300  int start_y = kIntFeatureExtent - features_[f].Y;
301  double dx = cos((features_[f].Theta / 256.0) * 2.0 * M_PI - M_PI);
302  double dy = -sin((features_[f].Theta / 256.0) * 2.0 * M_PI - M_PI);
303  for (int i = 0; i <= 5; ++i) {
304  int x = static_cast<int>(start_x + dx * i);
305  int y = static_cast<int>(start_y + dy * i);
306  if (x >= 0 && x < 256 && y >= 0 && y < 256)
307  pixSetPixel(pix, x, y, 1);
308  }
309  }
310  if (unicharset != nullptr)
311  pixSetText(pix, unicharset->id_to_unichar(class_id_));
312  return pix;
313 }
314 
315 // Displays the features in the given window with the given color.
// Each feature is drawn with RenderIntFeature. When GRAPHICS_DISABLED is
// defined the body compiles to nothing.
// NOTE(review): the first line of this definition (listing line 316) is
// absent from this listing.
317  ScrollView* window) const {
318  #ifndef GRAPHICS_DISABLED
319  for (uint32_t f = 0; f < num_features_; ++f) {
320  RenderIntFeature(window, &features_[f], color);
321  }
322  #endif // GRAPHICS_DISABLED
323 }
324 
325 // Returns a pix of the original sample image. The pix is padded all round
326 // by padding wherever possible.
327 // The returned Pix must be pixDestroyed after use.
328 // If the input page_pix is nullptr, nullptr is returned.
329 Pix* TrainingSample::GetSamplePix(int padding, Pix* page_pix) const {
330  if (page_pix == nullptr)
331  return nullptr;
332  int page_width = pixGetWidth(page_pix);
333  int page_height = pixGetHeight(page_pix);
334  TBOX padded_box = bounding_box();
335  padded_box.pad(padding, padding);
336  // Clip the padded_box to the limits of the page
337  TBOX page_box(0, 0, page_width, page_height);
338  padded_box &= page_box;
339  Box* box = boxCreate(page_box.left(), page_height - page_box.top(),
340  page_box.width(), page_box.height());
341  Pix* sample_pix = pixClipRectangle(page_pix, box, nullptr);
342  boxDestroy(&box);
343  return sample_pix;
344 }
345 
346 } // namespace tesseract
MFCount
Definition: mf.h:43
ScrollView
Definition: scrollview.h:97
tesseract::TrainingSample::features
const INT_FEATURE_STRUCT * features() const
Definition: trainingsample.h:143
tesseract::TrainingSample::Serialize
bool Serialize(FILE *fp) const
Definition: trainingsample.cpp:55
tesseract::TrainingSample::MapFeatures
void MapFeatures(const IntFeatureMap &feature_map)
Definition: trainingsample.cpp:286
tesseract::TrainingSample::indexed_features
const GenericVector< int > & indexed_features() const
Definition: trainingsample.h:186
tesseract::TrainingSample::RenderToPix
Pix * RenderToPix(const UNICHARSET *unicharset) const
Definition: trainingsample.cpp:296
ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:87
tesseract::IntFeatureSpace::IndexAndSortFeatures
void IndexAndSortFeatures(const INT_FEATURE_STRUCT *features, int num_features, GenericVector< int > *sorted_features) const
Definition: intfeaturespace.cpp:68
INT_FX_RESULT_STRUCT
Definition: intfx.h:34
IntY
Definition: picofeat.h:45
INT_FEATURE_STRUCT::Theta
uint8_t Theta
Definition: intproto.h:141
tesseract::TrainingSample::GetSamplePix
Pix * GetSamplePix(int padding, Pix *page_pix) const
Definition: trainingsample.cpp:329
CHAR_DESC_STRUCT::FeatureSets
FEATURE_SET FeatureSets[NUM_FEATURE_TYPES]
Definition: featdefs.h:40
TBOX::top
int16_t top() const
Definition: rect.h:57
FEATURE_STRUCT
Definition: ocrfeatures.h:58
GeoWidth
Definition: picofeat.h:38
MF_SCALE_FACTOR
const float MF_SCALE_FACTOR
Definition: mfoutline.h:70
kIntFeatureType
const char *const kIntFeatureType
Definition: featdefs.cpp:33
tesseract::TrainingSample::num_features
uint32_t num_features() const
Definition: trainingsample.h:140
tesseract::kRandomizingCenter
const int kRandomizingCenter
Definition: trainingsample.cpp:36
TBOX::height
int16_t height() const
Definition: rect.h:107
IntX
Definition: picofeat.h:44
IntDir
Definition: picofeat.h:46
tesseract::TrainingSample::Copy
TrainingSample * Copy() const
Definition: trainingsample.cpp:182
tesseract::TrainingSample::~TrainingSample
~TrainingSample()
Definition: trainingsample.cpp:46
tesseract::TrainingSample::DeSerialize
bool DeSerialize(bool swap, FILE *fp)
Definition: trainingsample.cpp:88
INT_FX_RESULT_STRUCT::Ry
int16_t Ry
Definition: intfx.h:37
INT_FX_RESULT_STRUCT::Ymean
int16_t Ymean
Definition: intfx.h:36
tesseract::TrainingSample::DeSerializeCreate
static TrainingSample * DeSerializeCreate(bool swap, FILE *fp)
Definition: trainingsample.cpp:79
INT_FX_RESULT_STRUCT::Rx
int16_t Rx
Definition: intfx.h:37
TBOX::DeSerialize
bool DeSerialize(bool swap, FILE *fp)
Definition: rect.cpp:186
TBOX::Serialize
bool Serialize(FILE *fp) const
Definition: rect.cpp:179
CharNormLength
Definition: normfeat.h:29
tesseract::TrainingSample::GetCNFeature
FEATURE_STRUCT * GetCNFeature() const
Definition: trainingsample.cpp:153
trainingsample.h
LENGTH_COMPRESSION
#define LENGTH_COMPRESSION
Definition: normfeat.h:26
tesseract::TrainingSample::RandomizedCopy
TrainingSample * RandomizedCopy(int index) const
Definition: trainingsample.cpp:163
normfeat.h
shapetable.h
TBOX::width
int16_t width() const
Definition: rect.h:114
UNICHARSET
Definition: unicharset.h:145
TBOX::bottom
int16_t bottom() const
Definition: rect.h:64
FEATURE_SET_STRUCT::Features
FEATURE Features[1]
Definition: ocrfeatures.h:67
INT_FEATURE_STRUCT::Y
uint8_t Y
Definition: intproto.h:140
helpers.h
tesseract
Definition: baseapi.h:65
FEATURE_STRUCT::Params
float Params[1]
Definition: ocrfeatures.h:60
kMicroFeatureType
const char *const kMicroFeatureType
Definition: featdefs.cpp:31
NewFeature
FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc)
Definition: ocrfeatures.cpp:77
sample
Definition: cluster.h:31
GenericVector< int >
FEATURE_SET_STRUCT
Definition: ocrfeatures.h:64
tesseract::IntFeatureSpace
Definition: intfeaturespace.h:38
tesseract::TrainingSample::DisplayFeatures
void DisplayFeatures(ScrollView::Color color, ScrollView *window) const
Definition: trainingsample.cpp:316
tesseract::IntFeatureMap::feature_space
const IntFeatureSpace & feature_space() const
Definition: intfeaturemap.h:60
CHAR_DESC_STRUCT
Definition: featdefs.h:38
GeoTop
Definition: picofeat.h:37
tesseract::TrainingSample::bounding_box
const TBOX & bounding_box() const
Definition: trainingsample.h:134
CharNormDesc
const FEATURE_DESC_STRUCT CharNormDesc
GeoCount
Definition: picofeat.h:40
INT_FEATURE_STRUCT::CP_misses
int8_t CP_misses
Definition: intproto.h:142
TBOX::pad
void pad(int xpad, int ypad)
Definition: rect.h:130
INT_FEATURE_STRUCT
Definition: intproto.h:131
tesseract::TrainingSample
Definition: trainingsample.h:53
tesseract::IntFeatureMap
Definition: intfeaturemap.h:48
TBOX::left
int16_t left() const
Definition: rect.h:71
INT_FX_RESULT_STRUCT::Length
int32_t Length
Definition: intfx.h:35
GeoBottom
Definition: picofeat.h:36
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
tesseract::TrainingSample::TrainingSample
TrainingSample()
Definition: trainingsample.h:55
MicroFeature
float MicroFeature[MFCount]
Definition: mf.h:32
ReverseN
void ReverseN(void *ptr, int num_bytes)
Definition: helpers.h:183
UNICHARSET::id_to_unichar
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:290
FEATURE_SET_STRUCT::NumFeatures
uint16_t NumFeatures
Definition: ocrfeatures.h:65
tesseract::IntFeatureMap::MapIndexedFeatures
int MapIndexedFeatures(const GenericVector< int > &index_features, GenericVector< int > *map_features) const
Definition: intfeaturemap.h:115
ScrollView::Color
Color
Definition: scrollview.h:100
INT_FEATURE_STRUCT::X
uint8_t X
Definition: intproto.h:139
tesseract::TrainingSample::CopyFromFeatures
static TrainingSample * CopyFromFeatures(const INT_FX_RESULT_STRUCT &fx_info, const TBOX &bounding_box, const INT_FEATURE_STRUCT *features, int num_features)
Definition: trainingsample.cpp:125
tesseract::TrainingSample::IndexFeatures
void IndexFeatures(const IntFeatureSpace &feature_space)
Definition: trainingsample.cpp:276
CharNormY
Definition: normfeat.h:29
kIntFeatureExtent
const int kIntFeatureExtent
Definition: intfeaturespace.h:27
kBlnBaselineOffset
const int kBlnBaselineOffset
Definition: normalis.h:24
ELISTIZE
#define ELISTIZE(CLASSNAME)
Definition: elst.h:919
RenderIntFeature
void RenderIntFeature(ScrollView *window, const INT_FEATURE_STRUCT *Feature, ScrollView::Color color)
Definition: intproto.cpp:1603
intfeaturemap.h
CharNormRx
Definition: normfeat.h:29
tesseract::TrainingSample::ExtractCharDesc
void ExtractCharDesc(int feature_type, int micro_type, int cn_type, int geo_type, CHAR_DESC_STRUCT *char_desc)
Definition: trainingsample.cpp:205
TBOX
Definition: rect.h:33
CharNormRy
Definition: normfeat.h:29