18 #include "config_auto.h"
24 #include "allheaders.h"
41 const double TrainingSample::kScaleValues[kSampleScaleSize] = {
47 delete [] micro_features_;
55 if (fwrite(&class_id_,
sizeof(class_id_), 1, fp) != 1)
return false;
56 if (fwrite(&font_id_,
sizeof(font_id_), 1, fp) != 1)
return false;
57 if (fwrite(&page_num_,
sizeof(page_num_), 1, fp) != 1)
return false;
58 if (!bounding_box_.
Serialize(fp))
return false;
59 if (fwrite(&num_features_,
sizeof(num_features_), 1, fp) != 1)
return false;
60 if (fwrite(&num_micro_features_,
sizeof(num_micro_features_), 1, fp) != 1)
62 if (fwrite(&outline_length_,
sizeof(outline_length_), 1, fp) != 1)
64 if (static_cast<int>(fwrite(features_,
sizeof(*features_), num_features_, fp))
67 if (static_cast<int>(fwrite(micro_features_,
sizeof(*micro_features_),
69 fp)) != num_micro_features_)
71 if (fwrite(cn_feature_,
sizeof(*cn_feature_), kNumCNParams, fp) !=
72 kNumCNParams)
return false;
90 if (fread(&class_id_,
sizeof(class_id_), 1, fp) != 1)
return false;
91 if (fread(&font_id_,
sizeof(font_id_), 1, fp) != 1)
return false;
92 if (fread(&page_num_,
sizeof(page_num_), 1, fp) != 1)
return false;
93 if (!bounding_box_.
DeSerialize(swap, fp))
return false;
94 if (fread(&num_features_,
sizeof(num_features_), 1, fp) != 1)
return false;
95 if (fread(&num_micro_features_,
sizeof(num_micro_features_), 1, fp) != 1)
97 if (fread(&outline_length_,
sizeof(outline_length_), 1, fp) != 1)
100 ReverseN(&class_id_,
sizeof(class_id_));
101 ReverseN(&num_features_,
sizeof(num_features_));
102 ReverseN(&num_micro_features_,
sizeof(num_micro_features_));
103 ReverseN(&outline_length_,
sizeof(outline_length_));
107 if (static_cast<int>(fread(features_,
sizeof(*features_), num_features_, fp))
110 delete [] micro_features_;
111 micro_features_ =
new MicroFeature[num_micro_features_];
112 if (static_cast<int>(fread(micro_features_,
sizeof(*micro_features_),
114 fp)) != num_micro_features_)
116 if (fread(cn_feature_,
sizeof(*cn_feature_), kNumCNParams, fp) !=
117 kNumCNParams)
return false;
126 const TBOX& bounding_box,
132 sample->outline_length_ = fx_info.
Length;
133 memcpy(sample->features_, features, num_features *
sizeof(features[0]));
135 sample->geo_feature_[
GeoTop] = bounding_box.
top();
146 sample->features_are_indexed_ =
false;
147 sample->features_are_mapped_ =
false;
154 for (
int i = 0; i < kNumCNParams; ++i)
155 feature->
Params[i] = cn_feature_[i];
164 if (index >= 0 && index < kSampleRandomSize) {
166 int yshift = kYShiftValues[index / kSampleScaleSize];
167 double scaling = kScaleValues[index % kSampleScaleSize];
168 for (
int i = 0; i < num_features_; ++i) {
171 sample->features_[i].
X =
ClipToRange(static_cast<int>(result + 0.5), 0,
175 sample->features_[i].
Y =
ClipToRange(static_cast<int>(result + 0.5), 0,
185 sample->class_id_ = class_id_;
186 sample->font_id_ = font_id_;
187 sample->weight_ = weight_;
188 sample->sample_index_ = sample_index_;
189 sample->num_features_ = num_features_;
190 if (num_features_ > 0) {
192 memcpy(sample->features_, features_, num_features_ *
sizeof(features_[0]));
194 sample->num_micro_features_ = num_micro_features_;
195 if (num_micro_features_ > 0) {
196 sample->micro_features_ =
new MicroFeature[num_micro_features_];
197 memcpy(sample->micro_features_, micro_features_,
198 num_micro_features_ *
sizeof(micro_features_[0]));
200 memcpy(sample->cn_feature_, cn_feature_,
sizeof(*cn_feature_) * kNumCNParams);
201 memcpy(sample->geo_feature_, geo_feature_,
sizeof(*geo_feature_) *
GeoCount);
212 if (features_ !=
NULL)
delete [] features_;
214 if (char_features ==
NULL) {
215 tprintf(
"Error: no features to train on of type %s\n",
222 for (
int f = 0; f < num_features_; ++f) {
233 if (micro_features_ !=
NULL)
delete [] micro_features_;
234 char_features = char_desc->
FeatureSets[micro_type];
235 if (char_features ==
NULL) {
236 tprintf(
"Error: no features to train on of type %s\n",
238 num_micro_features_ = 0;
239 micro_features_ =
NULL;
242 micro_features_ =
new MicroFeature[num_micro_features_];
243 for (
int f = 0; f < num_micro_features_; ++f) {
244 for (
int d = 0; d <
MFCount; ++d) {
251 if (char_features ==
NULL) {
252 tprintf(
"Error: no CN feature to train on.\n");
263 if (char_features ==
NULL) {
264 tprintf(
"Error: no Geo feature to train on.\n");
271 features_are_indexed_ =
false;
272 features_are_mapped_ =
false;
281 features_are_indexed_ =
true;
282 features_are_mapped_ =
false;
292 features_are_indexed_ =
false;
293 features_are_mapped_ =
true;
299 for (
int f = 0; f < num_features_; ++f) {
300 int start_x = features_[f].
X;
302 double dx = cos((features_[f].Theta / 256.0) * 2.0 *
PI -
PI);
303 double dy = -sin((features_[f].Theta / 256.0) * 2.0 *
PI -
PI);
304 for (
int i = 0; i <= 5; ++i) {
305 int x =
static_cast<int>(start_x + dx * i);
306 int y =
static_cast<int>(start_y + dy * i);
307 if (x >= 0 && x < 256 && y >= 0 && y < 256)
308 pixSetPixel(pix, x, y, 1);
311 if (unicharset !=
NULL)
319 #ifndef GRAPHICS_DISABLED
320 for (
int f = 0; f < num_features_; ++f) {
323 #endif // GRAPHICS_DISABLED
331 if (page_pix ==
NULL)
333 int page_width = pixGetWidth(page_pix);
334 int page_height = pixGetHeight(page_pix);
336 padded_box.
pad(padding, padding);
338 TBOX page_box(0, 0, page_width, page_height);
339 padded_box &= page_box;
340 Box* box = boxCreate(page_box.
left(), page_height - page_box.
top(),
342 Pix* sample_pix = pixClipRectangle(page_pix, box,
NULL);
void IndexFeatures(const IntFeatureSpace &feature_space)
bool Serialize(FILE *fp) const
Pix * RenderToPix(const UNICHARSET *unicharset) const
FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc)
const char * kIntFeatureType
FEATURE_SET FeatureSets[NUM_FEATURE_TYPES]
Pix * GetSamplePix(int padding, Pix *page_pix) const
void MapFeatures(const IntFeatureMap &feature_map)
TrainingSample * RandomizedCopy(int index) const
bool DeSerialize(bool swap, FILE *fp)
const IntFeatureSpace & feature_space() const
void IndexAndSortFeatures(const INT_FEATURE_STRUCT *features, int num_features, GenericVector< int > *sorted_features) const
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
void pad(int xpad, int ypad)
int MapIndexedFeatures(const GenericVector< int > &index_features, GenericVector< int > *map_features) const
const GenericVector< int > & indexed_features() const
#define LENGTH_COMPRESSION
FEATURE_STRUCT * GetCNFeature() const
const FEATURE_DESC_STRUCT CharNormDesc
bool DeSerialize(bool swap, FILE *fp)
const char *const id_to_unichar(UNICHAR_ID id) const
static TrainingSample * DeSerializeCreate(bool swap, FILE *fp)
static TrainingSample * CopyFromFeatures(const INT_FX_RESULT_STRUCT &fx_info, const TBOX &bounding_box, const INT_FEATURE_STRUCT *features, int num_features)
const char * kMicroFeatureType
const int kBlnBaselineOffset
void RenderIntFeature(ScrollView *window, const INT_FEATURE_STRUCT *Feature, ScrollView::Color color)
const int kIntFeatureExtent
void ExtractCharDesc(int feature_type, int micro_type, int cn_type, int geo_type, CHAR_DESC_STRUCT *char_desc)
TrainingSample * Copy() const
void ReverseN(void *ptr, int num_bytes)
float MicroFeature[MFCount]
bool Serialize(FILE *fp) const
void DisplayFeatures(ScrollView::Color color, ScrollView *window) const
const TBOX & bounding_box() const
const int kRandomizingCenter