18 #include "config_auto.h" 24 #include "allheaders.h" 41 const double TrainingSample::kScaleValues[kSampleScaleSize] = {
47 delete [] micro_features_;
55 if (fwrite(&class_id_,
sizeof(class_id_), 1, fp) != 1)
return false;
56 if (fwrite(&font_id_,
sizeof(font_id_), 1, fp) != 1)
return false;
57 if (fwrite(&page_num_,
sizeof(page_num_), 1, fp) != 1)
return false;
58 if (!bounding_box_.
Serialize(fp))
return false;
59 if (fwrite(&num_features_,
sizeof(num_features_), 1, fp) != 1)
return false;
60 if (fwrite(&num_micro_features_,
sizeof(num_micro_features_), 1, fp) != 1)
62 if (fwrite(&outline_length_,
sizeof(outline_length_), 1, fp) != 1)
64 if (fwrite(features_,
sizeof(*features_), num_features_, fp) != num_features_)
66 if (fwrite(micro_features_,
sizeof(*micro_features_), num_micro_features_,
67 fp) != num_micro_features_)
69 if (fwrite(cn_feature_,
sizeof(*cn_feature_), kNumCNParams, fp) !=
70 kNumCNParams)
return false;
88 if (fread(&class_id_,
sizeof(class_id_), 1, fp) != 1)
return false;
89 if (fread(&font_id_,
sizeof(font_id_), 1, fp) != 1)
return false;
90 if (fread(&page_num_,
sizeof(page_num_), 1, fp) != 1)
return false;
91 if (!bounding_box_.
DeSerialize(swap, fp))
return false;
92 if (fread(&num_features_,
sizeof(num_features_), 1, fp) != 1)
return false;
93 if (fread(&num_micro_features_,
sizeof(num_micro_features_), 1, fp) != 1)
95 if (fread(&outline_length_,
sizeof(outline_length_), 1, fp) != 1)
98 ReverseN(&class_id_,
sizeof(class_id_));
99 ReverseN(&num_features_,
sizeof(num_features_));
100 ReverseN(&num_micro_features_,
sizeof(num_micro_features_));
101 ReverseN(&outline_length_,
sizeof(outline_length_));
104 if (num_features_ > UINT16_MAX)
return false;
105 if (num_micro_features_ > UINT16_MAX)
return false;
108 if (fread(features_,
sizeof(*features_), num_features_, fp)
111 delete [] micro_features_;
112 micro_features_ =
new MicroFeature[num_micro_features_];
113 if (fread(micro_features_,
sizeof(*micro_features_), num_micro_features_,
114 fp) != num_micro_features_)
116 if (fread(cn_feature_,
sizeof(*cn_feature_), kNumCNParams, fp) !=
117 kNumCNParams)
return false;
126 const TBOX& bounding_box,
146 sample->features_are_indexed_ =
false;
147 sample->features_are_mapped_ =
false;
154 for (
int i = 0; i < kNumCNParams; ++i)
155 feature->
Params[i] = cn_feature_[i];
164 if (index >= 0 && index < kSampleRandomSize) {
166 const int yshift = kYShiftValues[index / kSampleScaleSize];
167 double scaling = kScaleValues[index % kSampleScaleSize];
168 for (uint32_t i = 0; i < num_features_; ++i) {
171 sample->features_[i].X = ClipToRange<int>(result + 0.5, 0, UINT8_MAX);
174 sample->features_[i].Y = ClipToRange<int>(result + 0.5, 0, UINT8_MAX);
183 sample->class_id_ = class_id_;
184 sample->font_id_ = font_id_;
185 sample->weight_ = weight_;
186 sample->sample_index_ = sample_index_;
187 sample->num_features_ = num_features_;
188 if (num_features_ > 0) {
190 memcpy(
sample->features_, features_, num_features_ *
sizeof(features_[0]));
192 sample->num_micro_features_ = num_micro_features_;
193 if (num_micro_features_ > 0) {
195 memcpy(
sample->micro_features_, micro_features_,
196 num_micro_features_ *
sizeof(micro_features_[0]));
198 memcpy(
sample->cn_feature_, cn_feature_,
sizeof(*cn_feature_) * kNumCNParams);
199 memcpy(
sample->geo_feature_, geo_feature_,
sizeof(*geo_feature_) *
GeoCount);
212 if (char_features ==
nullptr) {
213 tprintf(
"Error: no features to train on of type %s\n",
220 for (uint32_t f = 0; f < num_features_; ++f) {
231 delete[] micro_features_;
232 char_features = char_desc->
FeatureSets[micro_type];
233 if (char_features ==
nullptr) {
234 tprintf(
"Error: no features to train on of type %s\n",
236 num_micro_features_ = 0;
237 micro_features_ =
nullptr;
240 micro_features_ =
new MicroFeature[num_micro_features_];
241 for (uint32_t f = 0; f < num_micro_features_; ++f) {
242 for (
int d = 0; d <
MFCount; ++d) {
249 if (char_features ==
nullptr) {
250 tprintf(
"Error: no CN feature to train on.\n");
261 if (char_features ==
nullptr) {
262 tprintf(
"Error: no Geo feature to train on.\n");
269 features_are_indexed_ =
false;
270 features_are_mapped_ =
false;
279 features_are_indexed_ =
true;
280 features_are_mapped_ =
false;
290 features_are_indexed_ =
false;
291 features_are_mapped_ =
true;
297 for (uint32_t f = 0; f < num_features_; ++f) {
298 int start_x = features_[f].
X;
300 double dx = cos((features_[f].Theta / 256.0) * 2.0 * M_PI - M_PI);
301 double dy = -sin((features_[f].Theta / 256.0) * 2.0 * M_PI - M_PI);
302 for (
int i = 0; i <= 5; ++i) {
303 int x =
static_cast<int>(start_x + dx * i);
304 int y =
static_cast<int>(start_y + dy * i);
305 if (x >= 0 && x < 256 && y >= 0 && y < 256)
306 pixSetPixel(pix, x, y, 1);
309 if (unicharset !=
nullptr)
317 #ifndef GRAPHICS_DISABLED 318 for (uint32_t f = 0; f < num_features_; ++f) {
321 #endif // GRAPHICS_DISABLED 329 if (page_pix ==
nullptr)
331 int page_width = pixGetWidth(page_pix);
332 int page_height = pixGetHeight(page_pix);
334 padded_box.
pad(padding, padding);
336 TBOX page_box(0, 0, page_width, page_height);
337 padded_box &= page_box;
338 Box* box = boxCreate(page_box.
left(), page_height - page_box.
top(),
340 Pix* sample_pix = pixClipRectangle(page_pix, box,
nullptr);
void DisplayFeatures(ScrollView::Color color, ScrollView *window) const
const IntFeatureSpace & feature_space() const
const int kIntFeatureExtent
const int kRandomizingCenter
static TrainingSample * CopyFromFeatures(const INT_FX_RESULT_STRUCT &fx_info, const TBOX &bounding_box, const INT_FEATURE_STRUCT *features, int num_features)
const int kBlnBaselineOffset
TrainingSample * Copy() const
bool DeSerialize(bool swap, FILE *fp)
void ReverseN(void *ptr, int num_bytes)
void ExtractCharDesc(int feature_type, int micro_type, int cn_type, int geo_type, CHAR_DESC_STRUCT *char_desc)
static TrainingSample * DeSerializeCreate(bool swap, FILE *fp)
Pix * GetSamplePix(int padding, Pix *page_pix) const
#define LENGTH_COMPRESSION
const TBOX & bounding_box() const
void RenderIntFeature(ScrollView *window, const INT_FEATURE_STRUCT *Feature, ScrollView::Color color)
const INT_FEATURE_STRUCT * features() const
#define ELISTIZE(CLASSNAME)
FEATURE_SET FeatureSets[NUM_FEATURE_TYPES]
bool Serialize(FILE *fp) const
uint32_t num_features() const
DLLSYM void tprintf(const char *format,...)
float MicroFeature[MFCount]
const char * kMicroFeatureType
bool Serialize(FILE *fp) const
FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc)
FEATURE_STRUCT * GetCNFeature() const
void MapFeatures(const IntFeatureMap &feature_map)
const FEATURE_DESC_STRUCT CharNormDesc
const char * id_to_unichar(UNICHAR_ID id) const
Pix * RenderToPix(const UNICHARSET *unicharset) const
bool DeSerialize(bool swap, FILE *fp)
void IndexAndSortFeatures(const INT_FEATURE_STRUCT *features, int num_features, GenericVector< int > *sorted_features) const
void IndexFeatures(const IntFeatureSpace &feature_space)
int MapIndexedFeatures(const GenericVector< int > &index_features, GenericVector< int > *map_features) const
const char * kIntFeatureType
const GenericVector< int > & indexed_features() const
void pad(int xpad, int ypad)
TrainingSample * RandomizedCopy(int index) const