tesseract  5.0.0-alpha-619-ge9db
picofeat.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: picofeat.cpp
3  ** Purpose: Definition of pico-features.
4  ** Author: Dan Johnson
5  **
6  ** (c) Copyright Hewlett-Packard Company, 1988.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  ******************************************************************************/
17 /*----------------------------------------------------------------------------
18  Include Files and Type Defines
19 ----------------------------------------------------------------------------*/
20 #include "picofeat.h"
21 
22 #include "classify.h"
23 #include "featdefs.h"
24 #include "fpoint.h"
25 #include "mfoutline.h"
26 #include "ocrfeatures.h"
27 #include "params.h"
28 #include "trainingsample.h"
29 
30 #include <cmath>
31 #include <cstdio>
32 
33 /*---------------------------------------------------------------------------
34  Variables
35 ----------------------------------------------------------------------------*/
36 
/* Tunable parameter (default 0.05) giving the nominal length of one
   pico-feature.  ConvertSegmentToPicoFeat divides each segment's length by
   this value (rounding to nearest, minimum 1) to decide how many
   pico-features the segment is split into. */
37 double_VAR(classify_pico_feature_length, 0.05, "Pico Feature Length");
38 
39 /*---------------------------------------------------------------------------
40  Private Function Prototypes
41 ----------------------------------------------------------------------------*/
43  FPOINT *End,
44  FEATURE_SET FeatureSet);
45 
/* Forward declaration; defined in the private code section of this file.
   Walks the closed outline and appends pico-features for every edge whose
   ending point is not marked hidden to FeatureSet. */
46 void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet);
47 
/* Forward declaration; defined in the private code section of this file.
   Subtracts the mean PicoFeatX value from the PicoFeatX parameter of every
   feature in FeatureSet. */
48 void NormalizePicoX(FEATURE_SET FeatureSet);
49 
50 /*----------------------------------------------------------------------------
51  Public Code
52 ----------------------------------------------------------------------------*/
53 /*---------------------------------------------------------------------------*/
54 namespace tesseract {
64  LIST Outlines;
65  LIST RemainingOutlines;
66  MFOUTLINE Outline;
67  FEATURE_SET FeatureSet;
68  float XScale, YScale;
69 
70  FeatureSet = NewFeatureSet(MAX_PICO_FEATURES);
71  Outlines = ConvertBlob(Blob);
72  NormalizeOutlines(Outlines, &XScale, &YScale);
73  RemainingOutlines = Outlines;
74  iterate(RemainingOutlines) {
75  Outline = static_cast<MFOUTLINE>first_node (RemainingOutlines);
76  ConvertToPicoFeatures2(Outline, FeatureSet);
77  }
79  NormalizePicoX(FeatureSet);
80  FreeOutlines(Outlines);
81  return (FeatureSet);
82 
83 } /* ExtractPicoFeatures */
84 } // namespace tesseract
85 
86 /*----------------------------------------------------------------------------
87  Private Code
88 ----------------------------------------------------------------------------*/
89 /*---------------------------------------------------------------------------*/
103 void ConvertSegmentToPicoFeat(FPOINT *Start,
104  FPOINT *End,
105  FEATURE_SET FeatureSet) {
106  FEATURE Feature;
107  float Angle;
108  float Length;
109  int NumFeatures;
110  FPOINT Center;
111  FPOINT Delta;
112  int i;
113 
114  Angle = NormalizedAngleFrom (Start, End, 1.0);
115  Length = DistanceBetween (*Start, *End);
116  NumFeatures = static_cast<int>(floor (Length / classify_pico_feature_length + 0.5));
117  if (NumFeatures < 1)
118  NumFeatures = 1;
119 
120  /* compute vector for one pico feature */
121  Delta.x = XDelta (*Start, *End) / NumFeatures;
122  Delta.y = YDelta (*Start, *End) / NumFeatures;
123 
124  /* compute position of first pico feature */
125  Center.x = Start->x + Delta.x / 2.0;
126  Center.y = Start->y + Delta.y / 2.0;
127 
128  /* compute each pico feature in segment and add to feature set */
129  for (i = 0; i < NumFeatures; i++) {
130  Feature = NewFeature (&PicoFeatDesc);
131  Feature->Params[PicoFeatDir] = Angle;
132  Feature->Params[PicoFeatX] = Center.x;
133  Feature->Params[PicoFeatY] = Center.y;
134  AddFeature(FeatureSet, Feature);
135 
136  Center.x += Delta.x;
137  Center.y += Delta.y;
138  }
139 } /* ConvertSegmentToPicoFeat */
140 
141 
142 /*---------------------------------------------------------------------------*/
155 void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet) {
156  MFOUTLINE Next;
157  MFOUTLINE First;
158  MFOUTLINE Current;
159 
160  if (DegenerateOutline(Outline))
161  return;
162 
163  First = Outline;
164  Current = First;
165  Next = NextPointAfter(Current);
166  do {
167  /* note that an edge is hidden if the ending point of the edge is
168  marked as hidden. This situation happens because the order of
169  the outlines is reversed when they are converted from the old
170  format. In the old format, a hidden edge is marked by the
171  starting point for that edge. */
172  if (!(PointAt(Next)->Hidden))
173  ConvertSegmentToPicoFeat (&(PointAt(Current)->Point),
174  &(PointAt(Next)->Point), FeatureSet);
175 
176  Current = Next;
177  Next = NextPointAfter(Current);
178  }
179  while (Current != First);
180 
181 } /* ConvertToPicoFeatures2 */
182 
183 
184 /*---------------------------------------------------------------------------*/
193 void NormalizePicoX(FEATURE_SET FeatureSet) {
194  int i;
195  FEATURE Feature;
196  float Origin = 0.0;
197 
198  for (i = 0; i < FeatureSet->NumFeatures; i++) {
199  Feature = FeatureSet->Features[i];
200  Origin += Feature->Params[PicoFeatX];
201  }
202  Origin /= FeatureSet->NumFeatures;
203 
204  for (i = 0; i < FeatureSet->NumFeatures; i++) {
205  Feature = FeatureSet->Features[i];
206  Feature->Params[PicoFeatX] -= Origin;
207  }
208 } /* NormalizePicoX */
209 
210 namespace tesseract {
211 /*---------------------------------------------------------------------------*/
218  const TBLOB& blob, const INT_FX_RESULT_STRUCT& fx_info) {
219  INT_FX_RESULT_STRUCT local_fx_info(fx_info);
222  blob, false, &local_fx_info, &bl_features);
223  if (sample == nullptr) return nullptr;
224 
225  uint32_t num_features = sample->num_features();
226  const INT_FEATURE_STRUCT* features = sample->features();
227  FEATURE_SET feature_set = NewFeatureSet(num_features);
228  for (uint32_t f = 0; f < num_features; ++f) {
229  FEATURE feature = NewFeature(&IntFeatDesc);
230 
231  feature->Params[IntX] = features[f].X;
232  feature->Params[IntY] = features[f].Y;
233  feature->Params[IntDir] = features[f].Theta;
234  AddFeature(feature_set, feature);
235  }
236  delete sample;
237 
238  return feature_set;
239 } /* ExtractIntCNFeatures */
240 
241 /*---------------------------------------------------------------------------*/
248  const TBLOB& blob, const INT_FX_RESULT_STRUCT& fx_info) {
249  INT_FX_RESULT_STRUCT local_fx_info(fx_info);
252  blob, false, &local_fx_info, &bl_features);
253  if (sample == nullptr) return nullptr;
254 
255  FEATURE_SET feature_set = NewFeatureSet(1);
256  FEATURE feature = NewFeature(&IntFeatDesc);
257 
258  feature->Params[GeoBottom] = sample->geo_feature(GeoBottom);
259  feature->Params[GeoTop] = sample->geo_feature(GeoTop);
260  feature->Params[GeoWidth] = sample->geo_feature(GeoWidth);
261  AddFeature(feature_set, feature);
262  delete sample;
263 
264  return feature_set;
265 } /* ExtractIntGeoFeatures */
266 
267 } // namespace tesseract.
picofeat.h
AddFeature
bool AddFeature(FEATURE_SET FeatureSet, FEATURE Feature)
Definition: ocrfeatures.cpp:39
fpoint.h
tesseract::Classify::classify_norm_method
int classify_norm_method
Definition: classify.h:434
NormalizePicoX
void NormalizePicoX(FEATURE_SET FeatureSet)
Definition: picofeat.cpp:192
tesseract::BlobToTrainingSample
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
Definition: intfx.cpp:75
first_node
#define first_node(l)
Definition: oldlist.h:84
FPOINT::y
float y
Definition: fpoint.h:44
list_rec
Definition: oldlist.h:73
baseline
Definition: mfoutline.h:62
INT_FX_RESULT_STRUCT
Definition: intfx.h:34
IntY
Definition: picofeat.h:45
mfoutline.h
params.h
INT_FEATURE_STRUCT::Theta
uint8_t Theta
Definition: intproto.h:141
NormalizedAngleFrom
float NormalizedAngleFrom(FPOINT *Point1, FPOINT *Point2, float FullScale)
Definition: fpoint.cpp:43
FEATURE_STRUCT
Definition: ocrfeatures.h:58
FPOINT
Definition: fpoint.h:28
tesseract::Classify::NormalizeOutlines
void NormalizeOutlines(LIST Outlines, float *XScale, float *YScale)
Definition: mfoutline.cpp:275
ConvertToPicoFeatures2
void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet)
Definition: picofeat.cpp:154
PicoFeatY
Definition: picofeat.h:43
GeoWidth
Definition: picofeat.h:38
FPOINT::x
float x
Definition: fpoint.h:44
PicoFeatX
Definition: picofeat.h:43
tesseract::Classify::ExtractIntGeoFeatures
FEATURE_SET ExtractIntGeoFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
Definition: picofeat.cpp:246
PicoFeatDir
Definition: picofeat.h:43
IntX
Definition: picofeat.h:44
IntDir
Definition: picofeat.h:46
ocrfeatures.h
DistanceBetween
float DistanceBetween(FPOINT A, FPOINT B)
Definition: fpoint.cpp:28
trainingsample.h
ConvertBlob
LIST ConvertBlob(TBLOB *blob)
Definition: mfoutline.cpp:36
FEATURE_SET_STRUCT::Features
FEATURE Features[1]
Definition: ocrfeatures.h:67
YDelta
#define YDelta(A, B)
Definition: fpoint.h:38
INT_FEATURE_STRUCT::Y
uint8_t Y
Definition: intproto.h:140
tesseract
Definition: baseapi.h:65
classify_pico_feature_length
double classify_pico_feature_length
Definition: picofeat.cpp:36
FEATURE_STRUCT::Params
float Params[1]
Definition: ocrfeatures.h:60
double_VAR
#define double_VAR(name, val, comment)
Definition: params.h:309
tesseract::Classify::ExtractIntCNFeatures
FEATURE_SET ExtractIntCNFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
Definition: picofeat.cpp:216
NewFeature
FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc)
Definition: ocrfeatures.cpp:77
sample
Definition: cluster.h:31
GenericVector
Definition: baseapi.h:40
FEATURE_SET_STRUCT
Definition: ocrfeatures.h:64
FreeOutlines
void FreeOutlines(LIST Outlines)
Definition: mfoutline.cpp:166
tesseract::Classify::ExtractPicoFeatures
FEATURE_SET ExtractPicoFeatures(TBLOB *Blob)
Definition: picofeat.cpp:62
GeoTop
Definition: picofeat.h:37
PicoFeatDesc
const TESS_API FEATURE_DESC_STRUCT PicoFeatDesc
NewFeatureSet
FEATURE_SET NewFeatureSet(int NumFeatures)
Definition: ocrfeatures.cpp:93
INT_FEATURE_STRUCT
Definition: intproto.h:131
TBLOB
Definition: blobs.h:282
tesseract::TrainingSample
Definition: trainingsample.h:53
featdefs.h
iterate
#define iterate(l)
Definition: oldlist.h:92
GeoBottom
Definition: picofeat.h:36
IntFeatDesc
const FEATURE_DESC_STRUCT IntFeatDesc
XDelta
#define XDelta(A, B)
Definition: fpoint.h:37
ConvertSegmentToPicoFeat
void ConvertSegmentToPicoFeat(FPOINT *Start, FPOINT *End, FEATURE_SET FeatureSet)
Definition: picofeat.cpp:102
classify.h
FEATURE_SET_STRUCT::NumFeatures
uint16_t NumFeatures
Definition: ocrfeatures.h:65
INT_FEATURE_STRUCT::X
uint8_t X
Definition: intproto.h:139
MAX_PICO_FEATURES
#define MAX_PICO_FEATURES
Definition: picofeat.h:45