All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
intfx.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: intfx.cpp
3  ** Purpose: Integer character normalization & feature extraction
4  ** Author: Robert Moss, rays@google.com (Ray Smith)
5  ** History: Tue May 21 15:51:57 MDT 1991, RWM, Created.
6  ** Tue Feb 28 10:42:00 PST 2012, vastly rewritten to allow
7  greyscale fx and non-linear
8  normalization.
9  **
10  ** (c) Copyright Hewlett-Packard Company, 1988.
11  ** Licensed under the Apache License, Version 2.0 (the "License");
12  ** you may not use this file except in compliance with the License.
13  ** You may obtain a copy of the License at
14  ** http://www.apache.org/licenses/LICENSE-2.0
15  ** Unless required by applicable law or agreed to in writing, software
16  ** distributed under the License is distributed on an "AS IS" BASIS,
17  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  ** See the License for the specific language governing permissions and
19  ** limitations under the License.
20  ******************************************************************************/
24 #include "intfx.h"
25 #include "allheaders.h"
26 #include "ccutil.h"
27 #include "classify.h"
28 #include "const.h"
29 #include "helpers.h"
30 #include "intmatcher.h"
31 #include "linlsq.h"
32 #include "ndminx.h"
33 #include "normalis.h"
34 #include "statistc.h"
35 #include "trainingsample.h"
36 
38 
42 // Lookup tables for cos and sin that turn the intfx feature angle into a vector.
43 // Writes are protected by atan_table_mutex; InitIntegerFX() fills both tables.
44 // The entries are in binary degrees where a full circle is 256 binary degrees.
45 static float cos_table[INT_CHAR_NORM_RANGE];
46 static float sin_table[INT_CHAR_NORM_RANGE];
47 // Guards write access to cos_table/sin_table so we don't create them more than once.
48 tesseract::CCUtilMutex atan_table_mutex;
49 
50 
54 /*---------------------------------------------------------------------------*/
// Fills cos_table and sin_table the first time it is called; later calls only
// take and release atan_table_mutex and leave the tables untouched.
// Entry i holds the cos/sin of (i * 2 * PI / INT_CHAR_NORM_RANGE + PI), i.e.
// the index is a fraction of a full turn with a half-turn offset added.
55 void InitIntegerFX() {
56  static bool atan_table_init = false;  // Becomes true once both tables are filled.
57  atan_table_mutex.Lock();  // Held across the flag test and the fill.
58  if (!atan_table_init) {
59  for (int i = 0; i < INT_CHAR_NORM_RANGE; ++i) {
60  cos_table[i] = cos(i * 2 * PI / INT_CHAR_NORM_RANGE + PI);
61  sin_table[i] = sin(i * 2 * PI / INT_CHAR_NORM_RANGE + PI);
62  }
63  atan_table_init = true;
64  }
65  atan_table_mutex.Unlock();
66 }
67 
68 // Returns a vector representing the direction of a feature with the given
69 // theta direction in an INT_FEATURE_STRUCT.
// The components are read straight from cos_table/sin_table, which are only
// filled by InitIntegerFX(); theta is used as the table index without a check.
70 FCOORD FeatureDirection(uinT8 theta) {
71  return FCOORD(cos_table[theta], sin_table[theta]);
72 }
73 
74 namespace tesseract {
75 
76 // Generates a TrainingSample from a TBLOB. Extracts features and sets
77 // the bounding box, so classifiers that operate on the image can work.
// The cn features are copied into the returned sample; the bl features are
// handed back through bl_features and the extraction summary through fx_info.
// fx_info must not be NULL: fx_info->NumCN is read unconditionally.
// Returns NULL when no cn features were extracted.
// NOTE(review): presumably the caller owns the returned sample — confirm
// against TrainingSample::CopyFromFeatures.
78 // TODO(rays) Make BlobToTrainingSample a member of Classify now that
79 // the FlexFx and FeatureDescription code have been removed and LearnBlob
80 // is now a member of Classify.
81 TrainingSample* BlobToTrainingSample(
82  const TBLOB& blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT* fx_info,
83  GenericVector<INT_FEATURE_STRUCT>* bl_features) {
84  GenericVector<INT_FEATURE_STRUCT> cn_features;
85  Classify::ExtractFeatures(blob, nonlinear_norm, bl_features,
86  &cn_features, fx_info, NULL);
87  // TODO(rays) Use blob->PreciseBoundingBox() instead.
88  TBOX box = blob.bounding_box();
89  TrainingSample* sample = NULL;
90  int num_features = fx_info->NumCN;
91  if (num_features > 0) {
  // &cn_features[0] is only taken when there is at least one feature.
92  sample = TrainingSample::CopyFromFeatures(*fx_info, box, &cn_features[0],
93  num_features);
94  }
95  if (sample != NULL) {
96  // Set the bounding box (in original image coordinates) in the sample.
97  TPOINT topleft, botright;
98  topleft.x = box.left();
99  topleft.y = box.top();
100  botright.x = box.right();
101  botright.y = box.bottom();
102  TPOINT original_topleft, original_botright;
  // Map the two corners of the blob's box back through the blob's denorm.
103  blob.denorm().DenormTransform(NULL, topleft, &original_topleft);
104  blob.denorm().DenormTransform(NULL, botright, &original_botright);
105  sample->set_bounding_box(TBOX(original_topleft.x, original_botright.y,
106  original_botright.x, original_topleft.y));
107  }
108  return sample;
109 }
110 
111 // Computes the DENORMS for bl(baseline) and cn(character) normalization
112 // during feature extraction. The input denorm describes the current state
113 // of the blob, which is usually a baseline-normalized word.
114 // The Transforms setup are as follows:
115 // Baseline Normalized (bl) Output:
116 // We center the grapheme by aligning the x-coordinate of its centroid with
117 // x=128 and leaving the already-baseline-normalized y as-is.
118 //
119 // Character Normalized (cn) Output:
120 // We align the grapheme's centroid at the origin and scale it
121 // asymmetrically in x and y so that the 2nd moments are a standard value
122 // (51.2) ie the result is vaguely square.
123 // If classify_nonlinear_norm is true:
124 // A non-linear normalization is setup that attempts to evenly distribute
125 // edges across x and y.
126 //
127 // Some of the fields of fx_info are also setup:
128 // Length: Total length of outline.
129 // Rx: Rounded y second moment. (Reversed by convention.)
130 // Ry: rounded x second moment.
131 // Xmean: Rounded x center of mass of the blob.
132 // Ymean: Rounded y center of mass of the blob.
// blob: the blob whose moments and denorm seed both normalizations.
// nonlinear_norm: selects SetupNonLinear instead of the moment-based scaling
// for cn_denorm.
// bl_denorm, cn_denorm: outputs, always set up.
// fx_info: optional output; may be NULL, in which case no summary is written.
133 void Classify::SetupBLCNDenorms(const TBLOB& blob, bool nonlinear_norm,
134  DENORM* bl_denorm, DENORM* cn_denorm,
135  INT_FX_RESULT_STRUCT* fx_info) {
136  // Compute 1st and 2nd moments of the original outline.
137  FCOORD center, second_moments;
138  int length = blob.ComputeMoments(&center, &second_moments);
139  if (fx_info != NULL) {
140  fx_info->Length = length;
  // Rx takes the y moment and Ry the x moment on purpose (reversed by convention).
141  fx_info->Rx = IntCastRounded(second_moments.y());
142  fx_info->Ry = IntCastRounded(second_moments.x());
143 
144  fx_info->Xmean = IntCastRounded(center.x());
145  fx_info->Ymean = IntCastRounded(center.y());
146  }
147  // Setup the denorm for Baseline normalization.
  // Origin is (center.x, 128), scale is 1 in both axes, final shift is (128, 128).
148  bl_denorm->SetupNormalization(NULL, NULL, &blob.denorm(), center.x(), 128.0f,
149  1.0f, 1.0f, 128.0f, 128.0f);
150  // Setup the denorm for character normalization.
151  if (nonlinear_norm) {
  // Edge coordinates are collected inside the precise bounding box padded by 1.
152  GenericVector<GenericVector<int> > x_coords;
153  GenericVector<GenericVector<int> > y_coords;
154  TBOX box;
155  blob.GetPreciseBoundingBox(&box);
156  box.pad(1, 1);
157  blob.GetEdgeCoords(box, &x_coords, &y_coords);
158  cn_denorm->SetupNonLinear(&blob.denorm(), box, MAX_UINT8, MAX_UINT8,
159  0.0f, 0.0f, x_coords, y_coords);
160  } else {
  // NOTE(review): there is no guard against a zero second moment in the
  // divisions below; whether ComputeMoments can return zero is not visible
  // in this file — confirm.
161  cn_denorm->SetupNormalization(NULL, NULL, &blob.denorm(),
162  center.x(), center.y(),
163  51.2f / second_moments.x(),
164  51.2f / second_moments.y(),
165  128.0f, 128.0f);
166  }
167 }
168 
169 // Helper normalizes the direction, assuming that it is at the given
170 // unnormed_pos, using the given denorm, starting at the root_denorm.
// dir is a direction code as consumed by FCOORD::from_direction. The result is
// the code FCOORD::to_direction yields for the vector between the normalized
// images of unnormed_pos and of unnormed_pos + direction vector.
// Two points are transformed rather than the vector itself, presumably because
// the transform can depend on position — confirm against DENORM::NormTransform.
171 uinT8 NormalizeDirection(uinT8 dir, const FCOORD& unnormed_pos,
172  const DENORM& denorm, const DENORM* root_denorm) {
173  // Convert direction to a vector.
174  FCOORD unnormed_end;
175  unnormed_end.from_direction(dir);
176  unnormed_end += unnormed_pos;  // End point of the vector anchored at unnormed_pos.
177  FCOORD normed_pos, normed_end;
178  denorm.NormTransform(root_denorm, unnormed_pos, &normed_pos);
179  denorm.NormTransform(root_denorm, unnormed_end, &normed_end);
180  normed_end -= normed_pos;  // Back to a vector, now between normalized points.
181  return normed_end.to_direction();
182 }
183 
184 // Helper returns the mean direction vector from the given stats. Use the
185 // mean direction from dirs if there is information available, otherwise, use
186 // the fit_vector from point_diffs.
// point_diffs: accumulated points; only used (via vector_fit) when dirs is empty.
// dirs: accumulated (direction, direction + 128) pairs.
// start_pt, end_pt: only used in the no-directions case, where end_pt - start_pt
// resolves the sign (and the possible 90 degree error) of the fitted vector.
187 static FCOORD MeanDirectionVector(const LLSQ& point_diffs, const LLSQ& dirs,
188  const FCOORD& start_pt,
189  const FCOORD& end_pt) {
190  FCOORD fit_vector;
191  if (dirs.count() > 0) {
192  // There were directions, so use them. To avoid wrap-around problems, we
193  // have 2 accumulators in dirs: x for normal directions and y for
194  // directions offset by 128. We will use the one with the least variance.
195  FCOORD mean_pt = dirs.mean_point();
196  double mean_dir = 0.0;
197  if (dirs.x_variance() <= dirs.y_variance()) {
198  mean_dir = mean_pt.x();
199  } else {
  // Adding 128 here, then reducing modulo 256 below, brings the y
  // accumulator back onto the same scale as the plain direction.
200  mean_dir = mean_pt.y() + 128;
201  }
202  fit_vector.from_direction(Modulo(IntCastRounded(mean_dir), 256));
203  } else {
204  // There were no directions, so we rely on the vector_fit to the points.
205  // Since the vector_fit is 180 degrees ambiguous, we align with the
206  // supplied feature_dir by making the scalar product non-negative.
207  FCOORD feature_dir(end_pt - start_pt);
208  fit_vector = point_diffs.vector_fit();
209  if (fit_vector.x() == 0.0f && fit_vector.y() == 0.0f) {
210  // There was only a single point. Use feature_dir directly.
211  fit_vector = feature_dir;
212  } else {
213  // Sometimes the least mean squares fit is wrong, due to the small sample
214  // of points and scaling. Use a 90 degree rotated vector if that matches
215  // feature_dir better.
216  FCOORD fit_vector2 = !fit_vector;
217  // The fit_vector is 180 degrees ambiguous, so resolve the ambiguity by
218  // insisting that the scalar product with the feature_dir should be +ve.
219  if (fit_vector % feature_dir < 0.0)
220  fit_vector = -fit_vector;
221  if (fit_vector2 % feature_dir < 0.0)
222  fit_vector2 = -fit_vector2;
223  // Even though fit_vector2 has a higher mean squared error, it might be
224  // a better fit, so use it if the dot product with feature_dir is bigger.
225  if (fit_vector2 % feature_dir > fit_vector % feature_dir)
226  fit_vector = fit_vector2;
227  }
228  }
229  return fit_vector;
230 }
231 
232 // Helper computes one or more features corresponding to the given points.
233 // Emitted features are on the line defined by:
234 // start_pt + lambda * (end_pt - start_pt) for scalar lambda.
235 // Features are spaced at feature_length intervals.
// Appends the features to *features and returns how many were appended.
// Returns 0 without appending anything when start_pt == end_pt, or when the
// segment length divided by feature_length rounds to zero.
// Every emitted feature shares the same theta, taken from end_pt - start_pt.
236 static int ComputeFeatures(const FCOORD& start_pt, const FCOORD& end_pt,
237  double feature_length,
238  GenericVector<INT_FEATURE_STRUCT>* features) {
239  FCOORD feature_vector(end_pt - start_pt);
240  if (feature_vector.x() == 0.0f && feature_vector.y() == 0.0f) return 0;
241  // Compute theta for the feature based on its direction.
242  uinT8 theta = feature_vector.to_direction();
243  // Compute the number of features and lambda_step.
244  double target_length = feature_vector.length();
245  int num_features = IntCastRounded(target_length / feature_length);
246  if (num_features == 0) return 0;
247  // Divide the length evenly into num_features pieces.
248  double lambda_step = 1.0 / num_features;
  // Starting half a step in places each feature at the middle of its piece.
249  double lambda = lambda_step / 2.0;
250  for (int f = 0; f < num_features; ++f, lambda += lambda_step) {
251  FCOORD feature_pt(start_pt);
252  feature_pt += feature_vector * lambda;
253  INT_FEATURE_STRUCT feature(feature_pt, theta);
254  features->push_back(feature);
255  }
256  return num_features;
257 }
258 
259 // Gathers outline points and their directions from start_index into dirs by
260 // stepping along the outline and normalizing the coordinates until the
261 // required feature_length has been collected or end_index is reached.
262 // On input pos must point to the position corresponding to start_index and on
263 // return pos is updated to the current raw position, and pos_normed is set to
264 // the normed version of pos.
265 // Since directions wrap-around, they need special treatment to get the mean.
266 // Provided the cluster of directions doesn't straddle the wrap-around point,
267 // the simple mean works. If they do, then, unless the directions are wildly
268 // varying, the cluster rotated by 180 degrees will not straddle the wrap-
269 // around point, so mean(dir + 180 degrees) - 180 degrees will work. Since
270 // LLSQ conveniently stores the mean of 2 variables, we use it to store
271 // dir and dir+128 (128 is 180 degrees) and then use the resulting mean
272 // with the least variance.
// Returns the index at which gathering stopped: either the index of the first
// point that lies more than feature_length from the first gathered point (that
// point is NOT added, and *pos still corresponds to it), or end_index + 1 when
// the whole range was consumed.
// points and dirs are cleared on entry, so they hold only this call's data.
273 static int GatherPoints(const C_OUTLINE* outline, double feature_length,
274  const DENORM& denorm, const DENORM* root_denorm,
275  int start_index, int end_index,
276  ICOORD* pos, FCOORD* pos_normed,
277  LLSQ* points, LLSQ* dirs) {
278  int step_length = outline->pathlength();
279  ICOORD step = outline->step(start_index % step_length);
280  // Prev_normed is the start point of this collection and will be set on the
281  // first iteration, and on later iterations used to determine the length
282  // that has been collected.
283  FCOORD prev_normed;
284  points->clear();
285  dirs->clear();
286  int num_points = 0;
287  int index;
  // The loop increment advances *pos by the step just read, including after
  // the continue below, so *pos always tracks index.
288  for (index = start_index; index <= end_index; ++index, *pos += step) {
289  step = outline->step(index % step_length);
290  int edge_weight = outline->edge_strength_at_index(index % step_length);
291  if (edge_weight == 0) {
292  // This point has conflicting gradient and step direction, so ignore it.
293  continue;
294  }
295  // Get the sub-pixel precise location and normalize.
296  FCOORD f_pos = outline->sub_pixel_pos_at_index(*pos, index % step_length);
297  denorm.NormTransform(root_denorm, f_pos, pos_normed);
298  if (num_points == 0) {
299  // The start of this segment.
300  prev_normed = *pos_normed;
301  } else {
302  FCOORD offset = *pos_normed - prev_normed;
303  float length = offset.length();
304  if (length > feature_length) {
305  // We have gone far enough from the start. We will use this point in
306  // the next set so return what we have so far.
307  return index;
308  }
309  }
310  points->add(pos_normed->x(), pos_normed->y(), edge_weight);
311  int direction = outline->direction_at_index(index % step_length);
  // A negative direction is skipped, so it contributes a point but no dir.
312  if (direction >= 0) {
313  direction = NormalizeDirection(direction, f_pos, denorm, root_denorm);
314  // Use both the direction and direction +128 so we are not trying to
315  // take the mean of something straddling the wrap-around point.
316  dirs->add(direction, Modulo(direction + 128, 256));
317  }
318  ++num_points;
319  }
320  return index;
321 }
322 
323 // Extracts Tesseract features and appends them to the features vector.
324 // Startpt to lastpt, inclusive, MUST have the same src_outline member,
325 // which may be NULL. The vector from lastpt to its next is included in
326 // the feature extraction. Hidden edges should be excluded by the caller.
327 // If force_poly is true, the features will be extracted from the polygonal
328 // approximation even if more accurate data is available.
329 static void ExtractFeaturesFromRun(
330  const EDGEPT* startpt, const EDGEPT* lastpt,
331  const DENORM& denorm, double feature_length, bool force_poly,
333  const EDGEPT* endpt = lastpt->next;
334  const C_OUTLINE* outline = startpt->src_outline;
335  if (outline != NULL && !force_poly) {
336  // Detailed information is available. We have to normalize only from
337  // the root_denorm to denorm.
338  const DENORM* root_denorm = denorm.RootDenorm();
339  int total_features = 0;
340  // Get the features from the outline.
341  int step_length = outline->pathlength();
342  int start_index = startpt->start_step;
343  // pos is the integer coordinates of the binary image steps.
344  ICOORD pos = outline->position_at_index(start_index);
345  // We use an end_index that allows us to use a positive increment, but that
346  // may be beyond the bounds of the outline steps/ due to wrap-around, to
347  // so we use % step_length everywhere, except for start_index.
348  int end_index = lastpt->start_step + lastpt->step_count;
349  if (end_index <= start_index)
350  end_index += step_length;
351  LLSQ prev_points;
352  LLSQ prev_dirs;
353  FCOORD prev_normed_pos = outline->sub_pixel_pos_at_index(pos, start_index);
354  denorm.NormTransform(root_denorm, prev_normed_pos, &prev_normed_pos);
355  LLSQ points;
356  LLSQ dirs;
357  FCOORD normed_pos;
358  int index = GatherPoints(outline, feature_length, denorm, root_denorm,
359  start_index, end_index, &pos, &normed_pos,
360  &points, &dirs);
361  while (index <= end_index) {
362  // At each iteration we nominally have 3 accumulated sets of points and
363  // dirs: prev_points/dirs, points/dirs, next_points/dirs and sum them
364  // into sum_points/dirs, but we don't necessarily get any features out,
365  // so if that is the case, we keep accumulating instead of rotating the
366  // accumulators.
367  LLSQ next_points;
368  LLSQ next_dirs;
369  FCOORD next_normed_pos;
370  index = GatherPoints(outline, feature_length, denorm, root_denorm,
371  index, end_index, &pos, &next_normed_pos,
372  &next_points, &next_dirs);
373  LLSQ sum_points(prev_points);
374  // TODO(rays) find out why it is better to use just dirs and next_dirs
375  // in sum_dirs, instead of using prev_dirs as well.
376  LLSQ sum_dirs(dirs);
377  sum_points.add(points);
378  sum_points.add(next_points);
379  sum_dirs.add(next_dirs);
380  bool made_features = false;
381  // If we have some points, we can try making some features.
382  if (sum_points.count() > 0) {
383  // We have gone far enough from the start. Make a feature and restart.
384  FCOORD fit_pt = sum_points.mean_point();
385  FCOORD fit_vector = MeanDirectionVector(sum_points, sum_dirs,
386  prev_normed_pos, normed_pos);
387  // The segment to which we fit features is the line passing through
388  // fit_pt in direction of fit_vector that starts nearest to
389  // prev_normed_pos and ends nearest to normed_pos.
390  FCOORD start_pos = prev_normed_pos.nearest_pt_on_line(fit_pt,
391  fit_vector);
392  FCOORD end_pos = normed_pos.nearest_pt_on_line(fit_pt, fit_vector);
393  // Possible correction to match the adjacent polygon segment.
394  if (total_features == 0 && startpt != endpt) {
395  FCOORD poly_pos(startpt->pos.x, startpt->pos.y);
396  denorm.LocalNormTransform(poly_pos, &start_pos);
397  }
398  if (index > end_index && startpt != endpt) {
399  FCOORD poly_pos(endpt->pos.x, endpt->pos.y);
400  denorm.LocalNormTransform(poly_pos, &end_pos);
401  }
402  int num_features = ComputeFeatures(start_pos, end_pos, feature_length,
403  features);
404  if (num_features > 0) {
405  // We made some features so shuffle the accumulators.
406  prev_points = points;
407  prev_dirs = dirs;
408  prev_normed_pos = normed_pos;
409  points = next_points;
410  dirs = next_dirs;
411  made_features = true;
412  total_features += num_features;
413  }
414  // The end of the next set becomes the end next time around.
415  normed_pos = next_normed_pos;
416  }
417  if (!made_features) {
418  // We didn't make any features, so keep the prev accumulators and
419  // add the next ones into the current.
420  points.add(next_points);
421  dirs.add(next_dirs);
422  }
423  }
424  } else {
425  // There is no outline, so we are forced to use the polygonal approximation.
426  const EDGEPT* pt = startpt;
427  do {
428  FCOORD start_pos(pt->pos.x, pt->pos.y);
429  FCOORD end_pos(pt->next->pos.x, pt->next->pos.y);
430  denorm.LocalNormTransform(start_pos, &start_pos);
431  denorm.LocalNormTransform(end_pos, &end_pos);
432  ComputeFeatures(start_pos, end_pos, feature_length, features);
433  } while ((pt = pt->next) != endpt);
434  }
435 }
436 
437 // Extracts sets of 3-D features of length kStandardFeatureLength (=12.8), as
438 // (x,y) position and angle as measured counterclockwise from the vector
439 // <-1, 0>, from blob using two normalizations defined by bl_denorm and
440 // cn_denorm. See SetupBLCNDenorms for definitions.
441 // If outline_cn_counts is not NULL, on return it contains the cumulative
442 // number of cn features generated for each outline in the blob (in order).
443 // Thus after the first outline, there were (*outline_cn_counts)[0] features,
444 // after the second outline, there were (*outline_cn_counts)[1] features etc.
// Features are appended to bl_features and cn_features; neither vector is
// cleared here. results must not be NULL: its NumBL, NumCN, YBottom, YTop and
// Width fields are written unconditionally at the end.
445 void Classify::ExtractFeatures(const TBLOB& blob,
446  bool nonlinear_norm,
447  GenericVector<INT_FEATURE_STRUCT>* bl_features,
448  GenericVector<INT_FEATURE_STRUCT>* cn_features,
449  INT_FX_RESULT_STRUCT* results,
450  GenericVector<int>* outline_cn_counts) {
451  DENORM bl_denorm, cn_denorm;
452  tesseract::Classify::SetupBLCNDenorms(blob, nonlinear_norm,
453  &bl_denorm, &cn_denorm, results);
454  if (outline_cn_counts != NULL)
455  outline_cn_counts->truncate(0);
456  // Iterate the outlines.
457  for (TESSLINE* ol = blob.outlines; ol != NULL; ol = ol->next) {
458  // Iterate the polygon.
459  EDGEPT* loop_pt = ol->FindBestStartPt();
460  EDGEPT* pt = loop_pt;
461  if (pt == NULL) continue;
462  do {
  // continue jumps to the loop condition, which advances pt to pt->next.
463  if (pt->IsHidden()) continue;
464  // Find a run of equal src_outline.
465  EDGEPT* last_pt = pt;
466  do {
467  last_pt = last_pt->next;
468  } while (last_pt != loop_pt && !last_pt->IsHidden() &&
469  last_pt->src_outline == pt->src_outline);
  // The scan stopped one point past the run, so step back onto its last point.
470  last_pt = last_pt->prev;
471  // Until the adaptive classifier can be weaned off polygon segments,
472  // we have to force extraction from the polygon for the bl_features.
473  ExtractFeaturesFromRun(pt, last_pt, bl_denorm, kStandardFeatureLength,
474  true, bl_features);
475  ExtractFeaturesFromRun(pt, last_pt, cn_denorm, kStandardFeatureLength,
476  false, cn_features);
477  pt = last_pt;
478  } while ((pt = pt->next) != loop_pt);
479  if (outline_cn_counts != NULL)
480  outline_cn_counts->push_back(cn_features->size());
481  }
482  results->NumBL = bl_features->size();
483  results->NumCN = cn_features->size();
484  results->YBottom = blob.bounding_box().bottom();
485  results->YTop = blob.bounding_box().top();
486  results->Width = blob.bounding_box().width();
487 }
488 
489 } // namespace tesseract
490 
491 
492 /*--------------------------------------------------------------------------*/
493 // Extract a set of standard-sized features from Blobs and write them out in
494 // two formats: baseline normalized and character normalized.
495 //
496 // We presume the Blobs are already scaled so that x-height=128 units
497 //
498 // Standard Features:
499 // We take all outline segments longer than 7 units and chop them into
500 // standard-sized segments of approximately 13 = (64 / 5) units.
501 // When writing these features out, we output their center and angle as
502 // measured counterclockwise from the vector <-1, 0>
503 //
504 // Baseline Normalized Output:
505 // We center the grapheme by aligning the x-coordinate of its centroid with
506 // x=0 and subtracting 128 from the y-coordinate.
507 //
508 // Character Normalized Output:
509 // We align the grapheme's centroid at the origin and scale it asymmetrically
510 // in x and y so that the result is vaguely square.
511 //
512 // Deprecated! Prefer tesseract::Classify::ExtractFeatures instead.
// Thin wrapper over tesseract::Classify::ExtractFeatures that copies the
// results into the caller's fixed-size INT_FEATURE_ARRAY buffers.
// Returns false, leaving both arrays untouched, when either feature set is
// empty or holds more than MAX_NUM_INT_FEATURES entries; returns true otherwise.
// results is filled in by ExtractFeatures in either case.
513 bool ExtractIntFeat(const TBLOB& blob,
514  bool nonlinear_norm,
515  INT_FEATURE_ARRAY baseline_features,
516  INT_FEATURE_ARRAY charnorm_features,
517  INT_FX_RESULT_STRUCT* results) {
518  GenericVector<INT_FEATURE_STRUCT> bl_features;
519  GenericVector<INT_FEATURE_STRUCT> cn_features;
520  tesseract::Classify::ExtractFeatures(blob, nonlinear_norm,
521  &bl_features, &cn_features, results,
522  NULL);
523  if (bl_features.size() == 0 || cn_features.size() == 0 ||
524  bl_features.size() > MAX_NUM_INT_FEATURES ||
525  cn_features.size() > MAX_NUM_INT_FEATURES) {
526  return false; // Feature extraction failed.
527  }
  // The size checks above keep both copies within the destination arrays.
528  memcpy(baseline_features, &bl_features[0],
529  bl_features.size() * sizeof(bl_features[0]));
530  memcpy(charnorm_features, &cn_features[0],
531  cn_features.size() * sizeof(cn_features[0]));
532  return true;
533 }
Definition: blobs.h:261
FCOORD nearest_pt_on_line(const FCOORD &line_point, const FCOORD &dir_vector) const
Definition: points.cpp:136
void LocalNormTransform(const TPOINT &pt, TPOINT *transformed) const
Definition: normalis.cpp:305
int size() const
Definition: genericvector.h:72
void truncate(int size)
int direction_at_index(int index) const
Definition: coutln.h:176
void set_bounding_box(const TBOX &box)
#define INT_CHAR_NORM_RANGE
Definition: intproto.h:133
void clear()
Definition: linlsq.cpp:33
tesseract::CCUtilMutex atan_table_mutex
Definition: intfx.cpp:48
float x() const
Definition: points.h:209
int push_back(T object)
int start_step
Definition: blobs.h:173
void SetupNonLinear(const DENORM *predecessor, const TBOX &box, float target_width, float target_height, float final_xshift, float final_yshift, const GenericVector< GenericVector< int > > &x_coords, const GenericVector< GenericVector< int > > &y_coords)
Definition: normalis.cpp:267
EDGEPT * prev
Definition: blobs.h:170
int ComputeMoments(FCOORD *center, FCOORD *second_moments) const
Definition: blobs.cpp:535
void SetupNormalization(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift)
Definition: normalis.cpp:95
int direction(EDGEPT *point)
Definition: vecfuncs.cpp:43
inT16 y
Definition: blobs.h:72
FCOORD FeatureDirection(uinT8 theta)
Definition: intfx.cpp:70
inT32 pathlength() const
Definition: coutln.h:133
void DenormTransform(const DENORM *last_denorm, const TPOINT &pt, TPOINT *original) const
Definition: normalis.cpp:389
int Modulo(int a, int b)
Definition: helpers.h:157
inT16 right() const
Definition: rect.h:75
Definition: linlsq.h:26
inT32 count() const
Definition: linlsq.h:41
void pad(int xpad, int ypad)
Definition: rect.h:127
double x_variance() const
Definition: linlsq.h:79
uinT8 to_direction() const
Definition: points.cpp:111
inT16 left() const
Definition: rect.h:68
void GetPreciseBoundingBox(TBOX *precise_box) const
Definition: blobs.cpp:554
FCOORD sub_pixel_pos_at_index(const ICOORD &pos, int index) const
Definition: coutln.h:161
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
Definition: intproto.h:155
Definition: blobs.h:50
double y_variance() const
Definition: linlsq.h:85
EDGEPT * next
Definition: blobs.h:169
float length() const
find length
Definition: points.h:230
static TrainingSample * CopyFromFeatures(const INT_FX_RESULT_STRUCT &fx_info, const TBOX &bounding_box, const INT_FEATURE_STRUCT *features, int num_features)
#define MAX_NUM_INT_FEATURES
Definition: intproto.h:132
void add(double x, double y)
Definition: linlsq.cpp:49
bool ExtractIntFeat(const TBLOB &blob, bool nonlinear_norm, INT_FEATURE_ARRAY baseline_features, INT_FEATURE_ARRAY charnorm_features, INT_FX_RESULT_STRUCT *results)
Definition: intfx.cpp:513
#define MAX_UINT8
Definition: host.h:121
inT16 x
Definition: blobs.h:71
bool IsHidden() const
Definition: blobs.h:153
const DENORM & denorm() const
Definition: blobs.h:340
int step_count
Definition: blobs.h:174
integer coordinate
Definition: points.h:30
static void SetupBLCNDenorms(const TBLOB &blob, bool nonlinear_norm, DENORM *bl_denorm, DENORM *cn_denorm, INT_FX_RESULT_STRUCT *fx_info)
Definition: intfx.cpp:133
inT16 bottom() const
Definition: rect.h:61
C_OUTLINE * src_outline
Definition: blobs.h:171
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
Definition: intfx.cpp:81
void GetEdgeCoords(const TBOX &box, GenericVector< GenericVector< int > > *x_coords, GenericVector< GenericVector< int > > *y_coords) const
Definition: blobs.cpp:570
void from_direction(uinT8 direction)
Definition: points.cpp:115
inT16 width() const
Definition: rect.h:111
FCOORD mean_point() const
Definition: linlsq.cpp:167
TPOINT pos
Definition: blobs.h:163
void NormTransform(const DENORM *first_norm, const TPOINT &pt, TPOINT *transformed) const
Definition: normalis.cpp:334
void InitIntegerFX()
Definition: intfx.cpp:55
int edge_strength_at_index(int index) const
Definition: coutln.h:185
Definition: cluster.h:32
int IntCastRounded(double x)
Definition: helpers.h:172
static void ExtractFeatures(const TBLOB &blob, bool nonlinear_norm, GenericVector< INT_FEATURE_STRUCT > *bl_features, GenericVector< INT_FEATURE_STRUCT > *cn_features, INT_FX_RESULT_STRUCT *results, GenericVector< int > *outline_cn_counts)
Definition: intfx.cpp:445
Definition: blobs.h:76
#define PI
Definition: const.h:19
Definition: rect.h:30
float y() const
Definition: points.h:212
ICOORD step(int index) const
Definition: coutln.h:142
#define NULL
Definition: host.h:144
ICOORD position_at_index(int index) const
Definition: coutln.h:151
TBOX bounding_box() const
Definition: blobs.cpp:482
TESSLINE * outlines
Definition: blobs.h:377
uinT8 NormalizeDirection(uinT8 dir, const FCOORD &unnormed_pos, const DENORM &denorm, const DENORM *root_denorm)
Definition: intfx.cpp:171
const double kStandardFeatureLength
Definition: intfx.h:46
FCOORD vector_fit() const
Definition: linlsq.cpp:252
inT16 top() const
Definition: rect.h:54
const DENORM * RootDenorm() const
Definition: normalis.h:260
Definition: points.h:189
unsigned char uinT8
Definition: host.h:99