tesseract  4.0.0-1-g2a2b
clusttool.cpp File Reference
#include "clusttool.h"
#include "emalloc.h"
#include <cstdio>
#include <cmath>

Go to the source code of this file.

Macros

#define TOKENSIZE   80
 
#define QUOTED_TOKENSIZE   "79"
 
#define MAXSAMPLESIZE   65535
 

Functions

uint16_t ReadSampleSize (TFile *fp)
 
PARAM_DESC * ReadParamDesc (TFile *fp, uint16_t N)
 
PROTOTYPE * ReadPrototype (TFile *fp, uint16_t N)
 
float * ReadNFloats (TFile *fp, uint16_t N, float Buffer[])
 
void WriteParamDesc (FILE *File, uint16_t N, const PARAM_DESC ParamDesc[])
 
void WritePrototype (FILE *File, uint16_t N, PROTOTYPE *Proto)
 
void WriteNFloats (FILE *File, uint16_t N, float Array[])
 
void WriteProtoStyle (FILE *File, PROTOSTYLE ProtoStyle)
 
void WriteProtoList (FILE *File, uint16_t N, PARAM_DESC *ParamDesc, LIST ProtoList, bool WriteSigProtos, bool WriteInsigProtos)
 

Macro Definition Documentation

◆ MAXSAMPLESIZE

#define MAXSAMPLESIZE   65535

Definition at line 29 of file clusttool.cpp.

◆ QUOTED_TOKENSIZE

#define QUOTED_TOKENSIZE   "79"

Definition at line 28 of file clusttool.cpp.

◆ TOKENSIZE

#define TOKENSIZE   80

Definition at line 27 of file clusttool.cpp.

Function Documentation

◆ ReadNFloats()

float* ReadNFloats ( TFile *  fp,
uint16_t  N,
float  Buffer[] 
)

This routine reads N floats from the specified text file and places them into Buffer. If Buffer is nullptr, a buffer is created and passed back to the caller. If EOF is encountered before any floats can be read, nullptr is returned.

Parameters
fp: open text file to read floats from
N: number of floats to read
Buffer: pointer to buffer to place floats into
Returns
Pointer to buffer holding floats, or nullptr if EOF is hit or fewer than N floats could be parsed from the line
Note
Globals: None

Definition at line 186 of file clusttool.cpp.

186  {
187  const int kMaxLineSize = 1024;
188  char line[kMaxLineSize];
189  if (fp->FGets(line, kMaxLineSize) == nullptr) {
190  tprintf("Hit EOF in ReadNFloats!\n");
191  return nullptr;
192  }
193  bool needs_free = false;
194 
195  if (Buffer == nullptr) {
196  Buffer = static_cast<float *>(Emalloc(N * sizeof(float)));
197  needs_free = true;
198  }
199 
200  char *startptr = line;
201  for (int i = 0; i < N; i++) {
202  char *endptr;
203  Buffer[i] = strtof(startptr, &endptr);
204  if (endptr == startptr) {
205  tprintf("Read of %d floats failed!\n", N);
206  if (needs_free) Efree(Buffer);
207  return nullptr;
208  }
209  startptr = endptr;
210  }
211  return Buffer;
212 }
void * Emalloc(int Size)
Definition: emalloc.cpp:31
void Efree(void *ptr)
Definition: emalloc.cpp:45
char * FGets(char *buffer, int buffer_size)
Definition: serialis.cpp:248
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37

◆ ReadParamDesc()

PARAM_DESC* ReadParamDesc ( TFile *  fp,
uint16_t  N 
)

This routine reads textual descriptions of sets of parameters which describe the characteristics of feature dimensions.

Parameters
fp: open text file to read N parameter descriptions from
N: number of parameter descriptions to read
Returns
Pointer to an array of parameter descriptors.
Note
Globals: None

Definition at line 61 of file clusttool.cpp.

61  {
62  PARAM_DESC *ParamDesc;
63  char linear_token[TOKENSIZE], essential_token[TOKENSIZE];
64 
65  ParamDesc = (PARAM_DESC *) Emalloc (N * sizeof (PARAM_DESC));
66  for (int i = 0; i < N; i++) {
67  const int kMaxLineSize = TOKENSIZE * 4;
68  char line[kMaxLineSize];
69  ASSERT_HOST(fp->FGets(line, kMaxLineSize) != nullptr);
70  ASSERT_HOST(sscanf(line,
71  "%" QUOTED_TOKENSIZE "s %" QUOTED_TOKENSIZE "s %f %f",
72  linear_token, essential_token, &ParamDesc[i].Min,
73  &ParamDesc[i].Max) == 4);
74  if (linear_token[0] == 'c')
75  ParamDesc[i].Circular = TRUE;
76  else
77  ParamDesc[i].Circular = FALSE;
78 
79  if (linear_token[0] == 'e')
80  ParamDesc[i].NonEssential = FALSE;
81  else
82  ParamDesc[i].NonEssential = TRUE;
83  ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min;
84  ParamDesc[i].HalfRange = ParamDesc[i].Range / 2;
85  ParamDesc[i].MidRange = (ParamDesc[i].Max + ParamDesc[i].Min) / 2;
86  }
87  return (ParamDesc);
88 }
float MidRange
Definition: ocrfeatures.h:50
int8_t Circular
Definition: ocrfeatures.h:44
float HalfRange
Definition: ocrfeatures.h:49
#define TRUE
Definition: capi.h:51
float Min
Definition: ocrfeatures.h:46
void * Emalloc(int Size)
Definition: emalloc.cpp:31
char * FGets(char *buffer, int buffer_size)
Definition: serialis.cpp:248
#define FALSE
Definition: capi.h:52
float Range
Definition: ocrfeatures.h:48
int8_t NonEssential
Definition: ocrfeatures.h:45
float Max
Definition: ocrfeatures.h:47
#define QUOTED_TOKENSIZE
Definition: clusttool.cpp:28
#define TOKENSIZE
Definition: clusttool.cpp:27
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ ReadPrototype()

PROTOTYPE* ReadPrototype ( TFile *  fp,
uint16_t  N 
)

This routine reads a textual description of a prototype from the specified file.

Parameters
fp: open text file to read prototype from
N: number of dimensions used in prototype
Returns
Pointer to the prototype read from the file, or nullptr if the prototype could not be read
Note
Globals: None

Definition at line 99 of file clusttool.cpp.

99  {
100  char sig_token[TOKENSIZE], shape_token[TOKENSIZE];
101  PROTOTYPE *Proto;
102  int SampleCount;
103  int i;
104 
105  const int kMaxLineSize = TOKENSIZE * 4;
106  char line[kMaxLineSize];
107  if (fp->FGets(line, kMaxLineSize) == nullptr ||
108  sscanf(line, "%" QUOTED_TOKENSIZE "s %" QUOTED_TOKENSIZE "s %d",
109  sig_token, shape_token, &SampleCount) != 3) {
110  tprintf("Invalid prototype: %s\n", line);
111  return nullptr;
112  }
113  Proto = (PROTOTYPE *)Emalloc(sizeof(PROTOTYPE));
114  Proto->Cluster = nullptr;
115  if (sig_token[0] == 's')
116  Proto->Significant = TRUE;
117  else
118  Proto->Significant = FALSE;
119 
120  switch (shape_token[0]) {
121  case 's':
122  Proto->Style = spherical;
123  break;
124  case 'e':
125  Proto->Style = elliptical;
126  break;
127  case 'a':
128  Proto->Style = automatic;
129  break;
130  default:
131  tprintf("Invalid prototype style specification:%s\n", shape_token);
132  Proto->Style = elliptical;
133  }
134 
135  ASSERT_HOST(SampleCount >= 0);
136  Proto->NumSamples = SampleCount;
137 
138  Proto->Mean = ReadNFloats(fp, N, nullptr);
139  ASSERT_HOST(Proto->Mean != nullptr);
140 
141  switch (Proto->Style) {
142  case spherical:
143  ASSERT_HOST(ReadNFloats(fp, 1, &(Proto->Variance.Spherical)) != nullptr);
144  Proto->Magnitude.Spherical =
145  1.0 / sqrt(2.0 * M_PI * Proto->Variance.Spherical);
146  Proto->TotalMagnitude = pow(Proto->Magnitude.Spherical, (float)N);
147  Proto->LogMagnitude = log((double)Proto->TotalMagnitude);
148  Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical;
149  Proto->Distrib = nullptr;
150  break;
151  case elliptical:
152  Proto->Variance.Elliptical = ReadNFloats(fp, N, nullptr);
153  ASSERT_HOST(Proto->Variance.Elliptical != nullptr);
154  Proto->Magnitude.Elliptical = (float *)Emalloc(N * sizeof(float));
155  Proto->Weight.Elliptical = (float *)Emalloc(N * sizeof(float));
156  Proto->TotalMagnitude = 1.0;
157  for (i = 0; i < N; i++) {
158  Proto->Magnitude.Elliptical[i] =
159  1.0 / sqrt(2.0 * M_PI * Proto->Variance.Elliptical[i]);
160  Proto->Weight.Elliptical[i] = 1.0 / Proto->Variance.Elliptical[i];
161  Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];
162  }
163  Proto->LogMagnitude = log((double)Proto->TotalMagnitude);
164  Proto->Distrib = nullptr;
165  break;
166  default:
167  Efree(Proto);
168  tprintf("Invalid prototype style\n");
169  return nullptr;
170  }
171  return Proto;
172 }
float * Mean
Definition: cluster.h:78
#define TRUE
Definition: capi.h:51
void * Emalloc(int Size)
Definition: emalloc.cpp:31
void Efree(void *ptr)
Definition: emalloc.cpp:45
char * FGets(char *buffer, int buffer_size)
Definition: serialis.cpp:248
float TotalMagnitude
Definition: cluster.h:79
float Spherical
Definition: cluster.h:63
float * Elliptical
Definition: cluster.h:64
FLOATUNION Weight
Definition: cluster.h:83
DISTRIBUTION * Distrib
Definition: cluster.h:77
unsigned Style
Definition: cluster.h:74
#define FALSE
Definition: capi.h:52
unsigned Significant
Definition: cluster.h:68
float * ReadNFloats(TFile *fp, uint16_t N, float Buffer[])
Definition: clusttool.cpp:186
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
FLOATUNION Magnitude
Definition: cluster.h:82
CLUSTER * Cluster
Definition: cluster.h:76
unsigned NumSamples
Definition: cluster.h:75
float LogMagnitude
Definition: cluster.h:80
FLOATUNION Variance
Definition: cluster.h:81
#define QUOTED_TOKENSIZE
Definition: clusttool.cpp:28
#define TOKENSIZE
Definition: clusttool.cpp:27
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ ReadSampleSize()

uint16_t ReadSampleSize ( TFile *  fp)

This routine reads a single integer from the specified file and checks to ensure that it is between 0 and MAXSAMPLESIZE.

Parameters
fp: open text file to read sample size from
Returns
Sample size
Note
Globals: None

Definition at line 41 of file clusttool.cpp.

41  {
42  int SampleSize = 0;
43 
44  const int kMaxLineSize = 100;
45  char line[kMaxLineSize];
46  ASSERT_HOST(fp->FGets(line, kMaxLineSize) != nullptr);
47  ASSERT_HOST(sscanf(line, "%d", &SampleSize) == 1);
48  ASSERT_HOST(SampleSize >= 0 && SampleSize <= MAXSAMPLESIZE);
49  return SampleSize;
50 }
char * FGets(char *buffer, int buffer_size)
Definition: serialis.cpp:248
#define MAXSAMPLESIZE
Definition: clusttool.cpp:29
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ WriteNFloats()

void WriteNFloats ( FILE *  File,
uint16_t  N,
float  Array[] 
)

This routine writes a text representation of N floats from an array to a file. All of the floats are placed on one line.

Parameters
File: open text file to write N floats to
N: number of floats to write
Array: array of floats to write
Returns
None
Note
Globals: None

Definition at line 298 of file clusttool.cpp.

298  {
299  for (int i = 0; i < N; i++)
300  fprintf(File, " %9.6f", Array[i]);
301  fprintf(File, "\n");
302 }

◆ WriteParamDesc()

void WriteParamDesc ( FILE *  File,
uint16_t  N,
const PARAM_DESC  ParamDesc[] 
)

This routine writes an array of dimension descriptors to the specified text file.

Parameters
File: open text file to write param descriptors to
N: number of param descriptors to write
ParamDesc: array of param descriptors to write
Returns
None
Note
Globals: None

Definition at line 223 of file clusttool.cpp.

223  {
224  int i;
225 
226  for (i = 0; i < N; i++) {
227  if (ParamDesc[i].Circular)
228  fprintf (File, "circular ");
229  else
230  fprintf (File, "linear ");
231 
232  if (ParamDesc[i].NonEssential)
233  fprintf (File, "non-essential ");
234  else
235  fprintf (File, "essential ");
236 
237  fprintf (File, "%10.6f %10.6f\n", ParamDesc[i].Min, ParamDesc[i].Max);
238  }
239 }

◆ WriteProtoList()

void WriteProtoList ( FILE *  File,
uint16_t  N,
PARAM_DESC *  ParamDesc,
LIST  ProtoList,
bool  WriteSigProtos,
bool  WriteInsigProtos 
)

This routine writes a textual description of each prototype in the prototype list to the specified file. It also writes a file header which includes the number of dimensions in feature space and the descriptions for each dimension.

Parameters
File: open text file to write prototypes to
N: number of dimensions in feature space
ParamDesc: descriptions for each dimension
ProtoList: list of prototypes to be written
WriteSigProtos: TRUE to write out significant prototypes
WriteInsigProtos: TRUE to write out insignificant prototypes
Note
Globals: None
Returns
None

Definition at line 345 of file clusttool.cpp.

347  {
348  PROTOTYPE *Proto;
349 
350  /* write file header */
351  fprintf(File,"%0d\n",N);
352  WriteParamDesc(File,N,ParamDesc);
353 
354  /* write prototypes */
355  iterate(ProtoList)
356  {
357  Proto = (PROTOTYPE *) first_node (ProtoList);
358  if ((Proto->Significant && WriteSigProtos) ||
359  (!Proto->Significant && WriteInsigProtos))
360  WritePrototype(File, N, Proto);
361  }
362 }
void WritePrototype(FILE *File, uint16_t N, PROTOTYPE *Proto)
Definition: clusttool.cpp:250
unsigned Significant
Definition: cluster.h:68
void WriteParamDesc(FILE *File, uint16_t N, const PARAM_DESC ParamDesc[])
Definition: clusttool.cpp:223
#define first_node(l)
Definition: oldlist.h:141
#define iterate(l)
Definition: oldlist.h:161

◆ WriteProtoStyle()

void WriteProtoStyle ( FILE *  File,
PROTOSTYLE  ProtoStyle 
)

This routine writes to the specified text file a word which represents the ProtoStyle. It does not append a carriage return to the end.

Parameters
File: open text file to write prototype style to
ProtoStyle: prototype style to write
Returns
None
Note
Globals: None

Definition at line 313 of file clusttool.cpp.

313  {
314  switch (ProtoStyle) {
315  case spherical:
316  fprintf (File, "spherical");
317  break;
318  case elliptical:
319  fprintf (File, "elliptical");
320  break;
321  case mixed:
322  fprintf (File, "mixed");
323  break;
324  case automatic:
325  fprintf (File, "automatic");
326  break;
327  }
328 }
Definition: cluster.h:45

◆ WritePrototype()

void WritePrototype ( FILE *  File,
uint16_t  N,
PROTOTYPE *  Proto 
)

This routine writes a textual description of a prototype to the specified text file.

Parameters
File: open text file to write prototype to
N: number of dimensions in feature space
Proto: prototype to write out
Returns
None
Note
Globals: None

Definition at line 250 of file clusttool.cpp.

250  {
251  int i;
252 
253  if (Proto->Significant)
254  fprintf (File, "significant ");
255  else
256  fprintf (File, "insignificant ");
257  WriteProtoStyle (File, (PROTOSTYLE) Proto->Style);
258  fprintf (File, "%6d\n\t", Proto->NumSamples);
259  WriteNFloats (File, N, Proto->Mean);
260  fprintf (File, "\t");
261 
262  switch (Proto->Style) {
263  case spherical:
264  WriteNFloats (File, 1, &(Proto->Variance.Spherical));
265  break;
266  case elliptical:
267  WriteNFloats (File, N, Proto->Variance.Elliptical);
268  break;
269  case mixed:
270  for (i = 0; i < N; i++)
271  switch (Proto->Distrib[i]) {
272  case normal:
273  fprintf (File, " %9s", "normal");
274  break;
275  case uniform:
276  fprintf (File, " %9s", "uniform");
277  break;
278  case D_random:
279  fprintf (File, " %9s", "random");
280  break;
281  case DISTRIBUTION_COUNT:
282  ASSERT_HOST(!"Distribution count not allowed!");
283  }
284  fprintf (File, "\n\t");
285  WriteNFloats (File, N, Proto->Variance.Elliptical);
286  }
287 }
float * Mean
Definition: cluster.h:78
float Spherical
Definition: cluster.h:63
float * Elliptical
Definition: cluster.h:64
PROTOSTYLE
Definition: cluster.h:44
DISTRIBUTION * Distrib
Definition: cluster.h:77
unsigned Style
Definition: cluster.h:74
unsigned Significant
Definition: cluster.h:68
Definition: cluster.h:59
unsigned NumSamples
Definition: cluster.h:75
void WriteProtoStyle(FILE *File, PROTOSTYLE ProtoStyle)
Definition: clusttool.cpp:313
void WriteNFloats(FILE *File, uint16_t N, float Array[])
Definition: clusttool.cpp:298
FLOATUNION Variance
Definition: cluster.h:81
#define ASSERT_HOST(x)
Definition: errcode.h:84
Definition: cluster.h:45