tesseract  5.0.0-alpha-619-ge9db
clusttool.cpp File Reference
#include "clusttool.h"
#include <cmath>
#include <locale>
#include <sstream>
#include "emalloc.h"

Go to the source code of this file.

Macros

#define _USE_MATH_DEFINES
 
#define TOKENSIZE   80
 max size of tokens read from an input file More...
 
#define QUOTED_TOKENSIZE   "79"
 
#define MAXSAMPLESIZE   65535
 max num of dimensions in feature space More...
 

Functions

uint16_t ReadSampleSize (TFile *fp)
 
PARAM_DESC * ReadParamDesc (TFile *fp, uint16_t N)
 
PROTOTYPE * ReadPrototype (TFile *fp, uint16_t N)
 
void WriteParamDesc (FILE *File, uint16_t N, const PARAM_DESC ParamDesc[])
 
void WritePrototype (FILE *File, uint16_t N, PROTOTYPE *Proto)
 

Macro Definition Documentation

◆ _USE_MATH_DEFINES

#define _USE_MATH_DEFINES

Definition at line 18 of file clusttool.cpp.

◆ MAXSAMPLESIZE

#define MAXSAMPLESIZE   65535

max num of dimensions in feature space

Definition at line 31 of file clusttool.cpp.

◆ QUOTED_TOKENSIZE

#define QUOTED_TOKENSIZE   "79"

Definition at line 30 of file clusttool.cpp.

◆ TOKENSIZE

#define TOKENSIZE   80

max size of tokens read from an input file

Definition at line 29 of file clusttool.cpp.

Function Documentation

◆ ReadParamDesc()

PARAM_DESC* ReadParamDesc ( TFile * fp,
uint16_t  N 
)

This routine reads textual descriptions of sets of parameters which describe the characteristics of feature dimensions.

Parameters
fp: open text file to read N parameter descriptions from
N: number of parameter descriptions to read
Returns
Pointer to an array of parameter descriptors.
Note
Globals: None

Definition at line 140 of file clusttool.cpp.

140  {
141  PARAM_DESC *ParamDesc;
142 
143  ParamDesc = static_cast<PARAM_DESC *>(Emalloc (N * sizeof (PARAM_DESC)));
144  for (int i = 0; i < N; i++) {
145  const int kMaxLineSize = TOKENSIZE * 4;
146  char line[kMaxLineSize];
147  ASSERT_HOST(fp->FGets(line, kMaxLineSize) != nullptr);
148  std::istringstream stream(line);
149  // Use "C" locale (needed for float values Min, Max).
150  stream.imbue(std::locale::classic());
151  std::string linear_token;
152  stream >> linear_token;
153  std::string essential_token;
154  stream >> essential_token;
155  stream >> ParamDesc[i].Min;
156  stream >> ParamDesc[i].Max;
157  ASSERT_HOST(!stream.fail());
158  ParamDesc[i].Circular = (linear_token[0] == 'c');
159  ParamDesc[i].NonEssential = (essential_token[0] != 'e');
160  ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min;
161  ParamDesc[i].HalfRange = ParamDesc[i].Range / 2;
162  ParamDesc[i].MidRange = (ParamDesc[i].Max + ParamDesc[i].Min) / 2;
163  }
164  return (ParamDesc);
165 }

◆ ReadPrototype()

PROTOTYPE* ReadPrototype ( TFile * fp,
uint16_t  N 
)

This routine reads a textual description of a prototype from the specified file.

Parameters
fp: open text file to read prototype from
N: number of dimensions used in prototype
Returns
Pointer to the prototype that was read, or nullptr if the prototype line or its style is invalid
Note
Globals: None

Definition at line 176 of file clusttool.cpp.

176  {
177  char sig_token[TOKENSIZE], shape_token[TOKENSIZE];
178  PROTOTYPE *Proto;
179  int SampleCount;
180  int i;
181 
182  const int kMaxLineSize = TOKENSIZE * 4;
183  char line[kMaxLineSize];
184  if (fp->FGets(line, kMaxLineSize) == nullptr ||
185  sscanf(line, "%" QUOTED_TOKENSIZE "s %" QUOTED_TOKENSIZE "s %d",
186  sig_token, shape_token, &SampleCount) != 3) {
187  tprintf("Invalid prototype: %s\n", line);
188  return nullptr;
189  }
190  Proto = static_cast<PROTOTYPE *>(Emalloc(sizeof(PROTOTYPE)));
191  Proto->Cluster = nullptr;
192  Proto->Significant = (sig_token[0] == 's');
193 
194  switch (shape_token[0]) {
195  case 's':
196  Proto->Style = spherical;
197  break;
198  case 'e':
199  Proto->Style = elliptical;
200  break;
201  case 'a':
202  Proto->Style = automatic;
203  break;
204  default:
205  tprintf("Invalid prototype style specification:%s\n", shape_token);
206  Proto->Style = elliptical;
207  }
208 
209  ASSERT_HOST(SampleCount >= 0);
210  Proto->NumSamples = SampleCount;
211 
212  Proto->Mean = ReadNFloats(fp, N, nullptr);
213  ASSERT_HOST(Proto->Mean != nullptr);
214 
215  switch (Proto->Style) {
216  case spherical:
217  ASSERT_HOST(ReadNFloats(fp, 1, &(Proto->Variance.Spherical)) != nullptr);
218  Proto->Magnitude.Spherical =
219  1.0 / sqrt(2.0 * M_PI * Proto->Variance.Spherical);
220  Proto->TotalMagnitude = pow(Proto->Magnitude.Spherical, static_cast<float>(N));
221  Proto->LogMagnitude = log(static_cast<double>(Proto->TotalMagnitude));
222  Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical;
223  Proto->Distrib = nullptr;
224  break;
225  case elliptical:
226  Proto->Variance.Elliptical = ReadNFloats(fp, N, nullptr);
227  ASSERT_HOST(Proto->Variance.Elliptical != nullptr);
228  Proto->Magnitude.Elliptical = static_cast<float *>(Emalloc(N * sizeof(float)));
229  Proto->Weight.Elliptical = static_cast<float *>(Emalloc(N * sizeof(float)));
230  Proto->TotalMagnitude = 1.0;
231  for (i = 0; i < N; i++) {
232  Proto->Magnitude.Elliptical[i] =
233  1.0 / sqrt(2.0 * M_PI * Proto->Variance.Elliptical[i]);
234  Proto->Weight.Elliptical[i] = 1.0 / Proto->Variance.Elliptical[i];
235  Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];
236  }
237  Proto->LogMagnitude = log(static_cast<double>(Proto->TotalMagnitude));
238  Proto->Distrib = nullptr;
239  break;
240  default:
241  Efree(Proto);
242  tprintf("Invalid prototype style\n");
243  return nullptr;
244  }
245  return Proto;
246 }

◆ ReadSampleSize()

uint16_t ReadSampleSize ( TFile * fp)

This routine reads a single integer from the specified file and checks to ensure that it is between 0 and MAXSAMPLESIZE.

Parameters
fp: open text file to read sample size from
Returns
Sample size
Note
Globals: None

Definition at line 120 of file clusttool.cpp.

120  {
121  int SampleSize = 0;
122 
123  const int kMaxLineSize = 100;
124  char line[kMaxLineSize];
125  ASSERT_HOST(fp->FGets(line, kMaxLineSize) != nullptr);
126  ASSERT_HOST(sscanf(line, "%d", &SampleSize) == 1);
127  ASSERT_HOST(SampleSize >= 0 && SampleSize <= MAXSAMPLESIZE);
128  return SampleSize;
129 }

◆ WriteParamDesc()

void WriteParamDesc ( FILE *  File,
uint16_t  N,
const PARAM_DESC  ParamDesc[] 
)

This routine writes an array of dimension descriptors to the specified text file.

Parameters
File: open text file to write param descriptors to
N: number of param descriptors to write
ParamDesc: array of param descriptors to write

Definition at line 255 of file clusttool.cpp.

255  {
256  int i;
257 
258  for (i = 0; i < N; i++) {
259  if (ParamDesc[i].Circular)
260  fprintf (File, "circular ");
261  else
262  fprintf (File, "linear ");
263 
264  if (ParamDesc[i].NonEssential)
265  fprintf (File, "non-essential ");
266  else
267  fprintf (File, "essential ");
268 
269  fprintf (File, "%10.6f %10.6f\n", ParamDesc[i].Min, ParamDesc[i].Max);
270  }
271 }

◆ WritePrototype()

void WritePrototype ( FILE *  File,
uint16_t  N,
PROTOTYPE * Proto
)

This routine writes a textual description of a prototype to the specified text file.

Parameters
File: open text file to write prototype to
N: number of dimensions in feature space
Proto: prototype to write out

Definition at line 280 of file clusttool.cpp.

280  {
281  int i;
282 
283  if (Proto->Significant)
284  fprintf (File, "significant ");
285  else
286  fprintf (File, "insignificant ");
287  WriteProtoStyle (File, static_cast<PROTOSTYLE>(Proto->Style));
288  fprintf (File, "%6d\n\t", Proto->NumSamples);
289  WriteNFloats (File, N, Proto->Mean);
290  fprintf (File, "\t");
291 
292  switch (Proto->Style) {
293  case spherical:
294  WriteNFloats (File, 1, &(Proto->Variance.Spherical));
295  break;
296  case elliptical:
297  WriteNFloats (File, N, Proto->Variance.Elliptical);
298  break;
299  case mixed:
300  for (i = 0; i < N; i++)
301  switch (Proto->Distrib[i]) {
302  case normal:
303  fprintf (File, " %9s", "normal");
304  break;
305  case uniform:
306  fprintf (File, " %9s", "uniform");
307  break;
308  case D_random:
309  fprintf (File, " %9s", "random");
310  break;
311  case DISTRIBUTION_COUNT:
312  ASSERT_HOST(!"Distribution count not allowed!");
313  }
314  fprintf (File, "\n\t");
315  WriteNFloats (File, N, Proto->Variance.Elliptical);
316  }
317 }
string
std::string string
Definition: equationdetect_test.cc:21
PROTOTYPE::TotalMagnitude
float TotalMagnitude
Definition: cluster.h:74
PARAM_DESC::Circular
bool Circular
Definition: ocrfeatures.h:42
Emalloc
void * Emalloc(int Size)
Definition: emalloc.cpp:31
elliptical
Definition: cluster.h:43
PROTOTYPE::LogMagnitude
float LogMagnitude
Definition: cluster.h:75
ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:87
language_specific.log
log
Definition: language_specific.py:25
MAXSAMPLESIZE
#define MAXSAMPLESIZE
max num of dimensions in feature space
Definition: clusttool.cpp:31
PROTOTYPE::Magnitude
FLOATUNION Magnitude
Definition: cluster.h:77
normal
Definition: cluster.h:55
PARAM_DESC::Min
float Min
Definition: ocrfeatures.h:44
PARAM_DESC::Range
float Range
Definition: ocrfeatures.h:46
PROTOTYPE
Definition: cluster.h:62
PARAM_DESC::MidRange
float MidRange
Definition: ocrfeatures.h:48
QUOTED_TOKENSIZE
#define QUOTED_TOKENSIZE
Definition: clusttool.cpp:30
uniform
Definition: cluster.h:55
FLOATUNION::Elliptical
float * Elliptical
Definition: cluster.h:59
PROTOTYPE::Weight
FLOATUNION Weight
Definition: cluster.h:78
mixed
Definition: cluster.h:43
TOKENSIZE
#define TOKENSIZE
max size of tokens read from an input file
Definition: clusttool.cpp:29
PARAM_DESC::Max
float Max
Definition: ocrfeatures.h:45
PARAM_DESC
Definition: ocrfeatures.h:41
PROTOTYPE::Significant
bool Significant
Definition: cluster.h:63
PARAM_DESC::NonEssential
bool NonEssential
Definition: ocrfeatures.h:43
PROTOTYPE::Mean
float * Mean
Definition: cluster.h:73
FLOATUNION::Spherical
float Spherical
Definition: cluster.h:58
PARAM_DESC::HalfRange
float HalfRange
Definition: ocrfeatures.h:47
Efree
void Efree(void *ptr)
Definition: emalloc.cpp:45
tesseract::TFile::FGets
char * FGets(char *buffer, int buffer_size)
Definition: serialis.cpp:262
spherical
Definition: cluster.h:43
PROTOTYPE::Style
unsigned Style
Definition: cluster.h:69
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
PROTOTYPE::Variance
FLOATUNION Variance
Definition: cluster.h:76
automatic
Definition: cluster.h:43
PROTOTYPE::NumSamples
unsigned NumSamples
Definition: cluster.h:70
DISTRIBUTION_COUNT
Definition: cluster.h:55
PROTOTYPE::Cluster
CLUSTER * Cluster
Definition: cluster.h:71
D_random
Definition: cluster.h:55
PROTOTYPE::Distrib
DISTRIBUTION * Distrib
Definition: cluster.h:72