tesseract  5.0.0-alpha-619-ge9db
adaptive.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: adaptive.cpp
3  ** Purpose: Adaptive matcher.
4  ** Author: Dan Johnson
5  ** History: Fri Mar 8 10:00:21 1991, DSJ, Created.
6  **
7  ** (c) Copyright Hewlett-Packard Company, 1988.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  ******************************************************************************/
18 
19 /*----------------------------------------------------------------------------
20  Include Files and Type Defines
21 ----------------------------------------------------------------------------*/
22 #include "adaptive.h"
23 #include "emalloc.h"
24 #include "classify.h"
25 
26 #include <cassert>
27 #include <cstdio>
28 
29 using tesseract::TFile;
30 
31 /*----------------------------------------------------------------------------
32  Public Code
33 ----------------------------------------------------------------------------*/
34 /*---------------------------------------------------------------------------*/
46  ADAPT_CLASS Class,
47  CLASS_ID ClassId) {
48  INT_CLASS IntClass;
49 
50  assert (Templates != nullptr);
51  assert (Class != nullptr);
52  assert (LegalClassId (ClassId));
53  assert (UnusedClassIdIn (Templates->Templates, ClassId));
54  assert (Class->NumPermConfigs == 0);
55 
56  IntClass = NewIntClass (1, 1);
57  AddIntClass (Templates->Templates, ClassId, IntClass);
58 
59  assert (Templates->Class[ClassId] == nullptr);
60  Templates->Class[ClassId] = Class;
61 
62 } /* AddAdaptedClass */
63 
64 
65 /*---------------------------------------------------------------------------*/
75  assert (Config != nullptr);
76  FreeBitVector (Config->Protos);
77  free(Config);
78 } /* FreeTempConfig */
79 
80 /*---------------------------------------------------------------------------*/
/**
 * Releases one temporary proto that was allocated with malloc().
 *
 * The parameter is an untyped pointer so that this function can be handed to
 * destroy_nodes() as the per-node destructor of a TempProtos list.
 *
 * @param arg  pointer to the proto to release; it is passed straight to
 *             free(), so a null pointer is accepted and ignored.
 */
void FreeTempProto(void *arg) {
  // free() only needs the address, so no cast to a proto type is required
  // (the previous cast named PROTO although the list stores TEMP_PROTO items).
  free(arg);
}
86 
87 static void FreePermConfig(PERM_CONFIG Config) {
88  assert(Config != nullptr);
89  delete [] Config->Ambigs;
90  free(Config);
91 }
92 
93 /*---------------------------------------------------------------------------*/
103  ADAPT_CLASS Class;
104 
105  Class = static_cast<ADAPT_CLASS>(Emalloc (sizeof (ADAPT_CLASS_STRUCT)));
106  Class->NumPermConfigs = 0;
107  Class->MaxNumTimesSeen = 0;
108  Class->TempProtos = NIL_LIST;
109 
110  Class->PermProtos = NewBitVector (MAX_NUM_PROTOS);
111  Class->PermConfigs = NewBitVector (MAX_NUM_CONFIGS);
112  zero_all_bits (Class->PermProtos, WordsInVectorOfSize (MAX_NUM_PROTOS));
113  zero_all_bits (Class->PermConfigs, WordsInVectorOfSize (MAX_NUM_CONFIGS));
114 
115  for (int i = 0; i < MAX_NUM_CONFIGS; i++)
116  TempConfigFor (Class, i) = nullptr;
117 
118  return (Class);
119 
120 } /* NewAdaptedClass */
121 
122 
123 /*-------------------------------------------------------------------------*/
124 void free_adapted_class(ADAPT_CLASS adapt_class) {
125  for (int i = 0; i < MAX_NUM_CONFIGS; i++) {
126  if (ConfigIsPermanent (adapt_class, i)
127  && PermConfigFor (adapt_class, i) != nullptr)
128  FreePermConfig (PermConfigFor (adapt_class, i));
129  else if (!ConfigIsPermanent (adapt_class, i)
130  && TempConfigFor (adapt_class, i) != nullptr)
131  FreeTempConfig (TempConfigFor (adapt_class, i));
132  }
133  FreeBitVector (adapt_class->PermProtos);
134  FreeBitVector (adapt_class->PermConfigs);
135  destroy_nodes (adapt_class->TempProtos, FreeTempProto);
136  Efree(adapt_class);
137 }
138 
139 
140 /*---------------------------------------------------------------------------*/
141 namespace tesseract {
152  ADAPT_TEMPLATES Templates;
153 
154  Templates = static_cast<ADAPT_TEMPLATES>(Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT)));
155 
156  Templates->Templates = NewIntTemplates ();
157  Templates->NumPermClasses = 0;
158  Templates->NumNonEmptyClasses = 0;
159 
160  /* Insert an empty class for each unichar id in unicharset */
161  for (int i = 0; i < MAX_NUM_CLASSES; i++) {
162  Templates->Class[i] = nullptr;
163  if (InitFromUnicharset && i < unicharset.size()) {
164  AddAdaptedClass(Templates, NewAdaptedClass(), i);
165  }
166  }
167 
168  return (Templates);
169 
170 } /* NewAdaptedTemplates */
171 
172 // Returns FontinfoId of the given config of the given adapted class.
173 int Classify::GetFontinfoId(ADAPT_CLASS Class, uint8_t ConfigId) {
174  return (ConfigIsPermanent(Class, ConfigId) ?
175  PermConfigFor(Class, ConfigId)->FontinfoId :
176  TempConfigFor(Class, ConfigId)->FontinfoId);
177 }
178 
179 } // namespace tesseract
180 
181 /*----------------------------------------------------------------------------*/
183 
184  if (templates != nullptr) {
185  for (int i = 0; i < (templates->Templates)->NumClasses; i++)
186  free_adapted_class (templates->Class[i]);
187  free_int_templates (templates->Templates);
188  Efree(templates);
189  }
190 }
191 
192 
193 /*---------------------------------------------------------------------------*/
203 TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId) {
204  int NumProtos = MaxProtoId + 1;
205 
206  auto Config = static_cast<TEMP_CONFIG>(malloc(sizeof(TEMP_CONFIG_STRUCT)));
207  Config->Protos = NewBitVector (NumProtos);
208 
209  Config->NumTimesSeen = 1;
210  Config->MaxProtoId = MaxProtoId;
211  Config->ProtoVectorSize = WordsInVectorOfSize (NumProtos);
212  zero_all_bits (Config->Protos, Config->ProtoVectorSize);
213  Config->FontinfoId = FontinfoId;
214 
215  return (Config);
216 
217 } /* NewTempConfig */
218 
219 
220 /*---------------------------------------------------------------------------*/
229  return static_cast<TEMP_PROTO>(malloc(sizeof(TEMP_PROTO_STRUCT)));
230 } /* NewTempProto */
231 
232 
233 /*---------------------------------------------------------------------------*/
234 namespace tesseract {
245  INT_CLASS IClass;
246  ADAPT_CLASS AClass;
247 
248  fprintf (File, "\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n");
249  fprintf (File, "Num classes = %d; Num permanent classes = %d\n\n",
250  Templates->NumNonEmptyClasses, Templates->NumPermClasses);
251  fprintf (File, " Id NC NPC NP NPP\n");
252  fprintf (File, "------------------------\n");
253 
254  for (int i = 0; i < (Templates->Templates)->NumClasses; i++) {
255  IClass = Templates->Templates->Class[i];
256  AClass = Templates->Class[i];
257  if (!IsEmptyAdaptedClass (AClass)) {
258  fprintf (File, "%5d %s %3d %3d %3d %3d\n",
260  IClass->NumConfigs, AClass->NumPermConfigs,
261  IClass->NumProtos,
262  IClass->NumProtos - count (AClass->TempProtos));
263  }
264  }
265  fprintf (File, "\n");
266 
267 } /* PrintAdaptedTemplates */
268 } // namespace tesseract
269 
270 
271 /*---------------------------------------------------------------------------*/
282  int NumTempProtos;
283  int NumConfigs;
284  int i;
285  ADAPT_CLASS Class;
286 
287  /* first read high level adapted class structure */
288  Class = static_cast<ADAPT_CLASS>(Emalloc (sizeof (ADAPT_CLASS_STRUCT)));
289  fp->FRead(Class, sizeof(ADAPT_CLASS_STRUCT), 1);
290 
291  /* then read in the definitions of the permanent protos and configs */
292  Class->PermProtos = NewBitVector (MAX_NUM_PROTOS);
293  Class->PermConfigs = NewBitVector (MAX_NUM_CONFIGS);
294  fp->FRead(Class->PermProtos, sizeof(uint32_t),
295  WordsInVectorOfSize(MAX_NUM_PROTOS));
296  fp->FRead(Class->PermConfigs, sizeof(uint32_t),
297  WordsInVectorOfSize(MAX_NUM_CONFIGS));
298 
299  /* then read in the list of temporary protos */
300  fp->FRead(&NumTempProtos, sizeof(int), 1);
301  Class->TempProtos = NIL_LIST;
302  for (i = 0; i < NumTempProtos; i++) {
303  auto TempProto = static_cast<TEMP_PROTO>(malloc(sizeof(TEMP_PROTO_STRUCT)));
304  fp->FRead(TempProto, sizeof(TEMP_PROTO_STRUCT), 1);
305  Class->TempProtos = push_last (Class->TempProtos, TempProto);
306  }
307 
308  /* then read in the adapted configs */
309  fp->FRead(&NumConfigs, sizeof(int), 1);
310  for (i = 0; i < NumConfigs; i++)
311  if (test_bit (Class->PermConfigs, i))
312  Class->Config[i].Perm = ReadPermConfig(fp);
313  else
314  Class->Config[i].Temp = ReadTempConfig(fp);
315 
316  return (Class);
317 
318 } /* ReadAdaptedClass */
319 
320 
321 /*---------------------------------------------------------------------------*/
322 namespace tesseract {
333  ADAPT_TEMPLATES Templates;
334 
335  /* first read the high level adaptive template struct */
336  Templates = static_cast<ADAPT_TEMPLATES>(Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT)));
337  fp->FRead(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1);
338 
339  /* then read in the basic integer templates */
340  Templates->Templates = ReadIntTemplates(fp);
341 
342  /* then read in the adaptive info for each class */
343  for (int i = 0; i < (Templates->Templates)->NumClasses; i++) {
344  Templates->Class[i] = ReadAdaptedClass(fp);
345  }
346  return (Templates);
347 
348 } /* ReadAdaptedTemplates */
349 } // namespace tesseract
350 
351 
352 /*---------------------------------------------------------------------------*/
363  auto Config = static_cast<PERM_CONFIG>(malloc(sizeof(PERM_CONFIG_STRUCT)));
364  uint8_t NumAmbigs;
365  fp->FRead(&NumAmbigs, sizeof(NumAmbigs), 1);
366  Config->Ambigs = new UNICHAR_ID[NumAmbigs + 1];
367  fp->FRead(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs);
368  Config->Ambigs[NumAmbigs] = -1;
369  fp->FRead(&(Config->FontinfoId), sizeof(int), 1);
370 
371  return (Config);
372 
373 } /* ReadPermConfig */
374 
375 
376 /*---------------------------------------------------------------------------*/
387  auto Config = static_cast<TEMP_CONFIG>(malloc(sizeof(TEMP_CONFIG_STRUCT)));
388  fp->FRead(Config, sizeof(TEMP_CONFIG_STRUCT), 1);
389 
390  Config->Protos = NewBitVector (Config->ProtoVectorSize * BITSINLONG);
391  fp->FRead(Config->Protos, sizeof(uint32_t), Config->ProtoVectorSize);
392 
393  return (Config);
394 
395 } /* ReadTempConfig */
396 
397 
398 /*---------------------------------------------------------------------------*/
409 void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs) {
410  int NumTempProtos;
411  LIST TempProtos;
412  int i;
413 
414  /* first write high level adapted class structure */
415  fwrite(Class, sizeof(ADAPT_CLASS_STRUCT), 1, File);
416 
417  /* then write out the definitions of the permanent protos and configs */
418  fwrite(Class->PermProtos, sizeof(uint32_t),
419  WordsInVectorOfSize(MAX_NUM_PROTOS), File);
420  fwrite(Class->PermConfigs, sizeof(uint32_t),
421  WordsInVectorOfSize(MAX_NUM_CONFIGS), File);
422 
423  /* then write out the list of temporary protos */
424  NumTempProtos = count (Class->TempProtos);
425  fwrite(&NumTempProtos, sizeof(int), 1, File);
426  TempProtos = Class->TempProtos;
427  iterate (TempProtos) {
428  void* proto = first_node(TempProtos);
429  fwrite(proto, sizeof(TEMP_PROTO_STRUCT), 1, File);
430  }
431 
432  /* then write out the adapted configs */
433  fwrite(&NumConfigs, sizeof(int), 1, File);
434  for (i = 0; i < NumConfigs; i++)
435  if (test_bit (Class->PermConfigs, i))
436  WritePermConfig (File, Class->Config[i].Perm);
437  else
438  WriteTempConfig (File, Class->Config[i].Temp);
439 
440 } /* WriteAdaptedClass */
441 
442 
443 /*---------------------------------------------------------------------------*/
444 namespace tesseract {
454  int i;
455 
456  /* first write the high level adaptive template struct */
457  fwrite(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1, File);
458 
459  /* then write out the basic integer templates */
461 
462  /* then write out the adaptive info for each class */
463  for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
464  WriteAdaptedClass (File, Templates->Class[i],
465  Templates->Templates->Class[i]->NumConfigs);
466  }
467 } /* WriteAdaptedTemplates */
468 } // namespace tesseract
469 
470 
471 /*---------------------------------------------------------------------------*/
481 void WritePermConfig(FILE *File, PERM_CONFIG Config) {
482  uint8_t NumAmbigs = 0;
483 
484  assert (Config != nullptr);
485  while (Config->Ambigs[NumAmbigs] > 0) ++NumAmbigs;
486 
487  fwrite(&NumAmbigs, sizeof(uint8_t), 1, File);
488  fwrite(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File);
489  fwrite(&(Config->FontinfoId), sizeof(int), 1, File);
490 } /* WritePermConfig */
491 
492 
493 /*---------------------------------------------------------------------------*/
503 void WriteTempConfig(FILE *File, TEMP_CONFIG Config) {
504  assert (Config != nullptr);
505 
506  fwrite(Config, sizeof (TEMP_CONFIG_STRUCT), 1, File);
507  fwrite(Config->Protos, sizeof (uint32_t), Config->ProtoVectorSize, File);
508 
509 } /* WriteTempConfig */
emalloc.h
WriteTempConfig
void WriteTempConfig(FILE *File, TEMP_CONFIG Config)
Definition: adaptive.cpp:503
CLASS_ID
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:33
NewIntTemplates
INT_TEMPLATES NewIntTemplates()
Definition: intproto.cpp:681
ADAPT_CLASS_STRUCT::MaxNumTimesSeen
uint8_t MaxNumTimesSeen
Definition: adaptive.h:56
ADAPT_CLASS_STRUCT::PermConfigs
BIT_VECTOR PermConfigs
Definition: adaptive.h:59
first_node
#define first_node(l)
Definition: oldlist.h:84
TempConfigFor
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:90
Emalloc
void * Emalloc(int Size)
Definition: emalloc.cpp:31
destroy_nodes
void destroy_nodes(LIST list, void_dest destructor)
Definition: oldlist.cpp:138
PERM_CONFIG_STRUCT
Definition: adaptive.h:43
list_rec
Definition: oldlist.h:73
INT_CLASS_STRUCT
Definition: intproto.h:104
NewIntClass
INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs)
Definition: intproto.cpp:625
ADAPT_CLASS_STRUCT::Config
ADAPTED_CONFIG Config[MAX_NUM_CONFIGS]
Definition: adaptive.h:61
Config
CLUSTERCONFIG Config
Definition: commontraining.cpp:88
test_bit
#define test_bit(array, bit)
Definition: bitvec.h:58
tesseract::Classify::PrintAdaptedTemplates
void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
Definition: adaptive.cpp:244
ADAPT_TEMPLATES_STRUCT
Definition: adaptive.h:65
BITSINLONG
const size_t BITSINLONG
Definition: bitvec.h:30
INT_CLASS_STRUCT::NumProtos
uint16_t NumProtos
Definition: intproto.h:105
ADAPT_TEMPLATES_STRUCT::NumNonEmptyClasses
int NumNonEmptyClasses
Definition: adaptive.h:67
NIL_LIST
#define NIL_LIST
Definition: oldlist.h:68
ADAPT_TEMPLATES_STRUCT::NumPermClasses
uint8_t NumPermClasses
Definition: adaptive.h:68
ADAPTED_CONFIG::Temp
TEMP_CONFIG Temp
Definition: adaptive.h:50
tesseract::CCUtil::unicharset
UNICHARSET unicharset
Definition: ccutil.h:57
ADAPT_CLASS_STRUCT::NumPermConfigs
uint8_t NumPermConfigs
Definition: adaptive.h:55
tesseract::TFile::FRead
int FRead(void *buffer, size_t size, int count)
Definition: serialis.cpp:284
AddIntClass
void AddIntClass(INT_TEMPLATES Templates, CLASS_ID ClassId, INT_CLASS Class)
Definition: intproto.cpp:230
ReadTempConfig
TEMP_CONFIG ReadTempConfig(TFile *fp)
Definition: adaptive.cpp:386
free_adapted_templates
void free_adapted_templates(ADAPT_TEMPLATES templates)
Definition: adaptive.cpp:182
TEMP_CONFIG_STRUCT
Definition: adaptive.h:34
LegalClassId
#define LegalClassId(c)
Definition: intproto.h:175
MAX_NUM_CONFIGS
#define MAX_NUM_CONFIGS
Definition: intproto.h:46
MAX_NUM_PROTOS
#define MAX_NUM_PROTOS
Definition: intproto.h:47
MAX_NUM_CLASSES
#define MAX_NUM_CLASSES
Definition: matchdefs.h:29
tesseract::Classify::ReadAdaptedTemplates
ADAPT_TEMPLATES ReadAdaptedTemplates(TFile *File)
Definition: adaptive.cpp:332
FreeTempConfig
void FreeTempConfig(TEMP_CONFIG Config)
Definition: adaptive.cpp:74
tesseract::TFile
Definition: serialis.h:75
TEMP_PROTO_STRUCT
Definition: adaptive.h:26
adaptive.h
tesseract
Definition: baseapi.h:65
tesseract::Classify::WriteIntTemplates
void WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, const UNICHARSET &target_unicharset)
Definition: intproto.cpp:1017
ConfigIsPermanent
#define ConfigIsPermanent(Class, ConfigId)
Definition: adaptive.h:81
tesseract::Classify::GetFontinfoId
int GetFontinfoId(ADAPT_CLASS Class, uint8_t ConfigId)
Definition: adaptive.cpp:173
NewTempConfig
TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId)
Definition: adaptive.cpp:203
ADAPTED_CONFIG::Perm
PERM_CONFIG Perm
Definition: adaptive.h:51
UNICHAR_ID
int UNICHAR_ID
Definition: unichar.h:36
ADAPT_TEMPLATES_STRUCT::Class
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:69
free_adapted_class
void free_adapted_class(ADAPT_CLASS adapt_class)
Definition: adaptive.cpp:124
NewTempProto
TEMP_PROTO NewTempProto()
Definition: adaptive.cpp:228
INT_TEMPLATES_STRUCT::Class
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:120
WriteAdaptedClass
void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs)
Definition: adaptive.cpp:409
count
int count(LIST var_list)
Definition: oldlist.cpp:79
Efree
void Efree(void *ptr)
Definition: emalloc.cpp:45
iterate
#define iterate(l)
Definition: oldlist.h:92
ADAPT_CLASS_STRUCT::PermProtos
BIT_VECTOR PermProtos
Definition: adaptive.h:58
tesseract::Classify::ReadIntTemplates
INT_TEMPLATES ReadIntTemplates(TFile *fp)
Definition: intproto.cpp:717
free_int_templates
void free_int_templates(INT_TEMPLATES templates)
Definition: intproto.cpp:697
ADAPT_CLASS_STRUCT
Definition: adaptive.h:54
tesseract::File
Definition: fileio.h:55
classify.h
NewAdaptedClass
ADAPT_CLASS NewAdaptedClass()
Definition: adaptive.cpp:102
UNICHARSET::id_to_unichar
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:290
PermConfigFor
#define PermConfigFor(Class, ConfigId)
Definition: adaptive.h:92
tesseract::Classify::WriteAdaptedTemplates
void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
Definition: adaptive.cpp:453
INT_CLASS_STRUCT::NumConfigs
uint8_t NumConfigs
Definition: intproto.h:107
ADAPT_TEMPLATES_STRUCT::Templates
INT_TEMPLATES Templates
Definition: adaptive.h:66
WritePermConfig
void WritePermConfig(FILE *File, PERM_CONFIG Config)
Definition: adaptive.cpp:481
UnusedClassIdIn
#define UnusedClassIdIn(T, c)
Definition: intproto.h:176
push_last
LIST push_last(LIST list, void *item)
Definition: oldlist.cpp:185
ReadAdaptedClass
ADAPT_CLASS ReadAdaptedClass(TFile *fp)
Definition: adaptive.cpp:281
ReadPermConfig
PERM_CONFIG ReadPermConfig(TFile *fp)
Definition: adaptive.cpp:362
UNICHARSET::size
int size() const
Definition: unicharset.h:341
FreeTempProto
void FreeTempProto(void *arg)
Definition: adaptive.cpp:81
IsEmptyAdaptedClass
#define IsEmptyAdaptedClass(Class)
Definition: adaptive.h:78
tesseract::Classify::NewAdaptedTemplates
ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset)
Definition: adaptive.cpp:151
AddAdaptedClass
void AddAdaptedClass(ADAPT_TEMPLATES Templates, ADAPT_CLASS Class, CLASS_ID ClassId)
Definition: adaptive.cpp:45
ADAPT_CLASS_STRUCT::TempProtos
LIST TempProtos
Definition: adaptive.h:60