tesseract  4.0.0-1-g2a2b
adaptive.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: adaptive.cpp
3  ** Purpose: Adaptive matcher.
4  ** Author: Dan Johnson
5  ** History: Fri Mar 8 10:00:21 1991, DSJ, Created.
6  **
7  ** (c) Copyright Hewlett-Packard Company, 1988.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  ******************************************************************************/
18 
19 /*----------------------------------------------------------------------------
20  Include Files and Type Defines
21 ----------------------------------------------------------------------------*/
22 #include "adaptive.h"
23 #include "emalloc.h"
24 #include "globals.h"
25 #include "classify.h"
26 
27 #include <cassert>
28 #include <cstdio>
29 
30 using tesseract::TFile;
31 
32 /*----------------------------------------------------------------------------
33  Public Code
34 ----------------------------------------------------------------------------*/
35 /*---------------------------------------------------------------------------*/
47  ADAPT_CLASS Class,
48  CLASS_ID ClassId) {
49  INT_CLASS IntClass;
50 
51  assert (Templates != nullptr);
52  assert (Class != nullptr);
53  assert (LegalClassId (ClassId));
54  assert (UnusedClassIdIn (Templates->Templates, ClassId));
55  assert (Class->NumPermConfigs == 0);
56 
57  IntClass = NewIntClass (1, 1);
58  AddIntClass (Templates->Templates, ClassId, IntClass);
59 
60  assert (Templates->Class[ClassId] == nullptr);
61  Templates->Class[ClassId] = Class;
62 
63 } /* AddAdaptedClass */
64 
65 
66 /*---------------------------------------------------------------------------*/
76  assert (Config != nullptr);
77  FreeBitVector (Config->Protos);
78  free(Config);
79 } /* FreeTempConfig */
80 
81 /*---------------------------------------------------------------------------*/
// Releases one temporary proto with free().
// The argument is an untyped pointer so this function can be handed to
// destroy_nodes() as the per-node destructor (see free_adapted_class in this
// file, which passes it for the TempProtos list).
// NOTE(review): the nodes pushed onto TempProtos elsewhere in this file are
// malloc'ed as TEMP_PROTO_STRUCT, while the cast here is to PROTO. That makes
// no difference to free(), but confirm the cast is the intended type.
82 void FreeTempProto(void *arg) {
83  PROTO proto = (PROTO) arg;
84 
85  free(proto);
86 }
87 
// Releases a permanent config and the ambiguity array it owns.
// Config must be non-null (enforced by assert only, so unchecked in builds
// that compile asserts out).
88 static void FreePermConfig(PERM_CONFIG Config) {
89  assert(Config != nullptr);
// Ambigs is created with new[] in this file's perm-config reader, hence
// delete[] rather than free().
90  delete [] Config->Ambigs;
// The struct itself is released with free(); its allocation site is not
// visible in this listing -- presumably malloc, TODO confirm.
91  free(Config);
92 }
93 
94 /*---------------------------------------------------------------------------*/
104  ADAPT_CLASS Class;
105 
106  Class = (ADAPT_CLASS) Emalloc (sizeof (ADAPT_CLASS_STRUCT));
107  Class->NumPermConfigs = 0;
108  Class->MaxNumTimesSeen = 0;
109  Class->TempProtos = NIL_LIST;
110 
115 
116  for (int i = 0; i < MAX_NUM_CONFIGS; i++)
117  TempConfigFor (Class, i) = nullptr;
118 
119  return (Class);
120 
121 } /* NewAdaptedClass */
122 
123 
124 /*-------------------------------------------------------------------------*/
// Frees an adapted class together with everything it references:
//  - every non-null config slot (0 .. MAX_NUM_CONFIGS-1), using
//    FreePermConfig for slots flagged permanent and FreeTempConfig otherwise;
//  - the PermProtos and PermConfigs bit vectors;
//  - the TempProtos list, destroying each node with FreeTempProto;
//  - the ADAPT_CLASS_STRUCT itself, via Efree.
// NOTE(review): adapt_class is dereferenced without a null check, and
// free_adapted_templates calls this for every index below NumClasses --
// confirm that no entry in that range can be null.
125 void free_adapted_class(ADAPT_CLASS adapt_class) {
126  for (int i = 0; i < MAX_NUM_CONFIGS; i++) {
// The permanence flag decides which union-style accessor is valid for slot i,
// so it is tested before the matching pointer is read.
127  if (ConfigIsPermanent (adapt_class, i)
128  && PermConfigFor (adapt_class, i) != nullptr)
129  FreePermConfig (PermConfigFor (adapt_class, i));
130  else if (!ConfigIsPermanent (adapt_class, i)
131  && TempConfigFor (adapt_class, i) != nullptr)
132  FreeTempConfig (TempConfigFor (adapt_class, i));
133  }
// The bit vectors are released only after the loop: ConfigIsPermanent takes
// the class as its argument and presumably consults PermConfigs (the readers
// and writers in this file test that vector) -- TODO confirm in adaptive.h.
134  FreeBitVector (adapt_class->PermProtos);
135  FreeBitVector (adapt_class->PermConfigs);
136  destroy_nodes (adapt_class->TempProtos, FreeTempProto);
137  Efree(adapt_class);
138 }
139 
140 
141 /*---------------------------------------------------------------------------*/
142 namespace tesseract {
153  ADAPT_TEMPLATES Templates;
154 
155  Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT));
156 
157  Templates->Templates = NewIntTemplates ();
158  Templates->NumPermClasses = 0;
159  Templates->NumNonEmptyClasses = 0;
160 
161  /* Insert an empty class for each unichar id in unicharset */
162  for (int i = 0; i < MAX_NUM_CLASSES; i++) {
163  Templates->Class[i] = nullptr;
164  if (InitFromUnicharset && i < unicharset.size()) {
165  AddAdaptedClass(Templates, NewAdaptedClass(), i);
166  }
167  }
168 
169  return (Templates);
170 
171 } /* NewAdaptedTemplates */
172 
173 // Returns FontinfoId of the given config of the given adapted class.
// The slot is read through PermConfigFor when ConfigIsPermanent reports it as
// permanent, and through TempConfigFor otherwise. Neither Class nor the
// selected config pointer is null-checked here, so the caller must pass a
// ConfigId whose slot is populated.
174 int Classify::GetFontinfoId(ADAPT_CLASS Class, uint8_t ConfigId) {
175  return (ConfigIsPermanent(Class, ConfigId) ?
176  PermConfigFor(Class, ConfigId)->FontinfoId :
177  TempConfigFor(Class, ConfigId)->FontinfoId);
178 }
179 
180 } // namespace tesseract
181 
182 /*----------------------------------------------------------------------------*/
184 
185  if (templates != nullptr) {
186  for (int i = 0; i < (templates->Templates)->NumClasses; i++)
187  free_adapted_class (templates->Class[i]);
188  free_int_templates (templates->Templates);
189  Efree(templates);
190  }
191 }
192 
193 
194 /*---------------------------------------------------------------------------*/
// Builds and returns a new temporary config.
//   MaxProtoId - highest proto id the config must be able to represent; the
//                proto bit vector is sized for MaxProtoId + 1 bits.
//   FontinfoId - stored unchanged in the config.
// The returned config starts with NumTimesSeen == 1 and every proto bit
// cleared.
// NOTE(review): listing line 207, where Config is declared and allocated, is
// missing from this extract. FreeTempConfig in this file releases configs
// with free(), so the allocation is presumably malloc -- TODO confirm.
204 TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId) {
205  int NumProtos = MaxProtoId + 1;
206 
208  Config->Protos = NewBitVector (NumProtos);
209 
210  Config->NumTimesSeen = 1;
211  Config->MaxProtoId = MaxProtoId;
// ProtoVectorSize is a word count (WordsInVectorOfSize), not a bit count; it
// is the length handed to zero_all_bits below.
212  Config->ProtoVectorSize = WordsInVectorOfSize (NumProtos);
213  zero_all_bits (Config->Protos, Config->ProtoVectorSize);
214  Config->FontinfoId = FontinfoId;
215 
216  return (Config);
217 
218 } /* NewTempConfig */
219 
220 
221 /*---------------------------------------------------------------------------*/
230  return (TEMP_PROTO)malloc(sizeof(TEMP_PROTO_STRUCT));
231 } /* NewTempProto */
232 
233 
234 /*---------------------------------------------------------------------------*/
235 namespace tesseract {
246  INT_CLASS IClass;
247  ADAPT_CLASS AClass;
248 
249  fprintf (File, "\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n");
250  fprintf (File, "Num classes = %d; Num permanent classes = %d\n\n",
251  Templates->NumNonEmptyClasses, Templates->NumPermClasses);
252  fprintf (File, " Id NC NPC NP NPP\n");
253  fprintf (File, "------------------------\n");
254 
255  for (int i = 0; i < (Templates->Templates)->NumClasses; i++) {
256  IClass = Templates->Templates->Class[i];
257  AClass = Templates->Class[i];
258  if (!IsEmptyAdaptedClass (AClass)) {
259  fprintf (File, "%5d %s %3d %3d %3d %3d\n",
261  IClass->NumConfigs, AClass->NumPermConfigs,
262  IClass->NumProtos,
263  IClass->NumProtos - count (AClass->TempProtos));
264  }
265  }
266  fprintf (File, "\n");
267 
268 } /* PrintAdaptedTemplates */
269 } // namespace tesseract
270 
271 
272 /*---------------------------------------------------------------------------*/
283  int NumTempProtos;
284  int NumConfigs;
285  int i;
286  ADAPT_CLASS Class;
287 
288  /* first read high level adapted class structure */
289  Class = (ADAPT_CLASS) Emalloc (sizeof (ADAPT_CLASS_STRUCT));
290  fp->FRead(Class, sizeof(ADAPT_CLASS_STRUCT), 1);
291 
292  /* then read in the definitions of the permanent protos and configs */
295  fp->FRead(Class->PermProtos, sizeof(uint32_t),
297  fp->FRead(Class->PermConfigs, sizeof(uint32_t),
299 
300  /* then read in the list of temporary protos */
301  fp->FRead(&NumTempProtos, sizeof(int), 1);
302  Class->TempProtos = NIL_LIST;
303  for (i = 0; i < NumTempProtos; i++) {
304  TEMP_PROTO TempProto = (TEMP_PROTO)malloc(sizeof(TEMP_PROTO_STRUCT));
305  fp->FRead(TempProto, sizeof(TEMP_PROTO_STRUCT), 1);
306  Class->TempProtos = push_last (Class->TempProtos, TempProto);
307  }
308 
309  /* then read in the adapted configs */
310  fp->FRead(&NumConfigs, sizeof(int), 1);
311  for (i = 0; i < NumConfigs; i++)
312  if (test_bit (Class->PermConfigs, i))
313  Class->Config[i].Perm = ReadPermConfig(fp);
314  else
315  Class->Config[i].Temp = ReadTempConfig(fp);
316 
317  return (Class);
318 
319 } /* ReadAdaptedClass */
320 
321 
322 /*---------------------------------------------------------------------------*/
323 namespace tesseract {
334  ADAPT_TEMPLATES Templates;
335 
336  /* first read the high level adaptive template struct */
337  Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT));
338  fp->FRead(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1);
339 
340  /* then read in the basic integer templates */
341  Templates->Templates = ReadIntTemplates(fp);
342 
343  /* then read in the adaptive info for each class */
344  for (int i = 0; i < (Templates->Templates)->NumClasses; i++) {
345  Templates->Class[i] = ReadAdaptedClass(fp);
346  }
347  return (Templates);
348 
349 } /* ReadAdaptedTemplates */
350 } // namespace tesseract
351 
352 
353 /*---------------------------------------------------------------------------*/
365  uint8_t NumAmbigs;
366  fp->FRead(&NumAmbigs, sizeof(NumAmbigs), 1);
367  Config->Ambigs = new UNICHAR_ID[NumAmbigs + 1];
368  fp->FRead(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs);
369  Config->Ambigs[NumAmbigs] = -1;
370  fp->FRead(&(Config->FontinfoId), sizeof(int), 1);
371 
372  return (Config);
373 
374 } /* ReadPermConfig */
375 
376 
377 /*---------------------------------------------------------------------------*/
389  fp->FRead(Config, sizeof(TEMP_CONFIG_STRUCT), 1);
390 
391  Config->Protos = NewBitVector (Config->ProtoVectorSize * BITSINLONG);
392  fp->FRead(Config->Protos, sizeof(uint32_t), Config->ProtoVectorSize);
393 
394  return (Config);
395 
396 } /* ReadTempConfig */
397 
398 
399 /*---------------------------------------------------------------------------*/
// Serializes one adapted class to File as raw binary, in this order:
//   1. the ADAPT_CLASS_STRUCT itself;
//   2. the PermProtos words, then the PermConfigs words;
//   3. the number of temporary protos, then each TEMP_PROTO_STRUCT;
//   4. NumConfigs, then each of the first NumConfigs configs, written with
//      WritePermConfig or WriteTempConfig according to its PermConfigs bit.
// The class reader in this file consumes the same sequence.
//   File       - destination stream.
//   Class      - class to write.
//   NumConfigs - number of config slots to write (slots 0 .. NumConfigs-1).
// None of the fwrite return values are checked.
410 void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs) {
411  int NumTempProtos;
412  LIST TempProtos;
413  int i;
414 
415  /* first write high level adapted class structure */
// This is a raw image of the struct, pointer members included; the reader in
// this file reassigns TempProtos and the config slots after loading it.
416  fwrite(Class, sizeof(ADAPT_CLASS_STRUCT), 1, File);
417 
418  /* then write out the definitions of the permanent protos and configs */
// NOTE(review): listing lines 420 and 422 (the element-count and stream
// arguments of the next two fwrite calls) are missing from this extract.
419  fwrite(Class->PermProtos, sizeof(uint32_t),
421  fwrite(Class->PermConfigs, sizeof(uint32_t),
423 
424  /* then write out the list of temporary protos */
425  NumTempProtos = count (Class->TempProtos);
426  fwrite(&NumTempProtos, sizeof(int), 1, File);
// Walk a local copy of the list head so Class->TempProtos is left untouched.
427  TempProtos = Class->TempProtos;
428  iterate (TempProtos) {
429  void* proto = first_node(TempProtos);
430  fwrite(proto, sizeof(TEMP_PROTO_STRUCT), 1, File);
431  }
432 
433  /* then write out the adapted configs */
434  fwrite(&NumConfigs, sizeof(int), 1, File);
435  for (i = 0; i < NumConfigs; i++)
436  if (test_bit (Class->PermConfigs, i))
437  WritePermConfig (File, Class->Config[i].Perm);
438  else
439  WriteTempConfig (File, Class->Config[i].Temp);
440 
441 } /* WriteAdaptedClass */
442 
443 
444 /*---------------------------------------------------------------------------*/
445 namespace tesseract {
455  int i;
456 
457  /* first write the high level adaptive template struct */
458  fwrite(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1, File);
459 
460  /* then write out the basic integer templates */
462 
463  /* then write out the adaptive info for each class */
464  for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
465  WriteAdaptedClass (File, Templates->Class[i],
466  Templates->Templates->Class[i]->NumConfigs);
467  }
468 } /* WriteAdaptedTemplates */
469 } // namespace tesseract
470 
471 
472 /*---------------------------------------------------------------------------*/
// Serializes a permanent config to File as raw binary:
//   1. one byte holding the number of ambiguity ids;
//   2. that many UNICHAR_ID values from Config->Ambigs;
//   3. Config->FontinfoId as an int.
// Config must be non-null (assert only). fwrite results are not checked.
482 void WritePermConfig(FILE *File, PERM_CONFIG Config) {
483  uint8_t NumAmbigs = 0;
484 
485  assert (Config != nullptr);
// Count the leading entries that are strictly positive. The perm-config
// reader in this file terminates the array with -1, which ends this scan.
// NOTE(review): an id of 0 would also end the scan, and the uint8_t counter
// assumes fewer than 256 entries -- confirm both against the data invariants.
486  while (Config->Ambigs[NumAmbigs] > 0) ++NumAmbigs;
487 
488  fwrite(&NumAmbigs, sizeof(uint8_t), 1, File);
489  fwrite(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File);
490  fwrite(&(Config->FontinfoId), sizeof(int), 1, File);
491 } /* WritePermConfig */
492 
493 
494 /*---------------------------------------------------------------------------*/
// Serializes a temporary config to File as raw binary: first the
// TEMP_CONFIG_STRUCT itself, then ProtoVectorSize 32-bit words of its Protos
// bit vector. The temp-config reader in this file consumes the same two
// records and allocates a fresh Protos vector from the stored ProtoVectorSize.
// Config must be non-null (assert only). fwrite results are not checked.
504 void WriteTempConfig(FILE *File, TEMP_CONFIG Config) {
505  assert (Config != nullptr);
506 
507  fwrite(Config, sizeof (TEMP_CONFIG_STRUCT), 1, File);
508  fwrite(Config->Protos, sizeof (uint32_t), Config->ProtoVectorSize, File);
509 
510 } /* WriteTempConfig */
void AddAdaptedClass(ADAPT_TEMPLATES Templates, ADAPT_CLASS Class, CLASS_ID ClassId)
Definition: adaptive.cpp:46
void WritePermConfig(FILE *File, PERM_CONFIG Config)
Definition: adaptive.cpp:482
ADAPT_TEMPLATES ReadAdaptedTemplates(TFile *File)
Definition: adaptive.cpp:333
int UNICHAR_ID
Definition: unichar.h:35
CLUSTERCONFIG Config
int FRead(void *buffer, size_t size, int count)
Definition: serialis.cpp:270
TEMP_CONFIG_STRUCT * TEMP_CONFIG
Definition: adaptive.h:47
void free_int_templates(INT_TEMPLATES templates)
Definition: intproto.cpp:708
ADAPT_CLASS_STRUCT * ADAPT_CLASS
Definition: adaptive.h:72
#define WordsInVectorOfSize(NumBits)
Definition: bitvec.h:63
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:121
int count(LIST var_list)
Definition: oldlist.cpp:98
PROTO_STRUCT * PROTO
Definition: protos.h:51
void WriteTempConfig(FILE *File, TEMP_CONFIG Config)
Definition: adaptive.cpp:504
int GetFontinfoId(ADAPT_CLASS Class, uint8_t ConfigId)
Definition: adaptive.cpp:174
void * Emalloc(int Size)
Definition: emalloc.cpp:31
TEMP_CONFIG ReadTempConfig(TFile *fp)
Definition: adaptive.cpp:387
void Efree(void *ptr)
Definition: emalloc.cpp:45
#define zero_all_bits(array, length)
Definition: bitvec.h:33
ADAPT_TEMPLATES_STRUCT * ADAPT_TEMPLATES
Definition: adaptive.h:82
ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset)
Definition: adaptive.cpp:152
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:36
#define UnusedClassIdIn(T, c)
Definition: intproto.h:175
void FreeBitVector(BIT_VECTOR BitVector)
Definition: bitvec.cpp:51
int size() const
Definition: unicharset.h:336
void free_adapted_templates(ADAPT_TEMPLATES templates)
Definition: adaptive.cpp:183
uint8_t NumPermConfigs
Definition: adaptive.h:64
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:101
PERM_CONFIG Perm
Definition: adaptive.h:59
uint8_t NumConfigs
Definition: intproto.h:108
#define LegalClassId(c)
Definition: intproto.h:174
#define MAX_NUM_CONFIGS
Definition: intproto.h:47
#define MAX_NUM_PROTOS
Definition: intproto.h:48
void FreeTempConfig(TEMP_CONFIG Config)
Definition: adaptive.cpp:75
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:80
void destroy_nodes(LIST list, void_dest destructor)
Definition: oldlist.cpp:186
#define PermConfigFor(Class, ConfigId)
Definition: adaptive.h:104
BIT_VECTOR NewBitVector(int NumBits)
Definition: bitvec.cpp:82
UNICHARSET unicharset
Definition: ccutil.h:68
TEMP_PROTO NewTempProto()
Definition: adaptive.cpp:229
void WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, const UNICHARSET &target_unicharset)
Definition: intproto.cpp:1030
BIT_VECTOR PermConfigs
Definition: adaptive.h:68
LIST push_last(LIST list, void *item)
Definition: oldlist.cpp:297
void free_adapted_class(ADAPT_CLASS adapt_class)
Definition: adaptive.cpp:125
void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
Definition: adaptive.cpp:245
uint8_t NumPermClasses
Definition: adaptive.h:78
#define first_node(l)
Definition: oldlist.h:141
TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId)
Definition: adaptive.cpp:204
#define NIL_LIST
Definition: oldlist.h:127
#define ConfigIsPermanent(Class, ConfigId)
Definition: adaptive.h:92
PERM_CONFIG_STRUCT * PERM_CONFIG
Definition: adaptive.h:54
#define IsEmptyAdaptedClass(Class)
Definition: adaptive.h:89
ADAPT_CLASS NewAdaptedClass()
Definition: adaptive.cpp:103
BIT_VECTOR PermProtos
Definition: adaptive.h:67
void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
Definition: adaptive.cpp:454
ADAPTED_CONFIG Config[MAX_NUM_CONFIGS]
Definition: adaptive.h:70
INT_TEMPLATES ReadIntTemplates(TFile *fp)
Definition: intproto.cpp:728
#define iterate(l)
Definition: oldlist.h:161
uint8_t MaxNumTimesSeen
Definition: adaptive.h:65
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:290
INT_TEMPLATES NewIntTemplates()
Definition: intproto.cpp:692
ADAPT_CLASS ReadAdaptedClass(TFile *fp)
Definition: adaptive.cpp:282
void AddIntClass(INT_TEMPLATES Templates, CLASS_ID ClassId, INT_CLASS Class)
Definition: intproto.cpp:232
TEMP_PROTO_STRUCT * TEMP_PROTO
Definition: adaptive.h:37
uint16_t NumProtos
Definition: intproto.h:106
#define BITSINLONG
Definition: bitvec.h:27
void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs)
Definition: adaptive.cpp:410
INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs)
Definition: intproto.cpp:636
#define MAX_NUM_CLASSES
Definition: matchdefs.h:32
INT_TEMPLATES Templates
Definition: adaptive.h:76
void FreeTempProto(void *arg)
Definition: adaptive.cpp:82
TEMP_CONFIG Temp
Definition: adaptive.h:58
#define test_bit(array, bit)
Definition: bitvec.h:61
PERM_CONFIG ReadPermConfig(TFile *fp)
Definition: adaptive.cpp:363