tesseract  5.0.0-alpha-619-ge9db
shapetable.h
Go to the documentation of this file.
1 // Copyright 2010 Google Inc. All Rights Reserved.
2 // Author: rays@google.com (Ray Smith)
4 // File: shapetable.h
5 // Description: Class to map a classifier shape index to unicharset
6 // indices and font indices.
7 // Author: Ray Smith
8 //
9 // (C) Copyright 2010, Google Inc.
10 // Licensed under the Apache License, Version 2.0 (the "License");
11 // you may not use this file except in compliance with the License.
12 // You may obtain a copy of the License at
13 // http://www.apache.org/licenses/LICENSE-2.0
14 // Unless required by applicable law or agreed to in writing, software
15 // distributed under the License is distributed on an "AS IS" BASIS,
16 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 // See the License for the specific language governing permissions and
18 // limitations under the License.
19 //
21 
22 #ifndef TESSERACT_CLASSIFY_SHAPETABLE_H_
23 #define TESSERACT_CLASSIFY_SHAPETABLE_H_
24 
25 #include "bitvector.h"
26 #include "fontinfo.h"
27 #include "genericheap.h"
29 #include "intmatcher.h"
30 
31 class STRING;
32 class UNICHARSET;
33 
34 namespace tesseract {
35 
36 class ShapeTable;
37 
38 // Simple struct to hold a single classifier unichar selection, a corresponding
39 // rating, and a list of appropriate fonts.
40 struct UnicharRating {
42  : unichar_id(0), rating(0.0f), adapted(false), config(0),
43  feature_misses(0) {}
44  UnicharRating(int u, float r)
45  : unichar_id(u), rating(r), adapted(false), config(0), feature_misses(0) {}
46 
47  // Print debug info: emits one line through tprintf holding the unichar_id,
  // rating, adapted flag, config, feature_misses and the number of entries
  // in fonts.
48  void Print() const {
49  tprintf("Unichar-id=%d, rating=%g, adapted=%d, config=%d, misses=%d,"
50  " %d fonts\n", unichar_id, rating, adapted, config, feature_misses,
51  fonts.size());
52  }
53 
54  // Sort function to sort ratings appropriately by descending rating.
  // Both arguments are untyped pointers that are cast to UnicharRating.
  // Returns -1 when *t1 has the higher rating and 1 when it has the lower
  // one. For equal ratings the result is the difference of the unichar_ids,
  // which orders ties by ascending unichar_id.
  // NOTE(review): the signature matches a qsort-style comparator; presumably
  // it is handed to a sort routine - confirm against callers.
55  static int SortDescendingRating(const void* t1, const void* t2) {
56  const auto* a = static_cast<const UnicharRating*>(t1);
57  const auto* b = static_cast<const UnicharRating*>(t2);
58  if (a->rating > b->rating) {
59  return -1;
60  } else if (a->rating < b->rating) {
61  return 1;
62  } else {
63  return a->unichar_id - b->unichar_id;  // Tie on rating: use unichar_id.
64  }
65  }
66  // Helper function to get the index of the first result with the required
67  // unichar_id. If the results are sorted by rating, this will also be the
68  // best result with the required unichar_id.
69  // Returns -1 if the unichar_id is not found
70  static int FirstResultWithUnichar(const GenericVector<UnicharRating>& results,
72 
73  // Index into some UNICHARSET table indicates the class of the answer.
75  // Rating from classifier with 1.0 perfect and 0.0 impossible.
76  // Call it a probability if you must.
77  float rating;
78  // True if this result is from the adaptive classifier.
79  bool adapted;
80  // Index of best matching font configuration of result.
81  uint8_t config;
82  // Number of features that were total misses - were liked by no classes.
83  uint16_t feature_misses;
84  // Unsorted collection of fontinfo ids and scores. Note that a raw result
85  // from the IntegerMatch will contain config ids, that require transforming
86  // to fontinfo ids via fontsets and (possibly) shapetable.
88 };
89 
90 // Classifier result from a low-level classification is an index into some
91 // ShapeTable and a rating.
92 struct ShapeRating {
94  : shape_id(0), rating(0.0f), raw(0.0f), font(0.0f),
95  joined(false), broken(false) {}
96  ShapeRating(int s, float r)
97  : shape_id(s), rating(r), raw(1.0f), font(0.0f),
98  joined(false), broken(false) {}
99 
100  // Sort function to sort ratings appropriately by descending rating.
  // Both arguments are untyped pointers that are cast to ShapeRating.
  // Returns -1 when *t1 has the higher rating and 1 when it has the lower
  // one. For equal ratings the result is the difference of the shape_ids,
  // which orders ties by ascending shape_id.
  // NOTE(review): the signature matches a qsort-style comparator; presumably
  // it is handed to a sort routine - confirm against callers.
101  static int SortDescendingRating(const void* t1, const void* t2) {
102  const auto* a = static_cast<const ShapeRating*>(t1);
103  const auto* b = static_cast<const ShapeRating*>(t2);
104  if (a->rating > b->rating) {
105  return -1;
106  } else if (a->rating < b->rating) {
107  return 1;
108  } else {
109  return a->shape_id - b->shape_id;  // Tie on rating: use shape_id.
110  }
111  }
112  // Helper function to get the index of the first result with the required
113  // unichar_id. If the results are sorted by rating, this will also be the
114  // best result with the required unichar_id.
115  // Returns -1 if the unichar_id is not found
116  static int FirstResultWithUnichar(const GenericVector<ShapeRating>& results,
117  const ShapeTable& shape_table,
118  UNICHAR_ID unichar_id);
119 
120  // Index into some shape table indicates the class of the answer.
121  int shape_id;
122  // Rating from classifier with 1.0 perfect and 0.0 impossible.
123  // Call it a probability if you must.
124  float rating;
125  // Subsidiary rating that a classifier may use internally.
126  float raw;
127  // Subsidiary rating that a classifier may use internally.
128  float font;
129  // Flag indicating that the input may be joined.
130  bool joined;
131  // Flag indicating that the input may be broken (a fragment).
132  bool broken;
133 };
134 
135 // Simple struct to hold an entry for a heap-based priority queue of
136 // ShapeRating.
139  ShapeQueueEntry(const ShapeRating& rating, int level0)
140  : result(rating), level(level0) {}
141 
142  // Sort by decreasing rating and decreasing level for equal rating.
  // That is, an entry compares as "less" when its rating is strictly higher,
  // or when the ratings are exactly equal (float ==) and its level is
  // strictly higher. Returns false in every other case.
143  bool operator<(const ShapeQueueEntry& other) const {
144  if (result.rating > other.result.rating) return true;
145  if (result.rating == other.result.rating)
146  return level > other.level;  // Equal ratings: the deeper level wins.
147  return false;
148  }
149 
150  // Output from classifier.
152  // Which level in the tree did this come from?
153  int level;
154 };
156 
157 // Simple struct to hold a set of fonts associated with a single unichar-id.
158 // A vector of UnicharAndFonts makes a shape.
161  }
162  UnicharAndFonts(int uni_id, int font_id) : unichar_id(uni_id) {  // Entry for uni_id.
163  font_ids.push_back(font_id);  // Start the font list with the one given font.
164  }
165 
166  // Writes to the given file. Returns false in case of error.
167  bool Serialize(FILE* fp) const;
168  // Reads from the given file. Returns false in case of error.
169  bool DeSerialize(TFile* fp);
170 
171  // Sort function to sort a pair of UnicharAndFonts by unichar_id.
172  static int SortByUnicharId(const void* v1, const void* v2);
173 
175  int32_t unichar_id;
176 };
177 
178 // A Shape is a collection of unichar-ids and a list of fonts associated with
179 // each, organized as a vector of UnicharAndFonts. Conceptually a Shape is
180 // a classifiable unit, and represents a group of characters or parts of
181 // characters that have a similar or identical shape. Shapes/ShapeTables may
182 // be organized hierarchically from identical shapes at the leaves to vaguely
183 // similar shapes near the root.
184 class Shape {
185  public:
186  Shape() : destination_index_(-1) {}
187 
188  // Writes to the given file. Returns false in case of error.
189  bool Serialize(FILE* fp) const;
190  // Reads from the given file. Returns false in case of error.
191  bool DeSerialize(TFile* fp);
192 
193  int destination_index() const {  // Getter for destination_index_.
194  return destination_index_;  // The Shape() constructor sets this to -1.
195  }
196  void set_destination_index(int index) {  // Setter for destination_index_.
197  destination_index_ = index;  // Stored as given; no validation is done here.
198  }
199  int size() const {  // Number of entries in unichars_.
200  return unichars_.size();
201  }
202  // Returns a read-only reference to the UnicharAndFonts entry for the given
203  // index, which must be in the range [0, size()).
204  const UnicharAndFonts& operator[](int index) const {
205  return unichars_[index];  // No range check is made in this function.
206  }
207  // Sets the unichar_id of the entry at the given index to the new unichar_id.
  // NOTE(review): only the unichar_id field is written; unichars_sorted_ is
  // not touched here - confirm callers re-sort if the order matters.
208  void SetUnicharId(int index, int unichar_id) {
209  unichars_[index].unichar_id = unichar_id;
210  }
211  // Adds a font_id for the given unichar_id. If the unichar_id is not
212  // in the shape, it is added.
213  void AddToShape(int unichar_id, int font_id);
214  // Adds everything in other to this.
215  void AddShape(const Shape& other);
216  // Returns true if the shape contains the given unichar_id, font_id pair.
217  bool ContainsUnicharAndFont(int unichar_id, int font_id) const;
218  // Returns true if the shape contains the given unichar_id, ignoring font.
219  bool ContainsUnichar(int unichar_id) const;
220  // Returns true if the shape contains the given font, ignoring unichar_id.
221  bool ContainsFont(int font_id) const;
222  // Returns true if the shape contains the given font properties, ignoring
223  // unichar_id.
224  bool ContainsFontProperties(const FontInfoTable& font_table,
225  uint32_t properties) const;
226  // Returns true if the shape contains multiple different font properties,
227  // ignoring unichar_id.
228  bool ContainsMultipleFontProperties(const FontInfoTable& font_table) const;
229  // Returns true if this shape is equal to other (ignoring order of unichars
230  // and fonts).
231  bool operator==(const Shape& other) const;
232  // Returns true if this is a subset (including equal) of other.
233  bool IsSubsetOf(const Shape& other) const;
234  // Returns true if the lists of unichar ids are the same in this and other,
235  // ignoring fonts.
236  // NOT const, as it will sort the unichars on demand.
237  bool IsEqualUnichars(Shape* other);
238 
239  private:
240  // Sorts the unichars_ vector by unichar.
241  void SortUnichars();
242 
243  // Flag indicates that the unichars are sorted, allowing faster set
244  // operations with another shape.
245  bool unichars_sorted_ = false;
246  // If this Shape is part of a ShapeTable the destination_index_ is the index
247  // of some other shape in the ShapeTable with which this shape is merged.
248  int destination_index_ = 0;
249  // Array of unichars, each with a set of fonts. Each unichar has at most
250  // one entry in the vector.
252 };
253 
254 // ShapeTable is a class to encapsulate the triple indirection that is
255 // used here.
256 // ShapeTable is a vector of shapes.
257 // Each shape is a vector of UnicharAndFonts representing the set of unichars
258 // that the shape represents.
259 // Each UnicharAndFonts also lists the fonts of the unichar_id that were
260 // mapped to the shape during training.
261 class ShapeTable {
262  public:
263  ShapeTable();
264  // The UNICHARSET reference supplied here, or in set_unicharset below must
265  // exist for the entire life of the ShapeTable. It is used only by DebugStr.
266  explicit ShapeTable(const UNICHARSET& unicharset);
267 
268  // Writes to the given file. Returns false in case of error.
269  bool Serialize(FILE* fp) const;
270  // Reads from the given file. Returns false in case of error.
271  bool DeSerialize(TFile* fp);
272 
273  // Accessors.
274  int NumShapes() const {  // Number of entries held in shape_table_.
275  return shape_table_.size();
276  }
277  const UNICHARSET& unicharset() const {  // Read-only access to the UNICHARSET.
278  return *unicharset_;  // NOTE(review): dereferences without a null check - confirm it is always set first.
279  }
280  // Returns the number of fonts used in this ShapeTable, computing it if
281  // necessary.
282  int NumFonts() const;
283  // ShapeTable stores a pointer to the UNICHARSET, so it must persist for the
284  // entire life of the ShapeTable.
286  unicharset_ = &unicharset;
287  }
288  // Re-indexes the class_ids in the shapetable according to the given map.
289  // Useful in conjunction with set_unicharset.
290  void ReMapClassIds(const GenericVector<int>& unicharset_map);
291  // Returns a string listing the classes/fonts in a shape.
292  STRING DebugStr(int shape_id) const;
293  // Returns a debug string summarizing the table.
294  STRING SummaryStr() const;
295 
296  // Adds a new shape starting with the given unichar_id and font_id.
297  // Returns the assigned index.
298  int AddShape(int unichar_id, int font_id);
299  // Adds a copy of the given shape unless it is already present.
300  // Returns the assigned index or index of existing shape if already present.
301  int AddShape(const Shape& other);
302  // Removes the shape given by the shape index. All indices above are changed!
303  void DeleteShape(int shape_id);
304  // Adds a font_id to the given existing shape index for the given
305  // unichar_id. If the unichar_id is not in the shape, it is added.
306  void AddToShape(int shape_id, int unichar_id, int font_id);
307  // Adds the given shape to the existing shape with the given index.
308  void AddShapeToShape(int shape_id, const Shape& other);
309  // Returns the id of the shape that contains the given unichar and font.
310  // If not found, returns -1.
311  // If font_id < 0, the font_id is ignored and the first shape that matches
312  // the unichar_id is returned.
313  int FindShape(int unichar_id, int font_id) const;
314  // Returns the first unichar_id and font_id in the given shape.
315  void GetFirstUnicharAndFont(int shape_id,
316  int* unichar_id, int* font_id) const;
317 
318  // Accessors for the Shape with the given shape_id.
319  const Shape& GetShape(int shape_id) const {  // Read-only access to one Shape.
320  return *shape_table_[shape_id];  // Dereferences the stored pointer; no range check here.
321  }
322  Shape* MutableShape(int shape_id) {  // Modifiable access to one Shape.
323  return shape_table_[shape_id];  // Hands back the stored pointer itself.
324  }
325 
326  // Expands all the classes/fonts in the shape individually to build
327  // a ShapeTable.
328  int BuildFromShape(const Shape& shape, const ShapeTable& master_shapes);
329 
330  // Returns true if the shapes are already merged.
331  bool AlreadyMerged(int shape_id1, int shape_id2) const;
332  // Returns true if any shape contains multiple unichars.
333  bool AnyMultipleUnichars() const;
334  // Returns the maximum number of unichars over all shapes.
335  int MaxNumUnichars() const;
336  // Merges shapes with a common unichar over the [start, end) interval.
337  // Assumes single unichar per shape.
338  void ForceFontMerges(int start, int end);
339  // Returns the number of unichars in the master shape.
340  int MasterUnicharCount(int shape_id) const;
341  // Returns the sum of the font counts in the master shape.
342  int MasterFontCount(int shape_id) const;
343  // Returns the number of unichars that would result from merging the shapes.
344  int MergedUnicharCount(int shape_id1, int shape_id2) const;
345  // Merges two shape_ids, leaving shape_id2 marked as merged.
346  void MergeShapes(int shape_id1, int shape_id2);
347  // Swaps two shape_ids.
348  void SwapShapes(int shape_id1, int shape_id2);
349  // Appends the master shapes from other to this.
350  // Used to create a clean ShapeTable from a merged one, or to create a
351  // copy of a ShapeTable.
352  // If not nullptr, shape_map is set to map other shape_ids to this's shape_ids.
353  void AppendMasterShapes(const ShapeTable& other,
354  GenericVector<int>* shape_map);
355  // Returns the number of master shapes remaining after merging.
356  int NumMasterShapes() const;
357  // Returns the destination of this shape, (if merged), taking into account
358  // the fact that the destination may itself have been merged.
359  // For a non-merged shape, returns the input shape_id.
360  int MasterDestinationIndex(int shape_id) const;
361 
362  // Returns false if the unichars in neither shape are a subset of the other.
363  bool SubsetUnichar(int shape_id1, int shape_id2) const;
364  // Returns false if the unichars in neither shape are a subset of the other.
365  bool MergeSubsetUnichar(int merge_id1, int merge_id2, int shape_id) const;
366  // Returns true if the unichar sets are equal between the shapes.
367  bool EqualUnichars(int shape_id1, int shape_id2) const;
368  bool MergeEqualUnichars(int merge_id1, int merge_id2, int shape_id) const;
369  // Returns true if there is a common unichar between the shapes.
370  bool CommonUnichars(int shape_id1, int shape_id2) const;
371  // Returns true if there is a common font id between the shapes.
372  bool CommonFont(int shape_id1, int shape_id2) const;
373 
374  // Adds the unichars of the given shape_id to the vector of results. Any
375  // unichar_id that is already present just has the fonts added to the
376  // font set for that result without adding a new entry in the vector.
377  // NOTE: it is assumed that the results are given to this function in order
378  // of decreasing rating.
379  // The unichar_map vector indicates the index of the results entry containing
380  // each unichar, or -1 if the unichar is not yet included in results.
381  void AddShapeToResults(const ShapeRating& shape_rating,
382  GenericVector<int>* unichar_map,
383  GenericVector<UnicharRating>* results) const;
384 
385  private:
386  // Adds the given unichar_id to the results if needed, updating unichar_map
387  // and returning the index of unichar in results.
388  int AddUnicharToResults(int unichar_id, float rating,
389  GenericVector<int>* unichar_map,
390  GenericVector<UnicharRating>* results) const;
391 
392  // Pointer to a provided unicharset used only by the DebugStr member.
393  const UNICHARSET* unicharset_;
394  // Vector of pointers to the Shapes in this ShapeTable.
395  PointerVector<Shape> shape_table_;
396 
397  // Cached data calculated on demand.
398  mutable int num_fonts_;
399 };
400 
401 } // namespace tesseract.
402 
403 #endif // TESSERACT_CLASSIFY_SHAPETABLE_H_
tesseract::GenericHeap
Definition: genericheap.h:58
tesseract::Shape::ContainsUnichar
bool ContainsUnichar(int unichar_id) const
Definition: shapetable.cpp:147
tesseract::ShapeRating::FirstResultWithUnichar
static int FirstResultWithUnichar(const GenericVector< ShapeRating > &results, const ShapeTable &shape_table, UNICHAR_ID unichar_id)
Definition: shapetable.cpp:40
tesseract::ShapeTable::Serialize
bool Serialize(FILE *fp) const
Definition: shapetable.cpp:241
tesseract::Shape::operator==
bool operator==(const Shape &other) const
Definition: shapetable.cpp:197
tesseract::ShapeTable::SwapShapes
void SwapShapes(int shape_id1, int shape_id2)
Definition: shapetable.cpp:523
tesseract::Shape::ContainsFont
bool ContainsFont(int font_id) const
Definition: shapetable.cpp:157
tesseract::ShapeTable::CommonFont
bool CommonFont(int shape_id1, int shape_id2) const
Definition: shapetable.cpp:641
tesseract::Shape
Definition: shapetable.h:184
tesseract::ShapeQueueEntry::ShapeQueueEntry
ShapeQueueEntry(const ShapeRating &rating, int level0)
Definition: shapetable.h:139
tesseract::Shape::ContainsMultipleFontProperties
bool ContainsMultipleFontProperties(const FontInfoTable &font_table) const
Definition: shapetable.cpp:182
tesseract::UnicharRating
Definition: shapetable.h:40
tesseract::UnicharRating::unichar_id
UNICHAR_ID unichar_id
Definition: shapetable.h:74
tesseract::ShapeTable::NumMasterShapes
int NumMasterShapes() const
Definition: shapetable.cpp:670
tesseract::ShapeRating::shape_id
int shape_id
Definition: shapetable.h:121
tesseract::UnicharRating::fonts
GenericVector< ScoredFont > fonts
Definition: shapetable.h:87
tesseract::ShapeTable::AppendMasterShapes
void AppendMasterShapes(const ShapeTable &other, GenericVector< int > *shape_map)
Definition: shapetable.cpp:656
tesseract::ShapeTable::NumShapes
int NumShapes() const
Definition: shapetable.h:274
tesseract::ShapeRating::raw
float raw
Definition: shapetable.h:126
tesseract::UnicharAndFonts::UnicharAndFonts
UnicharAndFonts(int uni_id, int font_id)
Definition: shapetable.h:162
tesseract::PointerVector
Definition: genericvector.h:417
STRING
Definition: strngs.h:45
tesseract::ShapeTable::ForceFontMerges
void ForceFontMerges(int start, int end)
Definition: shapetable.cpp:468
tesseract::UnicharAndFonts::SortByUnicharId
static int SortByUnicharId(const void *v1, const void *v2)
Definition: shapetable.cpp:79
tesseract::ShapeRating::ShapeRating
ShapeRating(int s, float r)
Definition: shapetable.h:96
tesseract::Shape::AddShape
void AddShape(const Shape &other)
Definition: shapetable.cpp:120
tesseract::ShapeTable::AddToShape
void AddToShape(int shape_id, int unichar_id, int font_id)
Definition: shapetable.cpp:369
tesseract::UnicharAndFonts
Definition: shapetable.h:159
tesseract::Shape::ContainsUnicharAndFont
bool ContainsUnicharAndFont(int unichar_id, int font_id) const
Definition: shapetable.cpp:131
tesseract::ShapeRating::rating
float rating
Definition: shapetable.h:124
tesseract::Shape::DeSerialize
bool DeSerialize(TFile *fp)
Definition: shapetable.cpp:92
tesseract::ShapeTable::ShapeTable
ShapeTable()
Definition: shapetable.cpp:234
tesseract::ShapeTable::AlreadyMerged
bool AlreadyMerged(int shape_id1, int shape_id2) const
Definition: shapetable.cpp:439
tesseract::FontInfoTable
Definition: fontinfo.h:146
tesseract::ShapeTable::DeSerialize
bool DeSerialize(TFile *fp)
Definition: shapetable.cpp:246
tesseract::ShapeTable::EqualUnichars
bool EqualUnichars(int shape_id1, int shape_id2) const
Definition: shapetable.cpp:587
tesseract::ShapeTable::SubsetUnichar
bool SubsetUnichar(int shape_id1, int shape_id2) const
Definition: shapetable.cpp:543
tesseract::ShapeQueueEntry::level
int level
Definition: shapetable.h:153
tesseract::ShapeTable::MergeEqualUnichars
bool MergeEqualUnichars(int merge_id1, int merge_id2, int shape_id) const
Definition: shapetable.cpp:604
tesseract::ShapeRating::font
float font
Definition: shapetable.h:128
tesseract::UnicharRating::config
uint8_t config
Definition: shapetable.h:81
tesseract::ShapeTable::NumFonts
int NumFonts() const
Definition: shapetable.cpp:254
genericvector.h
GenericVector::push_back
int push_back(T object)
Definition: genericvector.h:799
tesseract::ShapeTable::MutableShape
Shape * MutableShape(int shape_id)
Definition: shapetable.h:322
tesseract::UnicharAndFonts::unichar_id
int32_t unichar_id
Definition: shapetable.h:175
tesseract::ShapeTable::set_unicharset
void set_unicharset(const UNICHARSET &unicharset)
Definition: shapetable.h:285
tesseract::ShapeTable::GetFirstUnicharAndFont
void GetFirstUnicharAndFont(int shape_id, int *unichar_id, int *font_id) const
Definition: shapetable.cpp:404
tesseract::ShapeTable::unicharset
const UNICHARSET & unicharset() const
Definition: shapetable.h:277
tesseract::ShapeTable::MergeSubsetUnichar
bool MergeSubsetUnichar(int merge_id1, int merge_id2, int shape_id) const
Definition: shapetable.cpp:561
tesseract::UnicharRating::rating
float rating
Definition: shapetable.h:77
tesseract::Shape::Serialize
bool Serialize(FILE *fp) const
Definition: shapetable.cpp:86
tesseract::Shape::Shape
Shape()
Definition: shapetable.h:186
tesseract::ShapeTable::CommonUnichars
bool CommonUnichars(int shape_id1, int shape_id2) const
Definition: shapetable.cpp:629
tesseract::TFile
Definition: serialis.h:75
UNICHARSET
Definition: unicharset.h:145
tesseract::Shape::SetUnicharId
void SetUnicharId(int index, int unichar_id)
Definition: shapetable.h:208
tesseract::ShapeTable::DebugStr
STRING DebugStr(int shape_id) const
Definition: shapetable.cpp:281
tesseract::UnicharRating::Print
void Print() const
Definition: shapetable.h:48
tesseract::ShapeTable::GetShape
const Shape & GetShape(int shape_id) const
Definition: shapetable.h:319
tesseract::ShapeRating
Definition: shapetable.h:92
tesseract::ShapeTable::ReMapClassIds
void ReMapClassIds(const GenericVector< int > &unicharset_map)
Definition: shapetable.cpp:271
tesseract
Definition: baseapi.h:65
fontinfo.h
tesseract::ShapeTable::AddShapeToShape
void AddShapeToShape(int shape_id, const Shape &other)
Definition: shapetable.cpp:376
tesseract::ShapeTable::MasterDestinationIndex
int MasterDestinationIndex(int shape_id) const
Definition: shapetable.cpp:531
tesseract::ShapeQueueEntry::operator<
bool operator<(const ShapeQueueEntry &other) const
Definition: shapetable.h:143
bitvector.h
tesseract::Shape::IsSubsetOf
bool IsSubsetOf(const Shape &other) const
Definition: shapetable.cpp:202
UNICHAR_ID
int UNICHAR_ID
Definition: unichar.h:36
GenericVector< UnicharRating >
tesseract::ShapeTable::MergedUnicharCount
int MergedUnicharCount(int shape_id1, int shape_id2) const
Definition: shapetable.cpp:503
tesseract::Shape::operator[]
const UnicharAndFonts & operator[](int index) const
Definition: shapetable.h:204
tesseract::Shape::IsEqualUnichars
bool IsEqualUnichars(Shape *other)
Definition: shapetable.cpp:217
tesseract::UnicharAndFonts::DeSerialize
bool DeSerialize(TFile *fp)
Definition: shapetable.cpp:74
tesseract::ShapeTable::MasterUnicharCount
int MasterUnicharCount(int shape_id) const
Definition: shapetable.cpp:486
tesseract::ShapeQueueEntry::result
ShapeRating result
Definition: shapetable.h:151
tesseract::Shape::destination_index
int destination_index() const
Definition: shapetable.h:193
tesseract::UnicharRating::UnicharRating
UnicharRating()
Definition: shapetable.h:41
tesseract::UnicharRating::SortDescendingRating
static int SortDescendingRating(const void *t1, const void *t2)
Definition: shapetable.h:55
tesseract::ShapeTable::MasterFontCount
int MasterFontCount(int shape_id) const
Definition: shapetable.cpp:492
tesseract::UnicharAndFonts::UnicharAndFonts
UnicharAndFonts()
Definition: shapetable.h:160
tesseract::Shape::AddToShape
void AddToShape(int unichar_id, int font_id)
Definition: shapetable.cpp:101
tesseract::ShapeRating::broken
bool broken
Definition: shapetable.h:132
tesseract::Shape::size
int size() const
Definition: shapetable.h:199
tesseract::ShapeTable::AddShapeToResults
void AddShapeToResults(const ShapeRating &shape_rating, GenericVector< int > *unichar_map, GenericVector< UnicharRating > *results) const
Definition: shapetable.cpp:687
tesseract::ShapeTable::FindShape
int FindShape(int unichar_id, int font_id) const
Definition: shapetable.cpp:386
tesseract::ShapeTable::AnyMultipleUnichars
bool AnyMultipleUnichars() const
Definition: shapetable.cpp:444
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
tesseract::ShapeTable
Definition: shapetable.h:261
tesseract::ShapeRating::joined
bool joined
Definition: shapetable.h:130
tesseract::ShapeTable::SummaryStr
STRING SummaryStr() const
Definition: shapetable.cpp:313
tesseract::ShapeQueueEntry::ShapeQueueEntry
ShapeQueueEntry()
Definition: shapetable.h:138
tesseract::ShapeTable::AddShape
int AddShape(int unichar_id, int font_id)
Definition: shapetable.cpp:336
tesseract::ShapeQueueEntry
Definition: shapetable.h:137
tesseract::UnicharAndFonts::Serialize
bool Serialize(FILE *fp) const
Definition: shapetable.cpp:69
tesseract::UnicharRating::FirstResultWithUnichar
static int FirstResultWithUnichar(const GenericVector< UnicharRating > &results, UNICHAR_ID unichar_id)
Definition: shapetable.cpp:58
tesseract::ShapeRating::SortDescendingRating
static int SortDescendingRating(const void *t1, const void *t2)
Definition: shapetable.h:101
tesseract::UnicharRating::adapted
bool adapted
Definition: shapetable.h:79
tesseract::ShapeTable::DeleteShape
void DeleteShape(int shape_id)
Definition: shapetable.cpp:361
tesseract::Shape::ContainsFontProperties
bool ContainsFontProperties(const FontInfoTable &font_table, uint32_t properties) const
Definition: shapetable.cpp:169
tesseract::Shape::set_destination_index
void set_destination_index(int index)
Definition: shapetable.h:196
genericheap.h
tesseract::ShapeTable::MaxNumUnichars
int MaxNumUnichars() const
Definition: shapetable.cpp:455
tesseract::ShapeTable::MergeShapes
void MergeShapes(int shape_id1, int shape_id2)
Definition: shapetable.cpp:513
tesseract::ShapeTable::BuildFromShape
int BuildFromShape(const Shape &shape, const ShapeTable &master_shapes)
Definition: shapetable.cpp:413
tesseract::UnicharAndFonts::font_ids
GenericVector< int32_t > font_ids
Definition: shapetable.h:174
tesseract::ShapeRating::ShapeRating
ShapeRating()
Definition: shapetable.h:93
tesseract::UnicharRating::UnicharRating
UnicharRating(int u, float r)
Definition: shapetable.h:44
tesseract::UnicharRating::feature_misses
uint16_t feature_misses
Definition: shapetable.h:83
intmatcher.h