tesseract  5.0.0-alpha-619-ge9db
indexmapbidi.cpp
Go to the documentation of this file.
1 // File: indexmapbidi.cpp
3 // Description: Bi-directional mapping between a sparse and compact space.
4 // Author: rays@google.com (Ray Smith)
5 // Created: Tue Apr 06 11:33:59 PDT 2010
6 //
7 // (C) Copyright 2010, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #include "indexmapbidi.h"
21 
22 namespace tesseract {
23 
24 // Destructor.
25 // It is defined here, so the compiler can create a single vtable
26 // instead of weak vtables in every compilation unit.
27 IndexMap::~IndexMap() = default;
28 
29 // SparseToCompact takes a sparse index to an index in the compact space.
30 // Uses a binary search to find the result. For faster speed use
31 // IndexMapBiDi, but that takes more memory.
32 int IndexMap::SparseToCompact(int sparse_index) const {
33  int result = compact_map_.binary_search(sparse_index);
34  return compact_map_[result] == sparse_index ? result : -1;
35 }
36 
37 // Copy from the input.
38 void IndexMap::CopyFrom(const IndexMap& src) {
41 }
42 void IndexMap::CopyFrom(const IndexMapBiDi& src) {
43  sparse_size_ = src.SparseSize();
45 }
46 
47 // Writes to the given file. Returns false in case of error.
48 bool IndexMap::Serialize(FILE* fp) const {
50 }
51 
52 // Reads from the given file. Returns false in case of error.
53 // If swap is true, assumes a big/little-endian swap is needed.
54 bool IndexMap::DeSerialize(bool swap, FILE* fp) {
55  uint32_t sparse_size;
56  if (!tesseract::DeSerialize(fp, &sparse_size)) return false;
57  if (swap)
58  ReverseN(&sparse_size, sizeof(sparse_size));
59  // Arbitrarily limit the number of elements to protect against bad data.
60  if (sparse_size > UINT16_MAX) return false;
61  sparse_size_ = sparse_size;
62  return compact_map_.DeSerialize(swap, fp);
63 }
64 
65 // Destructor.
66 // It is defined here, so the compiler can create a single vtable
67 // instead of weak vtables in every compilation unit.
68 IndexMapBiDi::~IndexMapBiDi() = default;
69 
70 // Top-level init function in a single call to initialize a map to select
71 // a single contiguous subrange [start, end) of the sparse space to be mapped
72 // 1 to 1 to the compact space, with all other elements of the sparse space
73 // left unmapped.
74 // No need to call Setup after this.
75 void IndexMapBiDi::InitAndSetupRange(int sparse_size, int start, int end) {
76  Init(sparse_size, false);
77  for (int i = start; i < end; ++i)
78  SetMap(i, true);
79  Setup();
80 }
81 
82 // Initializes just the sparse_map_ to the given size with either all
83 // forward indices mapped (all_mapped = true) or none (all_mapped = false).
84 // Call Setup immediately after, or make calls to SetMap first to adjust the
85 // mapping and then call Setup before using the map.
86 void IndexMapBiDi::Init(int size, bool all_mapped) {
87  sparse_map_.init_to_size(size, -1);
88  if (all_mapped) {
89  for (int i = 0; i < size; ++i)
90  sparse_map_[i] = i;
91  }
92 }
93 
94 // Sets a given index in the sparse_map_ to be mapped or not.
95 void IndexMapBiDi::SetMap(int sparse_index, bool mapped) {
96  sparse_map_[sparse_index] = mapped ? 0 : -1;
97 }
98 
99 // Sets up the sparse_map_ and compact_map_ properly after Init and
100 // some calls to SetMap. Assumes an ordered 1-1 map from set indices
101 // in the forward map to the compact space.
103  int compact_size = 0;
104  for (int i = 0; i < sparse_map_.size(); ++i) {
105  if (sparse_map_[i] >= 0) {
106  sparse_map_[i] = compact_size++;
107  }
108  }
109  compact_map_.init_to_size(compact_size, -1);
110  for (int i = 0; i < sparse_map_.size(); ++i) {
111  if (sparse_map_[i] >= 0) {
112  compact_map_[sparse_map_[i]] = i;
113  }
114  }
115  sparse_size_ = sparse_map_.size();
116 }
117 
118 // Copy from the input.
120  sparse_map_ = src.sparse_map_;
122  sparse_size_ = sparse_map_.size();
123 }
124 
125 // Merges the two compact space indices. May be called many times, but
126 // the merges must be concluded by a call to CompleteMerges.
127 // Returns true if a merge was actually performed.
128 bool IndexMapBiDi::Merge(int compact_index1, int compact_index2) {
129  // Find the current master index for index1 and index2.
130  compact_index1 = MasterCompactIndex(compact_index1);
131  compact_index2 = MasterCompactIndex(compact_index2);
132  // Be sure that index1 < index2.
133  if (compact_index1 > compact_index2) {
134  int tmp = compact_index1;
135  compact_index1 = compact_index2;
136  compact_index2 = tmp;
137  } else if (compact_index1 == compact_index2) {
138  return false;
139  }
140  // To save iterating over all sparse_map_ entries, simply make the master
141  // entry for index2 point to index1.
142  // This leaves behind a potential chain of parents that needs to be chased,
143  // as above.
144  sparse_map_[compact_map_[compact_index2]] = compact_index1;
145  if (compact_index1 >= 0)
146  compact_map_[compact_index2] = compact_map_[compact_index1];
147  return true;
148 }
149 
150 // Completes one or more Merge operations by further compacting the
151 // compact space. Unused compact space indices are removed, and the used
152 // ones above shuffled down to fill the gaps.
153 // Example:
154 // Input sparse_map_: (x indicates -1)
155 // x x 0 x 2 x x 4 x 0 x 2 x
156 // Output sparse_map_:
157 // x x 0 x 1 x x 2 x 0 x 1 x
158 // Output compact_map_:
159 // 2 4 7.
161  // Ensure each sparse_map_entry contains a master compact_map_ index.
162  int compact_size = 0;
163  for (int i = 0; i < sparse_map_.size(); ++i) {
164  int compact_index = MasterCompactIndex(sparse_map_[i]);
165  sparse_map_[i] = compact_index;
166  if (compact_index >= compact_size)
167  compact_size = compact_index + 1;
168  }
169  // Re-generate the compact_map leaving holes for unused indices.
170  compact_map_.init_to_size(compact_size, -1);
171  for (int i = 0; i < sparse_map_.size(); ++i) {
172  if (sparse_map_[i] >= 0) {
173  if (compact_map_[sparse_map_[i]] == -1)
174  compact_map_[sparse_map_[i]] = i;
175  }
176  }
177  // Compact the compact_map, leaving tmp_compact_map saying where each
178  // index went to in the compacted map.
179  GenericVector<int32_t> tmp_compact_map;
180  tmp_compact_map.init_to_size(compact_size, -1);
181  compact_size = 0;
182  for (int i = 0; i < compact_map_.size(); ++i) {
183  if (compact_map_[i] >= 0) {
184  tmp_compact_map[i] = compact_size;
185  compact_map_[compact_size++] = compact_map_[i];
186  }
187  }
188  compact_map_.truncate(compact_size);
189  // Now modify the entries in the sparse map to point to the new locations.
190  for (int i = 0; i < sparse_map_.size(); ++i) {
191  if (sparse_map_[i] >= 0) {
192  sparse_map_[i] = tmp_compact_map[sparse_map_[i]];
193  }
194  }
195 }
196 
197 // Writes to the given file. Returns false in case of error.
198 bool IndexMapBiDi::Serialize(FILE* fp) const {
199  if (!IndexMap::Serialize(fp)) return false;
200  // Make a vector containing the rest of the map. If the map is many-to-one
201  // then each additional sparse entry needs to be stored.
202  // Normally we store only the compact map to save space.
203  GenericVector<int32_t> remaining_pairs;
204  for (int i = 0; i < sparse_map_.size(); ++i) {
205  if (sparse_map_[i] >= 0 && compact_map_[sparse_map_[i]] != i) {
206  remaining_pairs.push_back(i);
207  remaining_pairs.push_back(sparse_map_[i]);
208  }
209  }
210  if (!remaining_pairs.Serialize(fp)) return false;
211  return true;
212 }
213 
214 // Reads from the given file. Returns false in case of error.
215 // If swap is true, assumes a big/little-endian swap is needed.
216 bool IndexMapBiDi::DeSerialize(bool swap, FILE* fp) {
217  if (!IndexMap::DeSerialize(swap, fp)) return false;
218  GenericVector<int32_t> remaining_pairs;
219  if (!remaining_pairs.DeSerialize(swap, fp)) return false;
220  sparse_map_.init_to_size(sparse_size_, -1);
221  for (int i = 0; i < compact_map_.size(); ++i) {
222  sparse_map_[compact_map_[i]] = i;
223  }
224  for (int i = 0; i < remaining_pairs.size(); ++i) {
225  int sparse_index = remaining_pairs[i++];
226  sparse_map_[sparse_index] = remaining_pairs[i];
227  }
228  return true;
229 }
230 
231 // Bulk calls to SparseToCompact.
232 // Maps the given array of sparse indices to an array of compact indices.
233 // Assumes the input is sorted. The output indices are sorted and uniqued.
234 // Return value is the number of "missed" features, being features that
235 // don't map to the compact feature space.
237  GenericVector<int>* compact) const {
238  compact->truncate(0);
239  int num_features = sparse.size();
240  int missed_features = 0;
241  int prev_good_feature = -1;
242  for (int f = 0; f < num_features; ++f) {
243  int feature = sparse_map_[sparse[f]];
244  if (feature >= 0) {
245  if (feature != prev_good_feature) {
246  compact->push_back(feature);
247  prev_good_feature = feature;
248  }
249  } else {
250  ++missed_features;
251  }
252  }
253  return missed_features;
254 }
255 
256 } // namespace tesseract.
tesseract::IndexMap::DeSerialize
bool DeSerialize(bool swap, FILE *fp)
Definition: indexmapbidi.cpp:54
tesseract::IndexMapBiDi::CompleteMerges
void CompleteMerges()
Definition: indexmapbidi.cpp:160
tesseract::IndexMapBiDi::MapFeatures
int MapFeatures(const GenericVector< int > &sparse, GenericVector< int > *compact) const
Definition: indexmapbidi.cpp:236
tesseract::IndexMap::sparse_size_
int32_t sparse_size_
Definition: indexmapbidi.h:77
GenericVector::Serialize
bool Serialize(FILE *fp) const
Definition: genericvector.h:929
tesseract::IndexMapBiDi::InitAndSetupRange
void InitAndSetupRange(int sparse_size, int start, int end)
Definition: indexmapbidi.cpp:75
tesseract::IndexMap
Definition: indexmapbidi.h:42
tesseract::IndexMapBiDi::~IndexMapBiDi
~IndexMapBiDi() override
tesseract::IndexMap::SparseToCompact
virtual int SparseToCompact(int sparse_index) const
Definition: indexmapbidi.cpp:32
tesseract::IndexMapBiDi::Serialize
bool Serialize(FILE *fp) const
Definition: indexmapbidi.cpp:198
GenericVector::push_back
int push_back(T object)
Definition: genericvector.h:799
tesseract::IndexMapBiDi::DeSerialize
bool DeSerialize(bool swap, FILE *fp)
Definition: indexmapbidi.cpp:216
tesseract::IndexMap::~IndexMap
virtual ~IndexMap()
tesseract::IndexMapBiDi::Merge
bool Merge(int compact_index1, int compact_index2)
Definition: indexmapbidi.cpp:128
GenericVector::DeSerialize
bool DeSerialize(bool swap, FILE *fp)
Definition: genericvector.h:954
tesseract::IndexMap::CopyFrom
void CopyFrom(const IndexMap &src)
Definition: indexmapbidi.cpp:38
tesseract::IndexMapBiDi::Setup
void Setup()
Definition: indexmapbidi.cpp:102
tesseract::IndexMapBiDi::CopyFrom
void CopyFrom(const IndexMapBiDi &src)
Definition: indexmapbidi.cpp:119
tesseract
Definition: baseapi.h:65
tesseract::IndexMap::Serialize
bool Serialize(FILE *fp) const
Definition: indexmapbidi.cpp:48
GenericVector< int32_t >
tesseract::IndexMapBiDi::SparseSize
int SparseSize() const override
Definition: indexmapbidi.h:142
tesseract::IndexMapBiDi
Definition: indexmapbidi.h:102
GenericVector::truncate
void truncate(int size)
Definition: genericvector.h:132
tesseract::IndexMapBiDi::Init
void Init(int size, bool all_mapped)
Definition: indexmapbidi.cpp:86
GenericVector::init_to_size
void init_to_size(int size, const T &t)
Definition: genericvector.h:706
GenericVector::binary_search
int binary_search(const T &target) const
Definition: genericvector.h:252
tesseract::IndexMapBiDi::SetMap
void SetMap(int sparse_index, bool mapped)
Definition: indexmapbidi.cpp:95
ReverseN
void ReverseN(void *ptr, int num_bytes)
Definition: helpers.h:183
tesseract::IndexMap::compact_map_
GenericVector< int32_t > compact_map_
Definition: indexmapbidi.h:80
tesseract::DeSerialize
bool DeSerialize(FILE *fp, char *data, size_t n=1)
Definition: serialis.cpp:41
GenericVector::size
int size() const
Definition: genericvector.h:71
tesseract::Serialize
bool Serialize(FILE *fp, const char *data, size_t n=1)
Definition: serialis.cpp:73
indexmapbidi.h