tesseract  5.0.0-alpha-619-ge9db
genericvector.h
Go to the documentation of this file.
1 // File: genericvector.h
3 // Description: Generic vector class
4 // Author: Daria Antonova
5 //
6 // (C) Copyright 2007, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
18 //
19 #ifndef TESSERACT_CCUTIL_GENERICVECTOR_H_
20 #define TESSERACT_CCUTIL_GENERICVECTOR_H_
21 
22 #include <algorithm>
23 #include <cassert>
24 #include <climits> // for LONG_MAX
25 #include <cstdint> // for uint32_t
26 #include <cstdio>
27 #include <cstdlib>
28 #include <functional> // for std::function
29 
30 #include "helpers.h"
31 #include "serialis.h"
32 
33 // Use PointerVector<T> below in preference to GenericVector<T*>, as that
34 // provides automatic deletion of pointers, [De]Serialize that works, and
35 // sort that works.
36 template <typename T>
37 class GenericVector {
38  public:
41  }
42  GenericVector(int size, const T& init_val) {
43  init(size);
44  init_to_size(size, init_val);
45  }
46 
47  // Copy
48  GenericVector(const GenericVector& other) {
49  this->init(other.size());
50  this->operator+=(other);
51  }
54 
56 
57  // Reserve some memory.
58  void reserve(int size);
59  // Double the size of the internal array.
60  void double_the_size();
61 
62  // Resizes to size and sets all values to t.
63  void init_to_size(int size, const T& t);
64  // Resizes to size without any initialization.
65  void resize_no_init(int size) {
66  reserve(size);
67  size_used_ = size;
68  }
69 
70  // Return the size used.
71  int size() const {
72  return size_used_;
73  }
74  // Workaround to avoid g++ -Wsign-compare warnings.
75  size_t unsigned_size() const {
76  static_assert(sizeof(size_used_) <= sizeof(size_t),
77  "Wow! sizeof(size_t) < sizeof(int32_t)!!");
78  assert(0 <= size_used_);
79  return static_cast<size_t>(size_used_);
80  }
81  int size_reserved() const {
82  return size_reserved_;
83  }
84 
85  // Return true if empty.
86  bool empty() const {
87  return size_used_ == 0;
88  }
89 
90  // Return the object from an index.
91  T& get(int index) const;
92  T& back() const;
93  T& operator[](int index) const;
94  // Returns the last object and removes it.
95  T pop_back();
96 
97  // Return the index of the T object.
98  // This method NEEDS a compare_callback to be passed to
99  // set_compare_callback.
100  int get_index(const T& object) const;
101 
102  // Return true if T is in the array
103  bool contains(const T& object) const;
104 
105  // Return true if the index is valid
106  T contains_index(int index) const;
107 
108  // Push an element in the end of the array
109  int push_back(T object);
110  void operator+=(const T& t);
111 
112  // Push an element in the end of the array if the same
113  // element is not already contained in the array.
114  int push_back_new(const T& object);
115 
116  // Push an element in the front of the array
117  // Note: This function is O(n)
118  int push_front(const T& object);
119 
120  // Set the value at the given index
121  void set(const T& t, int index);
122 
123  // Insert t at the given index, push other elements to the right.
124  void insert(const T& t, int index);
125 
126  // Removes an element at the given index and
127  // shifts the remaining elements to the left.
128  void remove(int index);
129 
130  // Truncates the array to the given size by removing the end.
131  // If the current size is less, the array is not expanded.
132  void truncate(int size) {
133  if (size < size_used_) {
134  size_used_ = size;
135  }
136  }
137 
138  // Add a callback to be called to delete the elements when the array took
139  // their ownership.
140  void set_clear_callback(std::function<void(T)> cb) {
141  clear_cb_ = cb;
142  }
143 
144  // Add a callback to be called to compare the elements when needed (contains,
145  // get_id, ...)
146  void set_compare_callback(std::function<bool(const T&, const T&)> cb) {
147  compare_cb_ = cb;
148  }
149 
150  // Clear the array, calling the clear callback function if any.
151  // All the owned callbacks are also deleted.
152  // If you don't want the callbacks to be deleted, before calling clear, set
153  // the callback to nullptr.
154  void clear();
155 
156  // Delete objects pointed to by data_[i]
157  void delete_data_pointers();
158 
159  // This method clears the current object, then, does a shallow copy of
160  // its argument, and finally invalidates its argument.
161  // Callbacks are moved to the current object;
162  void move(GenericVector<T>* from);
163 
164  // Read/Write the array to a file. This does _NOT_ read/write the callbacks.
165  // The callback given must be permanent since they will be called more than
166  // once. The given callback will be deleted at the end.
167  // If the callbacks are nullptr, then the data is simply read/written using
168  // fread (and swapping)/fwrite.
169  // Returns false on error or if the callback returns false.
170  // DEPRECATED. Use [De]Serialize[Classes] instead.
171  bool write(FILE* f, std::function<bool(FILE*, const T&)> cb) const;
172  bool read(tesseract::TFile* f, std::function<bool(tesseract::TFile*, T*)> cb);
173  // Writes a vector of simple types to the given file. Assumes that bitwise
174  // read/write of T will work. Returns false in case of error.
175  // TODO(rays) Change all callers to use TFile and remove deprecated methods.
176  bool Serialize(FILE* fp) const;
177  bool Serialize(tesseract::TFile* fp) const;
178  // Reads a vector of simple types from the given file. Assumes that bitwise
179  // read/write will work with ReverseN according to sizeof(T).
180  // Returns false in case of error.
181  // If swap is true, assumes a big/little-endian swap is needed.
182  // TFile is assumed to know about swapping.
183  bool DeSerialize(bool swap, FILE* fp);
184  bool DeSerialize(tesseract::TFile* fp);
185  // Skips the deserialization of the vector.
186  static bool SkipDeSerialize(tesseract::TFile* fp);
187  // Writes a vector of classes to the given file. Assumes the existence of
188  // bool T::Serialize(FILE* fp) const that returns false in case of error.
189  // Returns false in case of error.
190  bool SerializeClasses(FILE* fp) const;
191  bool SerializeClasses(tesseract::TFile* fp) const;
192  // Reads a vector of classes from the given file. Assumes the existence of
193  // bool T::Deserialize(bool swap, FILE* fp) that returns false in case of
194  // error. Also needs T::T() and T::T(constT&), as init_to_size is used in
195  // this function. Returns false in case of error.
196  // If swap is true, assumes a big/little-endian swap is needed.
197  bool DeSerializeClasses(bool swap, FILE* fp);
199  // Calls SkipDeSerialize on the elements of the vector.
200  static bool SkipDeSerializeClasses(tesseract::TFile* fp);
201 
202  // Allocates a new array of double the current_size, copies over the
203  // information from data to the new location, deletes data and returns
204  // the pointed to the new larger array.
205  // This function uses memcpy to copy the data, instead of invoking
206  // operator=() for each element like double_the_size() does.
207  static T* double_the_size_memcpy(int current_size, T* data) {
208  T* data_new = new T[current_size * 2];
209  memcpy(data_new, data, sizeof(T) * current_size);
210  delete[] data;
211  return data_new;
212  }
213 
214  // Reverses the elements of the vector.
215  void reverse() {
216  for (int i = 0; i < size_used_ / 2; ++i) {
217  Swap(&data_[i], &data_[size_used_ - 1 - i]);
218  }
219  }
220 
221  // Sorts the members of this vector using the less than comparator (cmp_lt),
222  // which compares the values. Useful for GenericVectors to primitive types.
223  // Will not work so great for pointers (unless you just want to sort some
224  // pointers). You need to provide a specialization to sort_cmp to use
225  // your type.
226  void sort();
227 
228  // Sort the array into the order defined by the qsort function comparator.
229  // The comparator function is as defined by qsort, ie. it receives pointers
230  // to two Ts and returns negative if the first element is to appear earlier
231  // in the result and positive if it is to appear later, with 0 for equal.
232  void sort(int (*comparator)(const void*, const void*)) {
233  qsort(data_, size_used_, sizeof(*data_), comparator);
234  }
235 
236  // Searches the array (assuming sorted in ascending order, using sort()) for
237  // an element equal to target and returns true if it is present.
238  // Use binary_search to get the index of target, or its nearest candidate.
239  bool bool_binary_search(const T& target) const {
240  int index = binary_search(target);
241  if (index >= size_used_) {
242  return false;
243  }
244  return data_[index] == target;
245  }
246  // Searches the array (assuming sorted in ascending order, using sort()) for
247  // an element equal to target and returns the index of the best candidate.
248  // The return value is conceptually the largest index i such that
249  // data_[i] <= target or 0 if target < the whole vector.
250  // NOTE that this function uses operator> so really the return value is
251  // the largest index i such that data_[i] > target is false.
252  int binary_search(const T& target) const {
253  int bottom = 0;
254  int top = size_used_;
255  while (top - bottom > 1) {
256  int middle = (bottom + top) / 2;
257  if (data_[middle] > target) {
258  top = middle;
259  } else {
260  bottom = middle;
261  }
262  }
263  return bottom;
264  }
265 
266  // Compact the vector by deleting elements using operator!= on basic types.
267  // The vector must be sorted.
268  void compact_sorted() {
269  if (size_used_ == 0) {
270  return;
271  }
272 
273  // First element is in no matter what, hence the i = 1.
274  int last_write = 0;
275  for (int i = 1; i < size_used_; ++i) {
276  // Finds next unique item and writes it.
277  if (data_[last_write] != data_[i]) {
278  data_[++last_write] = data_[i];
279  }
280  }
281  // last_write is the index of a valid data cell, so add 1.
282  size_used_ = last_write + 1;
283  }
284 
285  // Returns the index of what would be the target_index_th item in the array
286  // if the members were sorted, without actually sorting. Members are
287  // shuffled around, but it takes O(n) time.
288  // NOTE: uses operator< and operator== on the members.
289  int choose_nth_item(int target_index) {
290  // Make sure target_index is legal.
291  if (target_index < 0) {
292  target_index = 0; // ensure legal
293  } else if (target_index >= size_used_) {
294  target_index = size_used_ - 1;
295  }
296  unsigned int seed = 1;
297  return choose_nth_item(target_index, 0, size_used_, &seed);
298  }
299 
300  // Swaps the elements with the given indices.
301  void swap(int index1, int index2) {
302  if (index1 != index2) {
303  T tmp = data_[index1];
304  data_[index1] = data_[index2];
305  data_[index2] = tmp;
306  }
307  }
308  // Returns true if all elements of *this are within the given range.
309  // Only uses operator<
310  bool WithinBounds(const T& rangemin, const T& rangemax) const {
311  for (int i = 0; i < size_used_; ++i) {
312  if (data_[i] < rangemin || rangemax < data_[i]) {
313  return false;
314  }
315  }
316  return true;
317  }
318 
319  protected:
320  // Internal recursive version of choose_nth_item.
321  int choose_nth_item(int target_index, int start, int end, unsigned int* seed);
322 
323  // Init the object, allocating size memory.
324  void init(int size);
325 
326  // We are assuming that the object generally placed in the
327  // vector are small enough that for efficiency it makes sense
328  // to start with a larger initial size.
329  static const int kDefaultVectorSize = 4;
330  int32_t size_used_{};
331  int32_t size_reserved_{};
332  T* data_;
333  std::function<void(T)> clear_cb_;
334  std::function<bool(const T&, const T&)> compare_cb_;
335 };
336 
337 namespace tesseract {
338 
339 // The default FileReader loads the whole file into the vector of char,
340 // returning false on error.
341 inline bool LoadDataFromFile(const char* filename, GenericVector<char>* data) {
342  bool result = false;
343  FILE* fp = fopen(filename, "rb");
344  if (fp != nullptr) {
345  fseek(fp, 0, SEEK_END);
346  auto size = std::ftell(fp);
347  fseek(fp, 0, SEEK_SET);
348  // Trying to open a directory on Linux sets size to LONG_MAX. Catch it here.
349  if (size > 0 && size < LONG_MAX) {
350  // reserve an extra byte in case caller wants to append a '\0' character
351  data->reserve(size + 1);
352  data->resize_no_init(size);
353  result = static_cast<long>(fread(&(*data)[0], 1, size, fp)) == size;
354  }
355  fclose(fp);
356  }
357  return result;
358 }
359 
360 // The default FileWriter writes the vector of char to the filename file,
361 // returning false on error.
362 inline bool SaveDataToFile(const GenericVector<char>& data,
363  const char* filename) {
364  FILE* fp = fopen(filename, "wb");
365  if (fp == nullptr) {
366  return false;
367  }
368  bool result =
369  static_cast<int>(fwrite(&data[0], 1, data.size(), fp)) == data.size();
370  fclose(fp);
371  return result;
372 }
373 
374 template <typename T>
375 bool cmp_eq(T const& t1, T const& t2) {
376  return t1 == t2;
377 }
378 
379 // Used by sort()
380 // return < 0 if t1 < t2
381 // return 0 if t1 == t2
382 // return > 0 if t1 > t2
383 template <typename T>
384 int sort_cmp(const void* t1, const void* t2) {
385  const T* a = static_cast<const T*>(t1);
386  const T* b = static_cast<const T*>(t2);
387  if (*a < *b) {
388  return -1;
389  }
390  if (*b < *a) {
391  return 1;
392  }
393  return 0;
394 }
395 
396 // Used by PointerVector::sort()
397 // return < 0 if t1 < t2
398 // return 0 if t1 == t2
399 // return > 0 if t1 > t2
400 template <typename T>
401 int sort_ptr_cmp(const void* t1, const void* t2) {
402  const T* a = *static_cast<T* const*>(t1);
403  const T* b = *static_cast<T* const*>(t2);
404  if (*a < *b) {
405  return -1;
406  }
407  if (*b < *a) {
408  return 1;
409  }
410  return 0;
411 }
412 
413 // Subclass for a vector of pointers. Use in preference to GenericVector<T*>
414 // as it provides automatic deletion and correct serialization, with the
415 // corollary that all copy operations are deep copies of the pointed-to objects.
416 template <typename T>
417 class PointerVector : public GenericVector<T*> {
418  public:
420  explicit PointerVector(int size) : GenericVector<T*>(size) {}
422  // Clear must be called here, even though it is called again by the base,
423  // as the base will call the wrong clear.
424  clear();
425  }
426  // Copy must be deep, as the pointers will be automatically deleted on
427  // destruction.
428  PointerVector(const PointerVector& other) : GenericVector<T*>(other) {
429  this->init(other.size());
430  this->operator+=(other);
431  }
433  this->reserve(this->size_used_ + other.size_used_);
434  for (int i = 0; i < other.size(); ++i) {
435  this->push_back(new T(*other.data_[i]));
436  }
437  return *this;
438  }
439 
441  if (&other != this) {
442  this->truncate(0);
443  this->operator+=(other);
444  }
445  return *this;
446  }
447 
448  // Removes an element at the given index and
449  // shifts the remaining elements to the left.
450  void remove(int index) {
451  delete GenericVector<T*>::data_[index];
453  }
454 
455  // Truncates the array to the given size by removing the end.
456  // If the current size is less, the array is not expanded.
457  void truncate(int size) {
458  for (int i = size; i < GenericVector<T*>::size_used_; ++i) {
459  delete GenericVector<T*>::data_[i];
460  }
462  }
463 
464  // Compact the vector by deleting elements for which delete_cb returns
465  // true. delete_cb is a permanent callback and will be deleted.
466  void compact(std::function<bool(const T*)> delete_cb) {
467  int new_size = 0;
468  int old_index = 0;
469  // Until the callback returns true, the elements stay the same.
470  while (old_index < GenericVector<T*>::size_used_ &&
471  !delete_cb(GenericVector<T*>::data_[old_index++])) {
472  ++new_size;
473  }
474  // Now just copy anything else that gets false from delete_cb.
475  for (; old_index < GenericVector<T*>::size_used_; ++old_index) {
476  if (!delete_cb(GenericVector<T*>::data_[old_index])) {
477  GenericVector<T*>::data_[new_size++] =
478  GenericVector<T*>::data_[old_index];
479  } else {
480  delete GenericVector<T*>::data_[old_index];
481  }
482  }
484  }
485 
486  // Clear the array, calling the clear callback function if any.
487  // All the owned callbacks are also deleted.
488  // If you don't want the callbacks to be deleted, before calling clear, set
489  // the callback to nullptr.
490  void clear() {
493  }
494 
495  // Writes a vector of (pointers to) classes to the given file. Assumes the
496  // existence of bool T::Serialize(FILE*) const that returns false in case of
497  // error. There is no Serialize for simple types, as you would have a
498  // normal GenericVector of those.
499  // Returns false in case of error.
500  bool Serialize(FILE* fp) const {
501  int32_t used = GenericVector<T*>::size_used_;
502  if (fwrite(&used, sizeof(used), 1, fp) != 1) {
503  return false;
504  }
505  for (int i = 0; i < used; ++i) {
506  int8_t non_null = GenericVector<T*>::data_[i] != nullptr;
507  if (fwrite(&non_null, sizeof(non_null), 1, fp) != 1) {
508  return false;
509  }
510  if (non_null && !GenericVector<T*>::data_[i]->Serialize(fp)) {
511  return false;
512  }
513  }
514  return true;
515  }
516  bool Serialize(TFile* fp) const {
517  int32_t used = GenericVector<T*>::size_used_;
518  if (fp->FWrite(&used, sizeof(used), 1) != 1) {
519  return false;
520  }
521  for (int i = 0; i < used; ++i) {
522  int8_t non_null = GenericVector<T*>::data_[i] != nullptr;
523  if (fp->FWrite(&non_null, sizeof(non_null), 1) != 1) {
524  return false;
525  }
526  if (non_null && !GenericVector<T*>::data_[i]->Serialize(fp)) {
527  return false;
528  }
529  }
530  return true;
531  }
532  // Reads a vector of (pointers to) classes to the given file. Assumes the
533  // existence of bool T::DeSerialize(bool, Tfile*) const that returns false in
534  // case of error. There is no Serialize for simple types, as you would have a
535  // normal GenericVector of those.
536  // If swap is true, assumes a big/little-endian swap is needed.
537  // Also needs T::T(), as new T is used in this function.
538  // Returns false in case of error.
539  bool DeSerialize(bool swap, FILE* fp) {
540  uint32_t reserved;
541  if (fread(&reserved, sizeof(reserved), 1, fp) != 1) {
542  return false;
543  }
544  if (swap) {
545  Reverse32(&reserved);
546  }
547  // Arbitrarily limit the number of elements to protect against bad data.
548  assert(reserved <= UINT16_MAX);
549  if (reserved > UINT16_MAX) {
550  return false;
551  }
552  GenericVector<T*>::reserve(reserved);
553  truncate(0);
554  for (uint32_t i = 0; i < reserved; ++i) {
555  int8_t non_null;
556  if (fread(&non_null, sizeof(non_null), 1, fp) != 1) {
557  return false;
558  }
559  T* item = nullptr;
560  if (non_null != 0) {
561  item = new T;
562  if (!item->DeSerialize(swap, fp)) {
563  delete item;
564  return false;
565  }
566  this->push_back(item);
567  } else {
568  // Null elements should keep their place in the vector.
569  this->push_back(nullptr);
570  }
571  }
572  return true;
573  }
574  bool DeSerialize(TFile* fp) {
575  int32_t reserved;
576  if (!DeSerializeSize(fp, &reserved)) {
577  return false;
578  }
579  GenericVector<T*>::reserve(reserved);
580  truncate(0);
581  for (int i = 0; i < reserved; ++i) {
582  if (!DeSerializeElement(fp)) {
583  return false;
584  }
585  }
586  return true;
587  }
588  // Enables deserialization of a selection of elements. Note that in order to
589  // retain the integrity of the stream, the caller must call some combination
590  // of DeSerializeElement and DeSerializeSkip of the exact number returned in
591  // *size, assuming a true return.
592  static bool DeSerializeSize(TFile* fp, int32_t* size) {
593  return fp->FReadEndian(size, sizeof(*size), 1) == 1;
594  }
595  // Reads and appends to the vector the next element of the serialization.
597  int8_t non_null;
598  if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) {
599  return false;
600  }
601  T* item = nullptr;
602  if (non_null != 0) {
603  item = new T;
604  if (!item->DeSerialize(fp)) {
605  delete item;
606  return false;
607  }
608  this->push_back(item);
609  } else {
610  // Null elements should keep their place in the vector.
611  this->push_back(nullptr);
612  }
613  return true;
614  }
615  // Skips the next element of the serialization.
616  static bool DeSerializeSkip(TFile* fp) {
617  int8_t non_null;
618  if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) {
619  return false;
620  }
621  if (non_null != 0) {
622  if (!T::SkipDeSerialize(fp)) {
623  return false;
624  }
625  }
626  return true;
627  }
628 
629  // Sorts the items pointed to by the members of this vector using
630  // t::operator<().
631  void sort() {
632  this->GenericVector<T*>::sort(&sort_ptr_cmp<T>);
633  }
634 };
635 
636 } // namespace tesseract
637 
638 // A useful vector that uses operator== to do comparisons.
639 template <typename T>
640 class GenericVectorEqEq : public GenericVector<T> {
641  public:
643  using namespace std::placeholders; // for _1
645  std::bind(tesseract::cmp_eq<T>, _1, _2));
646  }
647  explicit GenericVectorEqEq(int size) : GenericVector<T>(size) {
648  using namespace std::placeholders; // for _1
650  std::bind(tesseract::cmp_eq<T>, _1, _2));
651  }
652 };
653 
654 template <typename T>
656  size_used_ = 0;
657  if (size <= 0) {
658  data_ = nullptr;
659  size_reserved_ = 0;
660  } else {
661  if (size < kDefaultVectorSize) {
663  }
664  data_ = new T[size];
666  }
667  clear_cb_ = nullptr;
668  compare_cb_ = nullptr;
669 }
670 
671 template <typename T>
673  clear();
674 }
675 
676 // Reserve some memory. If the internal array contains elements, they are
677 // copied.
678 template <typename T>
680  if (size_reserved_ >= size || size <= 0) {
681  return;
682  }
683  if (size < kDefaultVectorSize) {
685  }
686  T* new_array = new T[size];
687  for (int i = 0; i < size_used_; ++i) {
688  new_array[i] = data_[i];
689  }
690  delete[] data_;
691  data_ = new_array;
693 }
694 
695 template <typename T>
697  if (size_reserved_ == 0) {
699  } else {
700  reserve(2 * size_reserved_);
701  }
702 }
703 
704 // Resizes to size and sets all values to t.
705 template <typename T>
706 void GenericVector<T>::init_to_size(int size, const T& t) {
707  reserve(size);
708  size_used_ = size;
709  for (int i = 0; i < size; ++i) {
710  data_[i] = t;
711  }
712 }
713 
714 // Return the object from an index.
715 template <typename T>
716 T& GenericVector<T>::get(int index) const {
717  assert(index >= 0 && index < size_used_);
718  return data_[index];
719 }
720 
721 template <typename T>
722 T& GenericVector<T>::operator[](int index) const {
723  assert(index >= 0 && index < size_used_);
724  return data_[index];
725 }
726 
727 template <typename T>
729  assert(size_used_ > 0);
730  return data_[size_used_ - 1];
731 }
732 // Returns the last object and removes it.
733 template <typename T>
735  assert(size_used_ > 0);
736  return data_[--size_used_];
737 }
738 
739 // Return the object from an index.
740 template <typename T>
741 void GenericVector<T>::set(const T& t, int index) {
742  assert(index >= 0 && index < size_used_);
743  data_[index] = t;
744 }
745 
746 // Shifts the rest of the elements to the right to make
747 // space for the new elements and inserts the given element
748 // at the specified index.
749 template <typename T>
750 void GenericVector<T>::insert(const T& t, int index) {
751  assert(index >= 0 && index <= size_used_);
752  if (size_reserved_ == size_used_) {
753  double_the_size();
754  }
755  for (int i = size_used_; i > index; --i) {
756  data_[i] = data_[i - 1];
757  }
758  data_[index] = t;
759  size_used_++;
760 }
761 
762 // Removes an element at the given index and
763 // shifts the remaining elements to the left.
764 template <typename T>
765 void GenericVector<T>::remove(int index) {
766  assert(index >= 0 && index < size_used_);
767  for (int i = index; i < size_used_ - 1; ++i) {
768  data_[i] = data_[i + 1];
769  }
770  size_used_--;
771 }
772 
773 // Return true if the index is valindex
774 template <typename T>
776  return index >= 0 && index < size_used_;
777 }
778 
779 // Return the index of the T object.
780 template <typename T>
781 int GenericVector<T>::get_index(const T& object) const {
782  for (int i = 0; i < size_used_; ++i) {
783  assert(compare_cb_ != nullptr);
784  if (compare_cb_(object, data_[i])) {
785  return i;
786  }
787  }
788  return -1;
789 }
790 
791 // Return true if T is in the array
792 template <typename T>
793 bool GenericVector<T>::contains(const T& object) const {
794  return get_index(object) != -1;
795 }
796 
797 // Add an element in the array
798 template <typename T>
800  int index = 0;
801  if (size_used_ == size_reserved_) {
802  double_the_size();
803  }
804  index = size_used_++;
805  data_[index] = object;
806  return index;
807 }
808 
809 template <typename T>
810 int GenericVector<T>::push_back_new(const T& object) {
811  int index = get_index(object);
812  if (index >= 0) {
813  return index;
814  }
815  return push_back(object);
816 }
817 
818 // Add an element in the array (front)
819 template <typename T>
820 int GenericVector<T>::push_front(const T& object) {
821  if (size_used_ == size_reserved_) {
822  double_the_size();
823  }
824  for (int i = size_used_; i > 0; --i) {
825  data_[i] = data_[i - 1];
826  }
827  data_[0] = object;
828  ++size_used_;
829  return 0;
830 }
831 
832 template <typename T>
834  push_back(t);
835 }
836 
837 template <typename T>
839  this->reserve(size_used_ + other.size_used_);
840  for (int i = 0; i < other.size(); ++i) {
841  this->operator+=(other.data_[i]);
842  }
843  return *this;
844 }
845 
846 template <typename T>
848  if (&other != this) {
849  this->truncate(0);
850  this->operator+=(other);
851  }
852  return *this;
853 }
854 
855 // Clear the array, calling the callback function if any.
856 template <typename T>
858  if (size_reserved_ > 0 && clear_cb_ != nullptr) {
859  for (int i = 0; i < size_used_; ++i) {
860  clear_cb_(data_[i]);
861  }
862  }
863  delete[] data_;
864  data_ = nullptr;
865  size_used_ = 0;
866  size_reserved_ = 0;
867  clear_cb_ = nullptr;
868  compare_cb_ = nullptr;
869 }
870 
871 template <typename T>
873  for (int i = 0; i < size_used_; ++i) {
874  delete data_[i];
875  }
876 }
877 
878 template <typename T>
880  std::function<bool(FILE*, const T&)> cb) const {
881  if (fwrite(&size_reserved_, sizeof(size_reserved_), 1, f) != 1) {
882  return false;
883  }
884  if (fwrite(&size_used_, sizeof(size_used_), 1, f) != 1) {
885  return false;
886  }
887  if (cb != nullptr) {
888  for (int i = 0; i < size_used_; ++i) {
889  if (!cb(f, data_[i])) {
890  return false;
891  }
892  }
893  } else {
894  if (fwrite(data_, sizeof(T), size_used_, f) != unsigned_size()) {
895  return false;
896  }
897  }
898  return true;
899 }
900 
901 template <typename T>
903  std::function<bool(tesseract::TFile*, T*)> cb) {
904  int32_t reserved;
905  if (f->FReadEndian(&reserved, sizeof(reserved), 1) != 1) {
906  return false;
907  }
908  reserve(reserved);
909  if (f->FReadEndian(&size_used_, sizeof(size_used_), 1) != 1) {
910  return false;
911  }
912  if (cb != nullptr) {
913  for (int i = 0; i < size_used_; ++i) {
914  if (!cb(f, data_ + i)) {
915  return false;
916  }
917  }
918  } else {
919  if (f->FReadEndian(data_, sizeof(T), size_used_) != size_used_) {
920  return false;
921  }
922  }
923  return true;
924 }
925 
926 // Writes a vector of simple types to the given file. Assumes that bitwise
927 // read/write of T will work. Returns false in case of error.
928 template <typename T>
929 bool GenericVector<T>::Serialize(FILE* fp) const {
930  if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) {
931  return false;
932  }
933  if (fwrite(data_, sizeof(*data_), size_used_, fp) != unsigned_size()) {
934  return false;
935  }
936  return true;
937 }
938 template <typename T>
940  if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) {
941  return false;
942  }
943  if (fp->FWrite(data_, sizeof(*data_), size_used_) != size_used_) {
944  return false;
945  }
946  return true;
947 }
948 
949 // Reads a vector of simple types from the given file. Assumes that bitwise
950 // read/write will work with ReverseN according to sizeof(T).
951 // Returns false in case of error.
952 // If swap is true, assumes a big/little-endian swap is needed.
953 template <typename T>
954 bool GenericVector<T>::DeSerialize(bool swap, FILE* fp) {
955  uint32_t reserved;
956  if (fread(&reserved, sizeof(reserved), 1, fp) != 1) {
957  return false;
958  }
959  if (swap) {
960  Reverse32(&reserved);
961  }
962  // Arbitrarily limit the number of elements to protect against bad data.
963  assert(reserved <= UINT16_MAX);
964  if (reserved > UINT16_MAX) {
965  return false;
966  }
967  reserve(reserved);
968  size_used_ = reserved;
969  if (fread(data_, sizeof(T), size_used_, fp) != unsigned_size()) {
970  return false;
971  }
972  if (swap) {
973  for (int i = 0; i < size_used_; ++i) {
974  ReverseN(&data_[i], sizeof(data_[i]));
975  }
976  }
977  return true;
978 }
979 template <typename T>
981  uint32_t reserved;
982  if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) {
983  return false;
984  }
985  // Arbitrarily limit the number of elements to protect against bad data.
986  const uint32_t limit = 50000000;
987  assert(reserved <= limit);
988  if (reserved > limit) {
989  return false;
990  }
991  reserve(reserved);
992  size_used_ = reserved;
993  return fp->FReadEndian(data_, sizeof(T), size_used_) == size_used_;
994 }
995 template <typename T>
997  uint32_t reserved;
998  if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) {
999  return false;
1000  }
1001  return (uint32_t)fp->FRead(nullptr, sizeof(T), reserved) == reserved;
1002 }
1003 
1004 // Writes a vector of classes to the given file. Assumes the existence of
1005 // bool T::Serialize(FILE* fp) const that returns false in case of error.
1006 // Returns false in case of error.
1007 template <typename T>
1009  if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) {
1010  return false;
1011  }
1012  for (int i = 0; i < size_used_; ++i) {
1013  if (!data_[i].Serialize(fp)) {
1014  return false;
1015  }
1016  }
1017  return true;
1018 }
1019 template <typename T>
1021  if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) {
1022  return false;
1023  }
1024  for (int i = 0; i < size_used_; ++i) {
1025  if (!data_[i].Serialize(fp)) {
1026  return false;
1027  }
1028  }
1029  return true;
1030 }
1031 
1032 // Reads a vector of classes from the given file. Assumes the existence of
1033 // bool T::Deserialize(bool swap, FILE* fp) that returns false in case of
1034 // error. Also needs T::T() and T::T(constT&), as init_to_size is used in
1035 // this function. Returns false in case of error.
1036 // If swap is true, assumes a big/little-endian swap is needed.
1037 template <typename T>
1039  int32_t reserved;
1040  if (fread(&reserved, sizeof(reserved), 1, fp) != 1) {
1041  return false;
1042  }
1043  if (swap) {
1044  Reverse32(&reserved);
1045  }
1046  T empty;
1047  init_to_size(reserved, empty);
1048  for (int i = 0; i < reserved; ++i) {
1049  if (!data_[i].DeSerialize(swap, fp)) {
1050  return false;
1051  }
1052  }
1053  return true;
1054 }
1055 template <typename T>
1057  int32_t reserved;
1058  if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) {
1059  return false;
1060  }
1061  T empty;
1062  init_to_size(reserved, empty);
1063  for (int i = 0; i < reserved; ++i) {
1064  if (!data_[i].DeSerialize(fp)) {
1065  return false;
1066  }
1067  }
1068  return true;
1069 }
1070 template <typename T>
1072  int32_t reserved;
1073  if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) {
1074  return false;
1075  }
1076  for (int i = 0; i < reserved; ++i) {
1077  if (!T::SkipDeSerialize(fp)) {
1078  return false;
1079  }
1080  }
1081  return true;
1082 }
1083 
1084 // This method clear the current object, then, does a shallow copy of
1085 // its argument, and finally invalidates its argument.
1086 template <typename T>
1088  this->clear();
1089  this->data_ = from->data_;
1090  this->size_reserved_ = from->size_reserved_;
1091  this->size_used_ = from->size_used_;
1092  this->compare_cb_ = from->compare_cb_;
1093  this->clear_cb_ = from->clear_cb_;
1094  from->data_ = nullptr;
1095  from->clear_cb_ = nullptr;
1096  from->compare_cb_ = nullptr;
1097  from->size_used_ = 0;
1098  from->size_reserved_ = 0;
1099 }
1100 
1101 template <typename T>
1103  sort(&tesseract::sort_cmp<T>);
1104 }
1105 
1106 // Internal recursive version of choose_nth_item.
1107 // The algorithm used comes from "Algorithms" by Sedgewick:
1108 // http://books.google.com/books/about/Algorithms.html?id=idUdqdDXqnAC
1109 // The principle is to choose a random pivot, and move everything less than
1110 // the pivot to its left, and everything greater than the pivot to the end
1111 // of the array, then recurse on the part that contains the desired index, or
1112 // just return the answer if it is in the equal section in the middle.
1113 // The random pivot guarantees average linear time for the same reason that
1114 // n times vector::push_back takes linear time on average.
1115 // target_index, start and and end are all indices into the full array.
1116 // Seed is a seed for rand_r for thread safety purposes. Its value is
1117 // unimportant as the random numbers do not affect the result except
1118 // between equal answers.
1119 template <typename T>
1120 int GenericVector<T>::choose_nth_item(int target_index, int start, int end,
1121  unsigned int* seed) {
1122  // Number of elements to process.
1123  int num_elements = end - start;
1124  // Trivial cases.
1125  if (num_elements <= 1) {
1126  return start;
1127  }
1128  if (num_elements == 2) {
1129  if (data_[start] < data_[start + 1]) {
1130  return target_index > start ? start + 1 : start;
1131  }
1132  return target_index > start ? start : start + 1;
1133  }
1134 // Place the pivot at start.
1135 #ifndef rand_r // _MSC_VER, ANDROID
1136  srand(*seed);
1137 # define rand_r(seed) rand()
1138 #endif // _MSC_VER
1139  int pivot = rand_r(seed) % num_elements + start;
1140  swap(pivot, start);
1141  // The invariant condition here is that items [start, next_lesser) are less
1142  // than the pivot (which is at index next_lesser) and items
1143  // [prev_greater, end) are greater than the pivot, with items
1144  // [next_lesser, prev_greater) being equal to the pivot.
1145  int next_lesser = start;
1146  int prev_greater = end;
1147  for (int next_sample = start + 1; next_sample < prev_greater;) {
1148  if (data_[next_sample] < data_[next_lesser]) {
1149  swap(next_lesser++, next_sample++);
1150  } else if (data_[next_sample] == data_[next_lesser]) {
1151  ++next_sample;
1152  } else {
1153  swap(--prev_greater, next_sample);
1154  }
1155  }
1156  // Now the invariant is set up, we recurse on just the section that contains
1157  // the desired index.
1158  if (target_index < next_lesser) {
1159  return choose_nth_item(target_index, start, next_lesser, seed);
1160  }
1161  if (target_index < prev_greater) {
1162  return next_lesser; // In equal bracket.
1163  }
1164  return choose_nth_item(target_index, prev_greater, end, seed);
1165 }
1166 
1167 #endif // TESSERACT_CCUTIL_GENERICVECTOR_H_
GenericVector::delete_data_pointers
void delete_data_pointers()
Definition: genericvector.h:872
GenericVector::remove
void remove(int index)
Definition: genericvector.h:765
GenericVector::WithinBounds
bool WithinBounds(const T &rangemin, const T &rangemax) const
Definition: genericvector.h:310
tesseract::PointerVector::DeSerializeSize
static bool DeSerializeSize(TFile *fp, int32_t *size)
Definition: genericvector.h:592
GenericVector::swap
void swap(int index1, int index2)
Definition: genericvector.h:301
GenericVector::operator=
GenericVector< T > & operator=(const GenericVector &other)
Definition: genericvector.h:847
rand_r
#define rand_r(seed)
tesseract::sort_ptr_cmp
int sort_ptr_cmp(const void *t1, const void *t2)
Definition: genericvector.h:401
Reverse32
void Reverse32(void *ptr)
Definition: helpers.h:200
tesseract::PointerVector::PointerVector
PointerVector()
Definition: genericvector.h:419
GenericVector::unsigned_size
size_t unsigned_size() const
Definition: genericvector.h:75
tesseract::PointerVector::PointerVector
PointerVector(int size)
Definition: genericvector.h:420
tesseract::PointerVector::DeSerializeSkip
static bool DeSerializeSkip(TFile *fp)
Definition: genericvector.h:616
GenericVector::GenericVector
GenericVector(const GenericVector &other)
Definition: genericvector.h:48
tesseract::TFile::FReadEndian
int FReadEndian(void *buffer, size_t size, int count)
Definition: serialis.cpp:273
GenericVector::insert
void insert(const T &t, int index)
Definition: genericvector.h:750
tesseract::LoadDataFromFile
bool LoadDataFromFile(const char *filename, GenericVector< char > *data)
Definition: genericvector.h:341
GenericVector::data_
T * data_
Definition: genericvector.h:332
GenericVector::push_front
int push_front(const T &object)
Definition: genericvector.h:820
GenericVector::GenericVector
GenericVector(int size, const T &init_val)
Definition: genericvector.h:42
GenericVector::double_the_size_memcpy
static T * double_the_size_memcpy(int current_size, T *data)
Definition: genericvector.h:207
tesseract::PointerVector
Definition: genericvector.h:417
GenericVector::choose_nth_item
int choose_nth_item(int target_index)
Definition: genericvector.h:289
tesseract::PointerVector::~PointerVector
~PointerVector()
Definition: genericvector.h:421
tesseract::PointerVector::truncate
void truncate(int size)
Definition: genericvector.h:457
GenericVector::contains
bool contains(const T &object) const
Definition: genericvector.h:793
tesseract::PointerVector::clear
void clear()
Definition: genericvector.h:490
GenericVector::Serialize
bool Serialize(FILE *fp) const
Definition: genericvector.h:929
tesseract::PointerVector::operator+=
PointerVector< T > & operator+=(const PointerVector &other)
Definition: genericvector.h:432
tesseract::PointerVector::DeSerialize
bool DeSerialize(bool swap, FILE *fp)
Definition: genericvector.h:539
tesseract::SaveDataToFile
bool SaveDataToFile(const GenericVector< char > &data, const char *filename)
Definition: genericvector.h:362
tesseract::cmp_eq
bool cmp_eq(T const &t1, T const &t2)
Definition: genericvector.h:375
GenericVector::set_clear_callback
void set_clear_callback(std::function< void(T)> cb)
Definition: genericvector.h:140
tesseract::PointerVector::PointerVector
PointerVector(const PointerVector &other)
Definition: genericvector.h:428
GenericVector::compact_sorted
void compact_sorted()
Definition: genericvector.h:268
GenericVector::size_reserved_
int32_t size_reserved_
Definition: genericvector.h:331
GenericVector::back
T & back() const
Definition: genericvector.h:728
GenericVector::move
void move(GenericVector< T > *from)
Definition: genericvector.h:1087
tesseract::PointerVector::DeSerializeElement
bool DeSerializeElement(TFile *fp)
Definition: genericvector.h:596
GenericVector::reverse
void reverse()
Definition: genericvector.h:215
tesseract::TFile::FRead
int FRead(void *buffer, size_t size, int count)
Definition: serialis.cpp:284
GenericVector::DeSerializeClasses
bool DeSerializeClasses(bool swap, FILE *fp)
Definition: genericvector.h:1038
GenericVector::bool_binary_search
bool bool_binary_search(const T &target) const
Definition: genericvector.h:239
GenericVector::write
bool write(FILE *f, std::function< bool(FILE *, const T &)> cb) const
Definition: genericvector.h:879
tesseract::PointerVector::sort
void sort()
Definition: genericvector.h:631
GenericVector::push_back
int push_back(T object)
Definition: genericvector.h:799
GenericVector::SerializeClasses
bool SerializeClasses(FILE *fp) const
Definition: genericvector.h:1008
GenericVector::set
void set(const T &t, int index)
Definition: genericvector.h:741
GenericVector::contains_index
T contains_index(int index) const
Definition: genericvector.h:775
GenericVector::set_compare_callback
void set_compare_callback(std::function< bool(const T &, const T &)> cb)
Definition: genericvector.h:146
GenericVector::DeSerialize
bool DeSerialize(bool swap, FILE *fp)
Definition: genericvector.h:954
tesseract::PointerVector::remove
void remove(int index)
Definition: genericvector.h:450
GenericVector::resize_no_init
void resize_no_init(int size)
Definition: genericvector.h:65
tesseract::TFile
Definition: serialis.h:75
tesseract::PointerVector::DeSerialize
bool DeSerialize(TFile *fp)
Definition: genericvector.h:574
GenericVector::empty
bool empty() const
Definition: genericvector.h:86
GenericVector::double_the_size
void double_the_size()
Definition: genericvector.h:696
GenericVector::operator+=
GenericVector< T > & operator+=(const GenericVector &other)
Definition: genericvector.h:838
GenericVector::size_reserved
int size_reserved() const
Definition: genericvector.h:81
GenericVector::GenericVector
GenericVector()
Definition: genericvector.h:39
GenericVector::get_index
int get_index(const T &object) const
Definition: genericvector.h:781
tesseract::PointerVector::compact
void compact(std::function< bool(const T *)> delete_cb)
Definition: genericvector.h:466
helpers.h
tesseract
Definition: baseapi.h:65
GenericVector::clear_cb_
std::function< void(T)> clear_cb_
Definition: genericvector.h:333
GenericVector::pop_back
T pop_back()
Definition: genericvector.h:734
GenericVector::kDefaultVectorSize
static const int kDefaultVectorSize
Definition: genericvector.h:329
GenericVectorEqEq
Definition: genericvector.h:640
GenericVector::size_used_
int32_t size_used_
Definition: genericvector.h:330
GenericVector::operator[]
T & operator[](int index) const
Definition: genericvector.h:722
GenericVector::sort
void sort(int(*comparator)(const void *, const void *))
Definition: genericvector.h:232
GenericVector
Definition: baseapi.h:40
GenericVector::reserve
void reserve(int size)
Definition: genericvector.h:679
GenericVector::init
void init(int size)
Definition: genericvector.h:655
GenericVectorEqEq::GenericVectorEqEq
GenericVectorEqEq()
Definition: genericvector.h:642
GenericVector::SkipDeSerialize
static bool SkipDeSerialize(tesseract::TFile *fp)
Definition: genericvector.h:996
GenericVector::compare_cb_
std::function< bool(const T &, const T &)> compare_cb_
Definition: genericvector.h:334
GenericVectorEqEq::GenericVectorEqEq
GenericVectorEqEq(int size)
Definition: genericvector.h:647
GenericVector::truncate
void truncate(int size)
Definition: genericvector.h:132
GenericVector::get
T & get(int index) const
Definition: genericvector.h:716
GenericVector::clear
void clear()
Definition: genericvector.h:857
GenericVector::init_to_size
void init_to_size(int size, const T &t)
Definition: genericvector.h:706
GenericVector::binary_search
int binary_search(const T &target) const
Definition: genericvector.h:252
Swap
void Swap(T *p1, T *p2)
Definition: helpers.h:93
serialis.h
ReverseN
void ReverseN(void *ptr, int num_bytes)
Definition: helpers.h:183
tesseract::TFile::FWrite
int FWrite(const void *buffer, size_t size, int count)
Definition: serialis.cpp:332
tesseract::PointerVector::operator=
PointerVector< T > & operator=(const PointerVector &other)
Definition: genericvector.h:440
GenericVector::sort
void sort()
Definition: genericvector.h:1102
tesseract::PointerVector::Serialize
bool Serialize(TFile *fp) const
Definition: genericvector.h:516
GenericVector::push_back_new
int push_back_new(const T &object)
Definition: genericvector.h:810
GenericVector::size
int size() const
Definition: genericvector.h:71
GenericVector::~GenericVector
~GenericVector()
Definition: genericvector.h:672
tesseract::PointerVector::Serialize
bool Serialize(FILE *fp) const
Definition: genericvector.h:500
tesseract::sort_cmp
int sort_cmp(const void *t1, const void *t2)
Definition: genericvector.h:384
GenericVector::SkipDeSerializeClasses
static bool SkipDeSerializeClasses(tesseract::TFile *fp)
Definition: genericvector.h:1071
GenericVector::read
bool read(tesseract::TFile *f, std::function< bool(tesseract::TFile *, T *)> cb)
Definition: genericvector.h:902