tesseract  5.0.0-alpha-619-ge9db
bitvector.h
Go to the documentation of this file.
1 // File: bitvector.h
3 // Description: Class replacement for BITVECTOR.
4 // Author: Ray Smith
5 //
6 // (C) Copyright 2011, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
18 
19 #ifndef TESSERACT_CCUTIL_BITVECTOR_H_
20 #define TESSERACT_CCUTIL_BITVECTOR_H_
21 
22 #include <cassert>
23 #include <cstdint> // for uint8_t
24 #include <cstdio>
25 
26 namespace tesseract {
27 
28 // Trivial class to encapsulate a fixed-length array of bits, with
29 // Serialize/DeSerialize. Replaces the old macros.
30 class BitVector {
31  public:
32  // Fast lookup table to get the first least significant set bit in a byte.
33  // For zero, the table has 255, but since it is a special case, most code
34  // that uses this table will check for zero before looking up lsb_index_.
35  static const uint8_t lsb_index_[256];
36  // Fast lookup table to get the residual bits after zeroing the least
37  // significant set bit in a byte.
38  static const uint8_t lsb_eroded_[256];
39  // Fast lookup table to give the number of set bits in a byte.
40  static const int hamming_table_[256];
41 
42  BitVector();
43  // Initializes the array to length * false.
44  explicit BitVector(int length);
45  BitVector(const BitVector& src);
46  BitVector& operator=(const BitVector& src);
47  ~BitVector();
48 
49  // Initializes the array to length * false.
50  void Init(int length);
51 
52  // Returns the number of bits that are accessible in the vector.
53  int size() const {
54  return bit_size_;
55  }
56 
57  // Writes to the given file. Returns false in case of error.
58  bool Serialize(FILE* fp) const;
59  // Reads from the given file. Returns false in case of error.
60  // If swap is true, assumes a big/little-endian swap is needed.
61  bool DeSerialize(bool swap, FILE* fp);
62 
63  void SetAllFalse();
64  void SetAllTrue();
65 
66  // Accessors to set/reset/get bits.
67  // The range of index is [0, size()-1].
68  // There is debug-only bounds checking.
69  void SetBit(int index) {
70  array_[WordIndex(index)] |= BitMask(index);
71  }
72  void ResetBit(int index) {
73  array_[WordIndex(index)] &= ~BitMask(index);
74  }
75  void SetValue(int index, bool value) {
76  if (value)
77  SetBit(index);
78  else
79  ResetBit(index);
80  }
81  bool At(int index) const {
82  return (array_[WordIndex(index)] & BitMask(index)) != 0;
83  }
84  bool operator[](int index) const {
85  return (array_[WordIndex(index)] & BitMask(index)) != 0;
86  }
87 
88  // Returns the index of the next set bit after the given index.
89  // Useful for quickly iterating through the set bits in a sparse vector.
90  int NextSetBit(int prev_bit) const;
91 
92  // Returns the number of set bits in the vector.
93  int NumSetBits() const;
94 
95  // Logical in-place operations on whole bit vectors. Tries to do something
96  // sensible if they aren't the same size, but they should be really.
97  void operator|=(const BitVector& other);
98  void operator&=(const BitVector& other);
99  void operator^=(const BitVector& other);
100  // Set subtraction *this = v1 - v2.
101  void SetSubtract(const BitVector& v1, const BitVector& v2);
102 
103  private:
104  // Allocates memory for a vector of the given length.
105  void Alloc(int length);
106 
107  // Computes the index to array_ for the given index, with debug range
108  // checking.
109  int WordIndex(int index) const {
110  assert(0 <= index && index < bit_size_);
111  return index / kBitFactor;
112  }
113  // Returns a mask to select the appropriate bit for the given index.
114  uint32_t BitMask(int index) const {
115  return 1 << (index & (kBitFactor - 1));
116  }
117  // Returns the number of array elements needed to represent the current
118  // bit_size_.
119  int WordLength() const {
120  return (bit_size_ + kBitFactor - 1) / kBitFactor;
121  }
122  // Returns the number of bytes consumed by the array_.
123  int ByteLength() const {
124  return WordLength() * sizeof(*array_);
125  }
126 
127  // Number of bits in this BitVector.
128  int32_t bit_size_;
129  // Array of words used to pack the bits.
130  // Bits are stored little-endian by uint32_t word, ie by word first and then
131  // starting with the least significant bit in each word.
132  uint32_t* array_;
133  // Number of bits in an array_ element.
134  static const int kBitFactor = sizeof(uint32_t) * 8;
135 };
136 
137 } // namespace tesseract.
138 
139 #endif // TESSERACT_CCUTIL_BITVECTOR_H_
tesseract::BitVector::SetBit
void SetBit(int index)
Definition: bitvector.h:69
tesseract::BitVector::operator[]
bool operator[](int index) const
Definition: bitvector.h:84
tesseract::BitVector::SetAllFalse
void SetAllFalse()
Definition: bitvector.cpp:169
tesseract::BitVector::hamming_table_
static const int hamming_table_[256]
Definition: bitvector.h:40
tesseract::BitVector::NumSetBits
int NumSetBits() const
Definition: bitvector.cpp:216
tesseract::BitVector::Init
void Init(int length)
Definition: bitvector.cpp:139
tesseract::BitVector::NextSetBit
int NextSetBit(int prev_bit) const
Definition: bitvector.cpp:178
tesseract::BitVector::operator&=
void operator&=(const BitVector &other)
Definition: bitvector.cpp:236
tesseract::BitVector::Serialize
bool Serialize(FILE *fp) const
Definition: bitvector.cpp:145
tesseract::BitVector::operator^=
void operator^=(const BitVector &other)
Definition: bitvector.cpp:243
tesseract::BitVector::ResetBit
void ResetBit(int index)
Definition: bitvector.h:72
tesseract::BitVector::operator=
BitVector & operator=(const BitVector &src)
Definition: bitvector.cpp:126
tesseract::BitVector::operator|=
void operator|=(const BitVector &other)
Definition: bitvector.cpp:231
tesseract
Definition: baseapi.h:65
tesseract::BitVector::SetValue
void SetValue(int index, bool value)
Definition: bitvector.h:75
tesseract::BitVector::SetSubtract
void SetSubtract(const BitVector &v1, const BitVector &v2)
Definition: bitvector.cpp:249
tesseract::BitVector
Definition: bitvector.h:30
tesseract::BitVector::lsb_eroded_
static const uint8_t lsb_eroded_[256]
Definition: bitvector.h:38
tesseract::BitVector::At
bool At(int index) const
Definition: bitvector.h:81
tesseract::BitVector::size
int size() const
Definition: bitvector.h:53
tesseract::BitVector::DeSerialize
bool DeSerialize(bool swap, FILE *fp)
Definition: bitvector.cpp:153
tesseract::BitVector::lsb_index_
static const uint8_t lsb_index_[256]
Definition: bitvector.h:35
tesseract::BitVector::~BitVector
~BitVector()
Definition: bitvector.cpp:134
tesseract::BitVector::BitVector
BitVector()
Definition: bitvector.cpp:110
tesseract::BitVector::SetAllTrue
void SetAllTrue()
Definition: bitvector.cpp:172