All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
matrix.h
Go to the documentation of this file.
1 /* -*-C-*-
2  ******************************************************************************
3  *
4  * File: matrix.h (Formerly matrix.h)
5  * Description: Ratings matrix code. (Used by associator)
6  * Author: Mark Seaman, OCR Technology
7  * Created: Wed May 16 13:22:06 1990
8  * Modified: Tue Mar 19 16:00:20 1991 (Mark Seaman) marks@hpgrlt
9  * Language: C
10  * Package: N/A
11  * Status: Experimental (Do Not Distribute)
12  *
13  * (c) Copyright 1990, Hewlett-Packard Company.
14  ** Licensed under the Apache License, Version 2.0 (the "License");
15  ** you may not use this file except in compliance with the License.
16  ** You may obtain a copy of the License at
17  ** http://www.apache.org/licenses/LICENSE-2.0
18  ** Unless required by applicable law or agreed to in writing, software
19  ** distributed under the License is distributed on an "AS IS" BASIS,
20  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21  ** See the License for the specific language governing permissions and
22  ** limitations under the License.
23  *
24  *********************************************************************************/
25 #ifndef TESSERACT_CCSTRUCT_MATRIX_H__
26 #define TESSERACT_CCSTRUCT_MATRIX_H__
27 
28 #include "kdpair.h"
29 #include "unicharset.h"
30 
31 class BLOB_CHOICE_LIST;
32 
33 #define NOT_CLASSIFIED reinterpret_cast<BLOB_CHOICE_LIST*>(NULL)
34 
35 // A generic class to hold a 2-D matrix with entries of type T, but can also
36 // act as a base class for other implementations, such as a triangular or
37 // banded matrix.
38 template <class T>
40  public:
41  // Initializes the array size, and empty element, but cannot allocate memory
42  // for the subclasses or initialize because calls to the num_elements
43  // member will be routed to the base class implementation. Subclasses can
44  // either pass the memory in, or allocate after by calling Resize().
45  GENERIC_2D_ARRAY(int dim1, int dim2, const T& empty, T* array)
46  : empty_(empty), dim1_(dim1), dim2_(dim2), array_(array) {
47  }
48  // Original constructor for a full rectangular matrix DOES allocate memory
49  // and initialize it to empty.
50  GENERIC_2D_ARRAY(int dim1, int dim2, const T& empty)
51  : empty_(empty), dim1_(dim1), dim2_(dim2) {
52  array_ = new T[dim1_ * dim2_];
53  for (int x = 0; x < dim1_; x++)
54  for (int y = 0; y < dim2_; y++)
55  this->put(x, y, empty_);
56  }
57  virtual ~GENERIC_2D_ARRAY() { delete[] array_; }
58 
59  // Reallocate the array to the given size. Does not keep old data.
60  void Resize(int size1, int size2, const T& empty) {
61  empty_ = empty;
62  if (size1 != dim1_ || size2 != dim2_) {
63  dim1_ = size1;
64  dim2_ = size2;
65  delete [] array_;
66  array_ = new T[dim1_ * dim2_];
67  }
68  Clear();
69  }
70 
71  // Reallocate the array to the given size, keeping old data.
72  void ResizeWithCopy(int size1, int size2) {
73  if (size1 != dim1_ || size2 != dim2_) {
74  T* new_array = new T[size1 * size2];
75  for (int col = 0; col < size1; ++col) {
76  for (int row = 0; row < size2; ++row) {
77  int old_index = col * dim2() + row;
78  int new_index = col * size2 + row;
79  if (col < dim1_ && row < dim2_) {
80  new_array[new_index] = array_[old_index];
81  } else {
82  new_array[new_index] = empty_;
83  }
84  }
85  }
86  delete[] array_;
87  array_ = new_array;
88  dim1_ = size1;
89  dim2_ = size2;
90  }
91  }
92 
93  // Sets all the elements of the array to the empty value.
94  void Clear() {
95  int total_size = num_elements();
96  for (int i = 0; i < total_size; ++i)
97  array_[i] = empty_;
98  }
99 
100  // Writes to the given file. Returns false in case of error.
101  // Only works with bitwise-serializeable types!
102  bool Serialize(FILE* fp) const {
103  if (!SerializeSize(fp)) return false;
104  if (fwrite(&empty_, sizeof(empty_), 1, fp) != 1) return false;
105  int size = num_elements();
106  if (fwrite(array_, sizeof(*array_), size, fp) != size) return false;
107  return true;
108  }
109 
110  // Reads from the given file. Returns false in case of error.
111  // Only works with bitwise-serializeable typ
112  // If swap is true, assumes a big/little-endian swap is needed.
113  bool DeSerialize(bool swap, FILE* fp) {
114  if (!DeSerializeSize(swap, fp)) return false;
115  if (fread(&empty_, sizeof(empty_), 1, fp) != 1) return false;
116  if (swap) ReverseN(&empty_, sizeof(empty_));
117  int size = num_elements();
118  if (fread(array_, sizeof(*array_), size, fp) != size) return false;
119  if (swap) {
120  for (int i = 0; i < size; ++i)
121  ReverseN(&array_[i], sizeof(array_[i]));
122  }
123  return true;
124  }
125 
126  // Writes to the given file. Returns false in case of error.
127  // Assumes a T::Serialize(FILE*) const function.
128  bool SerializeClasses(FILE* fp) const {
129  if (!SerializeSize(fp)) return false;
130  if (!empty_.Serialize(fp)) return false;
131  int size = num_elements();
132  for (int i = 0; i < size; ++i) {
133  if (!array_[i].Serialize(fp)) return false;
134  }
135  return true;
136  }
137 
138  // Reads from the given file. Returns false in case of error.
139  // Assumes a T::DeSerialize(bool swap, FILE*) function.
140  // If swap is true, assumes a big/little-endian swap is needed.
141  bool DeSerializeClasses(bool swap, FILE* fp) {
142  if (!DeSerializeSize(swap, fp)) return false;
143  if (!empty_.DeSerialize(swap, fp)) return false;
144  int size = num_elements();
145  for (int i = 0; i < size; ++i) {
146  if (!array_[i].DeSerialize(swap, fp)) return false;
147  }
148  return true;
149  }
150 
151  // Provide the dimensions of this rectangular matrix.
152  int dim1() const { return dim1_; }
153  int dim2() const { return dim2_; }
154  // Returns the number of elements in the array.
155  // Banded/triangular matrices may override.
156  virtual int num_elements() const { return dim1_ * dim2_; }
157 
158  // Expression to select a specific location in the matrix. The matrix is
159  // stored COLUMN-major, so the left-most index is the most significant.
160  // This allows [][] access to use indices in the same order as (,).
161  virtual int index(int column, int row) const {
162  return (column * dim2_ + row);
163  }
164 
165  // Put a list element into the matrix at a specific location.
166  void put(int column, int row, const T& thing) {
167  array_[this->index(column, row)] = thing;
168  }
169 
170  // Get the item at a specified location from the matrix.
171  T get(int column, int row) const {
172  return array_[this->index(column, row)];
173  }
174  // Return a reference to the element at the specified location.
175  const T& operator()(int column, int row) const {
176  return array_[this->index(column, row)];
177  }
178  T& operator()(int column, int row) {
179  return array_[this->index(column, row)];
180  }
181  // Allow access using array[column][row]. NOTE that the indices are
182  // in the same left-to-right order as the () indexing.
183  T* operator[](int column) {
184  return &array_[this->index(column, 0)];
185  }
186  const T* operator[](int column) const {
187  return &array_[this->index(column, 0)];
188  }
189 
190  // Delete objects pointed to by array_[i].
192  int size = num_elements();
193  for (int i = 0; i < size; ++i) {
194  T matrix_cell = array_[i];
195  if (matrix_cell != empty_)
196  delete matrix_cell;
197  }
198  }
199 
200  protected:
201  // Factored helper to serialize the size.
202  bool SerializeSize(FILE* fp) const {
203  inT32 size = dim1_;
204  if (fwrite(&size, sizeof(size), 1, fp) != 1) return false;
205  size = dim2_;
206  if (fwrite(&size, sizeof(size), 1, fp) != 1) return false;
207  return true;
208  }
209  // Factored helper to deserialize the size.
210  // If swap is true, assumes a big/little-endian swap is needed.
211  bool DeSerializeSize(bool swap, FILE* fp) {
212  inT32 size1, size2;
213  if (fread(&size1, sizeof(size1), 1, fp) != 1) return false;
214  if (fread(&size2, sizeof(size2), 1, fp) != 1) return false;
215  if (swap) {
216  ReverseN(&size1, sizeof(size1));
217  ReverseN(&size2, sizeof(size2));
218  }
219  Resize(size1, size2, empty_);
220  return true;
221  }
222 
223  T* array_;
224  T empty_; // The unused cell.
225  int dim1_; // Size of the 1st dimension in indexing functions.
226  int dim2_; // Size of the 2nd dimension in indexing functions.
227 };
228 
229 // A generic class to store a banded triangular matrix with entries of type T.
230 // In this array, the nominally square matrix is dim1_ x dim1_, and dim2_ is
231 // the number of bands, INCLUDING the diagonal. The storage is thus of size
232 // dim1_ * dim2_ and index(col, row) = col * dim2_ + row - col, and an
233 // assert will fail if row < col or row - col >= dim2.
234 template <class T>
235 class BandTriMatrix : public GENERIC_2D_ARRAY<T> {
236  public:
237  // Allocate a piece of memory to hold a 2d-array of the given dimension.
238  // Initialize all the elements of the array to empty instead of assuming
239  // that a default constructor can be used.
240  BandTriMatrix(int dim1, int dim2, const T& empty)
241  : GENERIC_2D_ARRAY<T>(dim1, dim2, empty) {
242  }
243  // The default destructor will do.
244 
245  // Provide the dimensions of this matrix.
246  // dimension is the size of the nominally square matrix.
247  int dimension() const { return this->dim1_; }
248  // bandwidth is the number of bands in the matrix, INCLUDING the diagonal.
249  int bandwidth() const { return this->dim2_; }
250 
251  // Expression to select a specific location in the matrix. The matrix is
252  // stored COLUMN-major, so the left-most index is the most significant.
253  // This allows [][] access to use indices in the same order as (,).
254  virtual int index(int column, int row) const {
255  ASSERT_HOST(row >= column);
256  ASSERT_HOST(row - column < this->dim2_);
257  return column * this->dim2_ + row - column;
258  }
259 
260  // Appends array2 corner-to-corner to *this, making an array of dimension
261  // equal to the sum of the individual dimensions.
262  // array2 is not destroyed, but is left empty, as all elements are moved
263  // to *this.
265  int new_dim1 = this->dim1_ + array2->dim1_;
266  int new_dim2 = MAX(this->dim2_, array2->dim2_);
267  T* new_array = new T[new_dim1 * new_dim2];
268  for (int col = 0; col < new_dim1; ++col) {
269  for (int j = 0; j < new_dim2; ++j) {
270  int new_index = col * new_dim2 + j;
271  if (col < this->dim1_ && j < this->dim2_) {
272  new_array[new_index] = this->get(col, col + j);
273  } else if (col >= this->dim1_ && j < array2->dim2_) {
274  new_array[new_index] = array2->get(col - this->dim1_,
275  col - this->dim1_ + j);
276  array2->put(col - this->dim1_, col - this->dim1_ + j, NULL);
277  } else {
278  new_array[new_index] = this->empty_;
279  }
280  }
281  }
282  delete[] this->array_;
283  this->array_ = new_array;
284  this->dim1_ = new_dim1;
285  this->dim2_ = new_dim2;
286  }
287 };
288 
289 class MATRIX : public BandTriMatrix<BLOB_CHOICE_LIST *> {
290  public:
292  : BandTriMatrix<BLOB_CHOICE_LIST *>(dimension, bandwidth, NOT_CLASSIFIED) {}
293 
294  // Returns true if there are any real classification results.
295  bool Classified(int col, int row, int wildcard_id) const;
296 
297  // Expands the existing matrix in-place to make the band wider, without
298  // losing any existing data.
299  void IncreaseBandSize(int bandwidth);
300 
301  // Returns a bigger MATRIX with a new column and row in the matrix in order
302  // to split the blob at the given (ind,ind) diagonal location.
303  // Entries are relocated to the new MATRIX using the transformation defined
304  // by MATRIX_COORD::MapForSplit.
305  // Transfers the pointer data to the new MATRIX and deletes *this.
306  MATRIX* ConsumeAndMakeBigger(int ind);
307 
308  // Makes and returns a deep copy of *this, including all the BLOB_CHOICEs
309  // on the lists, but not any LanguageModelState that may be attached to the
310  // BLOB_CHOICEs.
311  MATRIX* DeepCopy() const;
312 
313  // Print a shortened version of the contents of the matrix.
314  void print(const UNICHARSET &unicharset) const;
315 };
316 
317 struct MATRIX_COORD {
318  static void Delete(void *arg) {
319  MATRIX_COORD *c = static_cast<MATRIX_COORD *>(arg);
320  delete c;
321  }
322  // Default constructor required by GenericHeap.
323  MATRIX_COORD() : col(0), row(0) {}
324  MATRIX_COORD(int c, int r): col(c), row(r) {}
326 
327  bool Valid(const MATRIX &m) const {
328  return 0 <= col && col < m.dimension() &&
329  col <= row && row < col + m.bandwidth() && row < m.dimension();
330  }
331 
332  // Remaps the col,row pair to split the blob at the given (ind,ind) diagonal
333  // location.
334  // Entries at (i,j) for i in [0,ind] and j in [ind,dim) move to (i,j+1),
335  // making a new row at ind.
336  // Entries at (i,j) for i in [ind+1,dim) and j in [i,dim) move to (i+i,j+1),
337  // making a new column at ind+1.
338  void MapForSplit(int ind) {
339  ASSERT_HOST(row >= col);
340  if (col > ind) ++col;
341  if (row >= ind) ++row;
342  ASSERT_HOST(row >= col);
343  }
344 
345  int col;
346  int row;
347 };
348 
349 // The MatrixCoordPair contains a MATRIX_COORD and its priority.
351 
352 #endif // TESSERACT_CCSTRUCT_MATRIX_H__
virtual int num_elements() const
Definition: matrix.h:156
bool DeSerializeSize(bool swap, FILE *fp)
Definition: matrix.h:211
bool SerializeClasses(FILE *fp) const
Definition: matrix.h:128
int dim2() const
Definition: matrix.h:153
#define NOT_CLASSIFIED
Definition: matrix.h:33
#define MAX(x, y)
Definition: ndminx.h:24
virtual int index(int column, int row) const
Definition: matrix.h:254
bool SerializeSize(FILE *fp) const
Definition: matrix.h:202
T get(int column, int row) const
Definition: matrix.h:171
void AttachOnCorner(BandTriMatrix< T > *array2)
Definition: matrix.h:264
void MapForSplit(int ind)
Definition: matrix.h:338
~MATRIX_COORD()
Definition: matrix.h:325
virtual ~GENERIC_2D_ARRAY()
Definition: matrix.h:57
void Clear()
Definition: matrix.h:94
int dim1() const
Definition: matrix.h:152
MATRIX_COORD(int c, int r)
Definition: matrix.h:324
void Resize(int size1, int size2, const T &empty)
Definition: matrix.h:60
void put(int column, int row, const T &thing)
Definition: matrix.h:166
int dimension() const
Definition: matrix.h:247
#define ASSERT_HOST(x)
Definition: errcode.h:84
void delete_matrix_pointers()
Definition: matrix.h:191
T * operator[](int column)
Definition: matrix.h:183
GENERIC_2D_ARRAY(int dim1, int dim2, const T &empty, T *array)
Definition: matrix.h:45
static void Delete(void *arg)
Definition: matrix.h:318
GENERIC_2D_ARRAY(int dim1, int dim2, const T &empty)
Definition: matrix.h:50
MATRIX * DeepCopy() const
Definition: matrix.cpp:94
bool Valid(const MATRIX &m) const
Definition: matrix.h:327
bool DeSerializeClasses(bool swap, FILE *fp)
Definition: matrix.h:141
BandTriMatrix(int dim1, int dim2, const T &empty)
Definition: matrix.h:240
int bandwidth() const
Definition: matrix.h:249
MATRIX_COORD()
Definition: matrix.h:323
const T & operator()(int column, int row) const
Definition: matrix.h:175
void ReverseN(void *ptr, int num_bytes)
Definition: helpers.h:177
void print(const UNICHARSET &unicharset) const
Definition: matrix.cpp:112
MATRIX(int dimension, int bandwidth)
Definition: matrix.h:291
bool Serialize(FILE *fp) const
Definition: matrix.h:102
bool Classified(int col, int row, int wildcard_id) const
Definition: matrix.cpp:36
MATRIX * ConsumeAndMakeBigger(int ind)
Definition: matrix.cpp:58
const T * operator[](int column) const
Definition: matrix.h:186
T & operator()(int column, int row)
Definition: matrix.h:178
Definition: matrix.h:289
#define NULL
Definition: host.h:144
void ResizeWithCopy(int size1, int size2)
Definition: matrix.h:72
bool DeSerialize(bool swap, FILE *fp)
Definition: matrix.h:113
tesseract::KDPairInc< float, MATRIX_COORD > MatrixCoordPair
Definition: matrix.h:350
void IncreaseBandSize(int bandwidth)
Definition: matrix.cpp:49
virtual int index(int column, int row) const
Definition: matrix.h:161
int inT32
Definition: host.h:102