tesseract  5.0.0-alpha-619-ge9db
kdtree.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: kdtree.cpp
3  ** Purpose: Routines for managing K-D search trees
4  ** Author: Dan Johnson
5  **
6  ** (c) Copyright Hewlett-Packard Company, 1988.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  ******************************************************************************/
17 
18 /*-----------------------------------------------------------------------------
19  Include Files and Type Defines
20 -----------------------------------------------------------------------------*/
21 #include "kdtree.h"
22 #include "emalloc.h"
23 
24 #include <algorithm>
25 #include <cfloat> // for FLT_MAX
26 #include <cstdio>
27 #include <cmath>
28 
29 #define Magnitude(X) ((X) < 0 ? -(X) : (X))
30 #define NodeFound(N,K,D) (((N)->Key == (K)) && ((N)->Data == (D)))
31 
32 /*-----------------------------------------------------------------------------
33  Global Data Definitions and Declarations
34 -----------------------------------------------------------------------------*/
35 #define MINSEARCH -FLT_MAX
36 #define MAXSEARCH FLT_MAX
37 
38 // Helper function to find the next essential dimension in a cycle.
39 static int NextLevel(KDTREE *tree, int level) {
40  do {
41  ++level;
42  if (level >= tree->KeySize)
43  level = 0;
44  } while (tree->KeyDesc[level].NonEssential);
45  return level;
46 }
47 
48 //-----------------------------------------------------------------------------
50 template<typename Key, typename Value>
51 class MinK {
52  public:
53  MinK(Key max_key, int k);
54  ~MinK();
55 
56  struct Element {
57  Element() {}
58  Element(const Key& k, const Value& v) : key(k), value(v) {}
59 
60  Key key;
61  Value value;
62  };
63 
64  bool insert(Key k, Value v);
65  const Key& max_insertable_key();
66 
67  int elements_count() { return elements_count_; }
68  const Element* elements() { return elements_; }
69 
70  private:
71  const Key max_key_;
72  Element *elements_;
73  int elements_count_;
74  int k_;
75  int max_index_;
76 };
77 
78 template<typename Key, typename Value>
79 MinK<Key, Value>::MinK(Key max_key, int k) :
80  max_key_(max_key), elements_count_(0), k_(k < 1 ? 1 : k), max_index_(0) {
81  elements_ = new Element[k_];
82 }
83 
84 template<typename Key, typename Value>
86  delete []elements_;
87 }
88 
89 template<typename Key, typename Value>
91  if (elements_count_ < k_)
92  return max_key_;
93  return elements_[max_index_].key;
94 }
95 
96 template<typename Key, typename Value>
97 bool MinK<Key, Value>::insert(Key key, Value value) {
98  if (elements_count_ < k_) {
99  elements_[elements_count_++] = Element(key, value);
100  if (key > elements_[max_index_].key)
101  max_index_ = elements_count_ - 1;
102  return true;
103  } else if (key < elements_[max_index_].key) {
104  // evict the largest element.
105  elements_[max_index_] = Element(key, value);
106  // recompute max_index_
107  for (int i = 0; i < elements_count_; i++) {
108  if (elements_[i].key > elements_[max_index_].key)
109  max_index_ = i;
110  }
111  return true;
112  }
113  return false;
114 }
115 
116 
117 //-----------------------------------------------------------------------------
120 class KDTreeSearch {
121  public:
122  KDTreeSearch(KDTREE* tree, float *query_point, int k_closest);
123  ~KDTreeSearch();
124 
126  void Search(int *result_count, float *distances, void **results);
127 
128  private:
129  void SearchRec(int Level, KDNODE *SubTree);
130  bool BoxIntersectsSearch(float *lower, float *upper);
131 
132  KDTREE *tree_;
133  float *query_point_;
134  float *sb_min_;
135  float *sb_max_;
136  MinK<float, void *> results_;
137 };
138 
139 KDTreeSearch::KDTreeSearch(KDTREE *tree, float *query_point, int k_closest)
140  : tree_(tree), query_point_(query_point), results_(MAXSEARCH, k_closest) {
141  sb_min_ = new float[tree->KeySize];
142  sb_max_ = new float[tree->KeySize];
143 }
144 
146  delete[] sb_min_;
147  delete[] sb_max_;
148 }
149 
152 void KDTreeSearch::Search(int *result_count,
153  float *distances,
154  void **results) {
155  if (tree_->Root.Left == nullptr) {
156  *result_count = 0;
157  } else {
158  for (int i = 0; i < tree_->KeySize; i++) {
159  sb_min_[i] = tree_->KeyDesc[i].Min;
160  sb_max_[i] = tree_->KeyDesc[i].Max;
161  }
162  SearchRec(0, tree_->Root.Left);
163  int count = results_.elements_count();
164  *result_count = count;
165  for (int j = 0; j < count; j++) {
166  // Pre-cast to float64 as key is a template type and we have no control
167  // over its actual type.
168  distances[j] = static_cast<float>(sqrt(static_cast<double>(results_.elements()[j].key)));
169  results[j] = results_.elements()[j].value;
170  }
171  }
172 }
173 
174 /*-----------------------------------------------------------------------------
175  Public Code
176 -----------------------------------------------------------------------------*/
180 KDTREE *MakeKDTree(int16_t KeySize, const PARAM_DESC KeyDesc[]) {
181  auto *KDTree = static_cast<KDTREE *>(Emalloc(
182  sizeof(KDTREE) + (KeySize - 1) * sizeof(PARAM_DESC)));
183  for (int i = 0; i < KeySize; i++) {
184  KDTree->KeyDesc[i].NonEssential = KeyDesc[i].NonEssential;
185  KDTree->KeyDesc[i].Circular = KeyDesc[i].Circular;
186  if (KeyDesc[i].Circular) {
187  KDTree->KeyDesc[i].Min = KeyDesc[i].Min;
188  KDTree->KeyDesc[i].Max = KeyDesc[i].Max;
189  KDTree->KeyDesc[i].Range = KeyDesc[i].Max - KeyDesc[i].Min;
190  KDTree->KeyDesc[i].HalfRange = KDTree->KeyDesc[i].Range / 2;
191  KDTree->KeyDesc[i].MidRange = (KeyDesc[i].Max + KeyDesc[i].Min) / 2;
192  } else {
193  KDTree->KeyDesc[i].Min = MINSEARCH;
194  KDTree->KeyDesc[i].Max = MAXSEARCH;
195  }
196  }
197  KDTree->KeySize = KeySize;
198  KDTree->Root.Left = nullptr;
199  KDTree->Root.Right = nullptr;
200  return KDTree;
201 }
202 
203 
212 void KDStore(KDTREE *Tree, float *Key, void *Data) {
213  int Level;
214  KDNODE *Node;
215  KDNODE **PtrToNode;
216 
217  PtrToNode = &(Tree->Root.Left);
218  Node = *PtrToNode;
219  Level = NextLevel(Tree, -1);
220  while (Node != nullptr) {
221  if (Key[Level] < Node->BranchPoint) {
222  PtrToNode = &(Node->Left);
223  if (Key[Level] > Node->LeftBranch)
224  Node->LeftBranch = Key[Level];
225  }
226  else {
227  PtrToNode = &(Node->Right);
228  if (Key[Level] < Node->RightBranch)
229  Node->RightBranch = Key[Level];
230  }
231  Level = NextLevel(Tree, Level);
232  Node = *PtrToNode;
233  }
234 
235  *PtrToNode = MakeKDNode(Tree, Key, Data, Level);
236 } /* KDStore */
237 
252 void
253 KDDelete (KDTREE * Tree, float Key[], void *Data) {
254  int Level;
255  KDNODE *Current;
256  KDNODE *Father;
257 
258  /* initialize search at root of tree */
259  Father = &(Tree->Root);
260  Current = Father->Left;
261  Level = NextLevel(Tree, -1);
262 
263  /* search tree for node to be deleted */
264  while ((Current != nullptr) && (!NodeFound (Current, Key, Data))) {
265  Father = Current;
266  if (Key[Level] < Current->BranchPoint)
267  Current = Current->Left;
268  else
269  Current = Current->Right;
270 
271  Level = NextLevel(Tree, Level);
272  }
273 
274  if (Current != nullptr) { /* if node to be deleted was found */
275  if (Current == Father->Left) {
276  Father->Left = nullptr;
277  Father->LeftBranch = Tree->KeyDesc[Level].Min;
278  } else {
279  Father->Right = nullptr;
280  Father->RightBranch = Tree->KeyDesc[Level].Max;
281  }
282 
283  InsertNodes(Tree, Current->Left);
284  InsertNodes(Tree, Current->Right);
285  FreeSubTree(Current);
286  }
287 } /* KDDelete */
288 
306  KDTREE *Tree, float Query[], int QuerySize, float MaxDistance,
307  int *NumberOfResults, void **NBuffer, float DBuffer[]) {
308  KDTreeSearch search(Tree, Query, QuerySize);
309  search.Search(NumberOfResults, DBuffer, NBuffer);
310 }
311 
312 
313 /*---------------------------------------------------------------------------*/
315 void KDWalk(KDTREE *Tree, void_proc action, void *context) {
316  if (Tree->Root.Left != nullptr)
317  Walk(Tree, action, context, Tree->Root.Left, NextLevel(Tree, -1));
318 }
319 
320 
321 /*---------------------------------------------------------------------------*/
331 void FreeKDTree(KDTREE *Tree) {
332  FreeSubTree(Tree->Root.Left);
333  free(Tree);
334 } /* FreeKDTree */
335 
336 
337 /*-----------------------------------------------------------------------------
338  Private Code
339 -----------------------------------------------------------------------------*/
340 /*---------------------------------------------------------------------------*/
352 KDNODE *MakeKDNode(KDTREE *tree, float Key[], void *Data, int Index) {
353  KDNODE *NewNode;
354 
355  NewNode = static_cast<KDNODE *>(Emalloc (sizeof (KDNODE)));
356 
357  NewNode->Key = Key;
358  NewNode->Data = Data;
359  NewNode->BranchPoint = Key[Index];
360  NewNode->LeftBranch = tree->KeyDesc[Index].Min;
361  NewNode->RightBranch = tree->KeyDesc[Index].Max;
362  NewNode->Left = nullptr;
363  NewNode->Right = nullptr;
364 
365  return NewNode;
366 } /* MakeKDNode */
367 
368 
369 /*---------------------------------------------------------------------------*/
370 void FreeKDNode(KDNODE *Node) { free(Node); }
371 
372 /*---------------------------------------------------------------------------*/
378 void KDTreeSearch::SearchRec(int level, KDNODE *sub_tree) {
379  if (level >= tree_->KeySize)
380  level = 0;
381 
382  if (!BoxIntersectsSearch(sb_min_, sb_max_))
383  return;
384 
385  results_.insert(DistanceSquared(tree_->KeySize, tree_->KeyDesc, query_point_,
386  sub_tree->Key),
387  sub_tree->Data);
388 
389  if (query_point_[level] < sub_tree->BranchPoint) {
390  if (sub_tree->Left != nullptr) {
391  float tmp = sb_max_[level];
392  sb_max_[level] = sub_tree->LeftBranch;
393  SearchRec(NextLevel(tree_, level), sub_tree->Left);
394  sb_max_[level] = tmp;
395  }
396  if (sub_tree->Right != nullptr) {
397  float tmp = sb_min_[level];
398  sb_min_[level] = sub_tree->RightBranch;
399  SearchRec(NextLevel(tree_, level), sub_tree->Right);
400  sb_min_[level] = tmp;
401  }
402  } else {
403  if (sub_tree->Right != nullptr) {
404  float tmp = sb_min_[level];
405  sb_min_[level] = sub_tree->RightBranch;
406  SearchRec(NextLevel(tree_, level), sub_tree->Right);
407  sb_min_[level] = tmp;
408  }
409  if (sub_tree->Left != nullptr) {
410  float tmp = sb_max_[level];
411  sb_max_[level] = sub_tree->LeftBranch;
412  SearchRec(NextLevel(tree_, level), sub_tree->Left);
413  sb_max_[level] = tmp;
414  }
415  }
416 }
417 
418 
419 /*---------------------------------------------------------------------------*/
427 float DistanceSquared(int k, PARAM_DESC *dim, float p1[], float p2[]) {
428  float total_distance = 0;
429 
430  for (; k > 0; k--, p1++, p2++, dim++) {
431  if (dim->NonEssential)
432  continue;
433 
434  float dimension_distance = *p1 - *p2;
435 
436  /* if this dimension is circular - check wraparound distance */
437  if (dim->Circular) {
438  dimension_distance = Magnitude(dimension_distance);
439  float wrap_distance = dim->Max - dim->Min - dimension_distance;
440  dimension_distance = std::min(dimension_distance, wrap_distance);
441  }
442 
443  total_distance += dimension_distance * dimension_distance;
444  }
445  return total_distance;
446 }
447 
448 float ComputeDistance(int k, PARAM_DESC *dim, float p1[], float p2[]) {
449  return sqrt(DistanceSquared(k, dim, p1, p2));
450 }
451 
452 /*---------------------------------------------------------------------------*/
457 bool KDTreeSearch::BoxIntersectsSearch(float *lower, float *upper) {
458  float *query = query_point_;
459  // Compute the sum in higher precision.
460  double total_distance = 0.0;
461  double radius_squared = static_cast<double>(results_.max_insertable_key()) *
462  results_.max_insertable_key();
463  PARAM_DESC *dim = tree_->KeyDesc;
464 
465  for (int i = tree_->KeySize; i > 0; i--, dim++, query++, lower++, upper++) {
466  if (dim->NonEssential)
467  continue;
468 
469  float dimension_distance;
470  if (*query < *lower)
471  dimension_distance = *lower - *query;
472  else if (*query > *upper)
473  dimension_distance = *query - *upper;
474  else
475  dimension_distance = 0;
476 
477  /* if this dimension is circular - check wraparound distance */
478  if (dim->Circular) {
479  float wrap_distance = FLT_MAX;
480  if (*query < *lower)
481  wrap_distance = *query + dim->Max - dim->Min - *upper;
482  else if (*query > *upper)
483  wrap_distance = *lower - (*query - (dim->Max - dim->Min));
484  dimension_distance = std::min(dimension_distance, wrap_distance);
485  }
486 
487  total_distance +=
488  static_cast<double>(dimension_distance) * dimension_distance;
489  if (total_distance >= radius_squared)
490  return false;
491  }
492  return true;
493 }
494 
495 
496 /*---------------------------------------------------------------------------*/
512 void Walk(KDTREE *tree, void_proc action, void *context,
513  KDNODE *sub_tree, int32_t level) {
514  (*action)(context, sub_tree->Data, level);
515  if (sub_tree->Left != nullptr)
516  Walk(tree, action, context, sub_tree->Left, NextLevel(tree, level));
517  if (sub_tree->Right != nullptr)
518  Walk(tree, action, context, sub_tree->Right, NextLevel(tree, level));
519 }
520 
522 void InsertNodes(KDTREE *tree, KDNODE *nodes) {
523  if (nodes == nullptr)
524  return;
525 
526  KDStore(tree, nodes->Key, nodes->Data);
527  InsertNodes(tree, nodes->Left);
528  InsertNodes(tree, nodes->Right);
529 }
530 
532 void FreeSubTree(KDNODE *sub_tree) {
533  if (sub_tree != nullptr) {
534  FreeSubTree(sub_tree->Left);
535  FreeSubTree(sub_tree->Right);
536  free(sub_tree);
537  }
538 }
MinK::~MinK
~MinK()
Definition: kdtree.cpp:84
emalloc.h
KDNODE::Key
float * Key
Definition: kdtree.h:38
FreeKDNode
void FreeKDNode(KDNODE *Node)
Definition: kdtree.cpp:369
KDTreeSearch
Definition: kdtree.cpp:119
MinK::Element::Element
Element()
Definition: kdtree.cpp:56
PARAM_DESC::Circular
bool Circular
Definition: ocrfeatures.h:42
Emalloc
void * Emalloc(int Size)
Definition: emalloc.cpp:31
MinK
Definition: kdtree.cpp:50
MakeKDTree
KDTREE * MakeKDTree(int16_t KeySize, const PARAM_DESC KeyDesc[])
Definition: kdtree.cpp:179
MinK::Element::Element
Element(const Key &k, const Value &v)
Definition: kdtree.cpp:57
DistanceSquared
float DistanceSquared(int k, PARAM_DESC *dim, float p1[], float p2[])
Definition: kdtree.cpp:426
KDDelete
void KDDelete(KDTREE *Tree, float Key[], void *Data)
Definition: kdtree.cpp:252
MinK::Element::value
Value value
Definition: kdtree.cpp:60
KDWalk
void KDWalk(KDTREE *Tree, void_proc action, void *context)
Definition: kdtree.cpp:314
MakeKDNode
KDNODE * MakeKDNode(KDTREE *tree, float Key[], void *Data, int Index)
Definition: kdtree.cpp:351
kdtree.h
KDTREE::KeyDesc
PARAM_DESC KeyDesc[1]
Definition: kdtree.h:50
ComputeDistance
float ComputeDistance(int k, PARAM_DESC *dim, float p1[], float p2[])
Definition: kdtree.cpp:447
KDNearestNeighborSearch
void KDNearestNeighborSearch(KDTREE *Tree, float Query[], int QuerySize, float MaxDistance, int *NumberOfResults, void **NBuffer, float DBuffer[])
Definition: kdtree.cpp:304
PARAM_DESC::Min
float Min
Definition: ocrfeatures.h:44
void_proc
void(*)(...) void_proc
Definition: kdtree.h:25
MinK::elements_count
int elements_count()
Definition: kdtree.cpp:66
KDTREE
Definition: kdtree.h:47
KDNODE::Data
void * Data
Definition: kdtree.h:39
KDTREE::Root
KDNODE Root
Definition: kdtree.h:49
KDTreeSearch::KDTreeSearch
KDTreeSearch(KDTREE *tree, float *query_point, int k_closest)
Definition: kdtree.cpp:138
FreeKDTree
void FreeKDTree(KDTREE *Tree)
Definition: kdtree.cpp:330
KDStore
void KDStore(KDTREE *Tree, float *Key, void *Data)
Definition: kdtree.cpp:211
KDNODE::Left
struct KDNODE * Left
Definition: kdtree.h:43
KDTreeSearch::Search
void Search(int *result_count, float *distances, void **results)
Definition: kdtree.cpp:151
InsertNodes
void InsertNodes(KDTREE *tree, KDNODE *nodes)
Definition: kdtree.cpp:521
PARAM_DESC::Max
float Max
Definition: ocrfeatures.h:45
MinK::MinK
MinK(Key max_key, int k)
Definition: kdtree.cpp:78
KDTREE::KeySize
int16_t KeySize
Definition: kdtree.h:48
MAXSEARCH
#define MAXSEARCH
Definition: kdtree.cpp:35
MinK::Element
Definition: kdtree.cpp:55
PARAM_DESC
Definition: ocrfeatures.h:41
PARAM_DESC::NonEssential
bool NonEssential
Definition: ocrfeatures.h:43
FreeSubTree
void FreeSubTree(KDNODE *sub_tree)
Definition: kdtree.cpp:531
count
int count(LIST var_list)
Definition: oldlist.cpp:79
NodeFound
#define NodeFound(N, K, D)
Definition: kdtree.cpp:29
Walk
void Walk(KDTREE *tree, void_proc action, void *context, KDNODE *sub_tree, int32_t level)
Definition: kdtree.cpp:511
MinK::max_insertable_key
const Key & max_insertable_key()
Definition: kdtree.cpp:89
KDNODE::Right
struct KDNODE * Right
Definition: kdtree.h:44
KDNODE::RightBranch
float RightBranch
Definition: kdtree.h:42
KDNODE::LeftBranch
float LeftBranch
Definition: kdtree.h:41
KDNODE::BranchPoint
float BranchPoint
Definition: kdtree.h:40
Magnitude
#define Magnitude(X)
Definition: kdtree.cpp:28
KDTreeSearch::~KDTreeSearch
~KDTreeSearch()
Definition: kdtree.cpp:144
KDNODE
Definition: kdtree.h:37
MinK::elements
const Element * elements()
Definition: kdtree.cpp:67
tesstrain_utils.action
action
Definition: tesstrain_utils.py:159
search
LIST search(LIST list, void *key, int_compare is_equal)
Definition: oldlist.cpp:202
MinK::Element::key
Key key
Definition: kdtree.cpp:59
MinK::insert
bool insert(Key k, Value v)
Definition: kdtree.cpp:96
MINSEARCH
#define MINSEARCH
Definition: kdtree.cpp:34