All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
search_column.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: search_column.cpp
3  * Description: Implementation of the Beam Search Column Class
4  * Author: Ahmad Abdulkader
5  * Created: 2008
6  *
7  * (C) Copyright 2008, Google Inc.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #include "search_column.h"
21 #include <stdlib.h>
22 
23 namespace tesseract {
24 
25 SearchColumn::SearchColumn(int col_idx, int max_node) {
26  col_idx_ = col_idx;
27  node_cnt_ = 0;
28  node_array_ = NULL;
29  max_node_cnt_ = max_node;
30  node_hash_table_ = NULL;
31  init_ = false;
32  min_cost_ = INT_MAX;
33  max_cost_ = 0;
34 }
35 
36 // Cleanup data
37 void SearchColumn::Cleanup() {
38  if (node_array_ != NULL) {
39  for (int node_idx = 0; node_idx < node_cnt_; node_idx++) {
40  if (node_array_[node_idx] != NULL) {
41  delete node_array_[node_idx];
42  }
43  }
44 
45  delete []node_array_;
46  node_array_ = NULL;
47  }
48  FreeHashTable();
49  init_ = false;
50 }
51 
53  Cleanup();
54 }
55 
56 // Initializations
57 bool SearchColumn::Init() {
58  if (init_ == true) {
59  return true;
60  }
61 
62  // create hash table
63  if (node_hash_table_ == NULL) {
64  node_hash_table_ = new SearchNodeHashTable();
65  if (node_hash_table_ == NULL) {
66  return false;
67  }
68  }
69 
70  init_ = true;
71 
72  return true;
73 }
74 
75 // Prune the nodes if necessary. Pruning is done such that a max
76 // number of nodes is kept, i.e., the beam width
78  // no need to prune
79  if (node_cnt_ <= max_node_cnt_) {
80  return;
81  }
82 
83  // compute the cost histogram
84  memset(score_bins_, 0, sizeof(score_bins_));
85  int cost_range = max_cost_ - min_cost_ + 1;
86  for (int node_idx = 0; node_idx < node_cnt_; node_idx++) {
87  int cost_bin = static_cast<int>(
88  ((node_array_[node_idx]->BestCost() - min_cost_) *
89  kScoreBins) / static_cast<double>(cost_range));
90  if (cost_bin >= kScoreBins) {
91  cost_bin = kScoreBins - 1;
92  }
93  score_bins_[cost_bin]++;
94  }
95 
96  // determine the pruning cost by scanning the cost histogram from
97  // least to greatest cost bins and finding the cost at which the
98  // max number of nodes is exceeded
99  int pruning_cost = 0;
100  int new_node_cnt = 0;
101  for (int cost_bin = 0; cost_bin < kScoreBins; cost_bin++) {
102  if (new_node_cnt > 0 &&
103  (new_node_cnt + score_bins_[cost_bin]) > max_node_cnt_) {
104  pruning_cost = min_cost_ + ((cost_bin * cost_range) / kScoreBins);
105  break;
106  }
107  new_node_cnt += score_bins_[cost_bin];
108  }
109 
110  // prune out all the nodes above this cost
111  for (int node_idx = new_node_cnt = 0; node_idx < node_cnt_; node_idx++) {
112  // prune this node out
113  if (node_array_[node_idx]->BestCost() > pruning_cost ||
114  new_node_cnt > max_node_cnt_) {
115  delete node_array_[node_idx];
116  } else {
117  // keep it
118  node_array_[new_node_cnt++] = node_array_[node_idx];
119  }
120  }
121  node_cnt_ = new_node_cnt;
122 }
123 
124 // sort all nodes
126  if (node_cnt_ > 0 && node_array_ != NULL) {
127  qsort(node_array_, node_cnt_, sizeof(*node_array_),
129  }
130 }
131 
132 // add a new node
134  SearchNode *parent_node,
135  CubeRecoContext *cntxt) {
136  // init if necessary
137  if (init_ == false && Init() == false) {
138  return NULL;
139  }
140 
141  // find out if we have an node with the same edge
142  // look in the hash table
143  SearchNode *new_node = node_hash_table_->Lookup(edge, parent_node);
144  // node does not exist
145  if (new_node == NULL) {
146  new_node = new SearchNode(cntxt, parent_node, reco_cost, edge, col_idx_);
147  if (new_node == NULL) {
148  return NULL;
149  }
150 
151  // if the max node count has already been reached, check if the cost of
152  // the new node exceeds the max cost. This indicates that it will be pruned
153  // and so there is no point adding it
154  if (node_cnt_ >= max_node_cnt_ && new_node->BestCost() > max_cost_) {
155  delete new_node;
156  return NULL;
157  }
158 
159  // expand the node buffer if necc
160  if ((node_cnt_ % kNodeAllocChunk) == 0) {
161  // alloc a new buff
162  SearchNode **new_node_buff =
163  new SearchNode *[node_cnt_ + kNodeAllocChunk];
164  if (new_node_buff == NULL) {
165  delete new_node;
166  return NULL;
167  }
168 
169  // free existing after copying contents
170  if (node_array_ != NULL) {
171  memcpy(new_node_buff, node_array_, node_cnt_ * sizeof(*new_node_buff));
172  delete []node_array_;
173  }
174 
175  node_array_ = new_node_buff;
176  }
177 
178  // add the node to the hash table only if it is non-OOD edge
179  // because the langmod state is not unique
180  if (edge->IsOOD() == false) {
181  if (!node_hash_table_->Insert(edge, new_node)) {
182  tprintf("Hash table full!!!");
183  delete new_node;
184  return NULL;
185  }
186  }
187 
188  node_array_[node_cnt_++] = new_node;
189 
190  } else {
191  // node exists before
192  // if no update occurred, return NULL
193  if (new_node->UpdateParent(parent_node, reco_cost, edge) == false) {
194  new_node = NULL;
195  }
196 
197  // free the edge
198  if (edge != NULL) {
199  delete edge;
200  }
201  }
202 
203  // update Min and Max Costs
204  if (new_node != NULL) {
205  if (min_cost_ > new_node->BestCost()) {
206  min_cost_ = new_node->BestCost();
207  }
208 
209  if (max_cost_ < new_node->BestCost()) {
210  max_cost_ = new_node->BestCost();
211  }
212  }
213 
214  return new_node;
215 }
216 
218  SearchNode *best_node = NULL;
219 
220  for (int node_idx = 0; node_idx < node_cnt_; node_idx++) {
221  if (best_node == NULL ||
222  best_node->BestCost() > node_array_[node_idx]->BestCost()) {
223  best_node = node_array_[node_idx];
224  }
225  }
226 
227  return best_node;
228 }
229 } // namespace tesseract
virtual bool IsOOD() const =0
#define tprintf(...)
Definition: tprintf.h:31
SearchNode * Lookup(LangModEdge *lang_mod_edge, SearchNode *parent_node)
Definition: search_node.h:137
SearchColumn(int col_idx, int max_node_cnt)
bool UpdateParent(SearchNode *new_parent, int new_reco_cost, LangModEdge *new_edge)
Definition: search_node.cpp:77
bool Insert(LangModEdge *lang_mod_edge, SearchNode *srch_node)
Definition: search_node.h:119
SearchNode * AddNode(LangModEdge *edge, int score, SearchNode *parent, CubeRecoContext *cntxt)
static int SearchNodeComparer(const void *node1, const void *node2)
Definition: search_node.h:75
#define NULL
Definition: host.h:144