32 word_mode_ = word_mode;
36 void BeamSearch::Cleanup() {
38 for (
int col = 0; col < col_cnt_; col++) {
60 lm_parent_edge, &edge_cnt);
63 for (
int edge = 0; edge < edge_cnt; edge++) {
67 !lm_edges[edge]->
IsEOW()) {
69 delete lm_edges[edge];
75 if (char_alt_list && char_alt_list->
AltCount() > 0) {
79 recognition_cost += extra_cost;
84 if (recognition_cost >= 0) {
85 out_col->
AddNode(lm_edges[edge], recognition_cost, parent_node,
88 delete lm_edges[edge];
103 fprintf(stderr,
"Cube ERROR (BeamSearch::Search): could not construct "
113 if (seg_pt_cnt_ < 0) {
116 col_cnt_ = seg_pt_cnt_ + 1;
119 if (seg_pt_cnt_ > 128) {
120 fprintf(stderr,
"Cube ERROR (BeamSearch::Search): segment point count is "
121 "suspiciously high; bailing out\n");
128 fprintf(stderr,
"Cube ERROR (BeamSearch::Search): could not construct "
129 "SearchColumn array\n");
132 memset(col_, 0, col_cnt_ *
sizeof(*col_));
135 for (
int end_seg = 1; end_seg <= (seg_pt_cnt_ + 1); end_seg++) {
139 if (!col_[end_seg - 1]) {
140 fprintf(stderr,
"Cube ERROR (BeamSearch::Search): could not construct "
141 "SearchColumn for column %d\n", end_seg - 1);
147 for (
int strt_seg = init_seg; strt_seg < end_seg; strt_seg++) {
148 int parent_nodes_cnt;
153 parent_nodes_cnt = 1;
157 parent_nodes_cnt = col_[strt_seg - 1]->
NodeCount();
158 parent_nodes = col_[strt_seg - 1]->
Nodes();
165 for (
int parent_idx = 0; parent_idx < parent_nodes_cnt; parent_idx++) {
168 : parent_nodes[parent_idx];
173 int contig_cost = srch_obj->
NoSpaceCost(strt_seg - 1, end_seg - 1);
177 int no_space_cost = 0;
178 if (!word_mode_ && strt_seg > 0) {
179 no_space_cost = srch_obj->
NoSpaceCost(strt_seg - 1);
185 CreateChildren(col_[end_seg - 1], lang_mod, parent_node,
186 lm_parent_edge, char_alt_list,
187 contig_cost + no_space_cost);
191 if (!word_mode_ && strt_seg > 0) {
195 int space_cost = srch_obj->
SpaceCost(strt_seg - 1);
200 CreateChildren(col_[end_seg - 1], lang_mod, parent_node,
NULL,
201 char_alt_list, contig_cost + space_cost);
209 col_[end_seg - 1]->
Prune();
215 WordAltList *alt_list = CreateWordAltList(srch_obj);
222 int node_cnt = col_[col_cnt_ - 1]->
NodeCount();
229 best_presorted_node_idx_ = 0;
237 for (
int node_idx = 0; node_idx < node_cnt; node_idx++) {
239 int recognition_cost = srch_nodes[node_idx]->
BestCost();
242 int size_cost =
SizeCost(srch_obj, srch_nodes[node_idx], &ch_buff);
247 int bigram_cost = !bigrams ? 0 :
250 int unigram_cost = !word_unigrams ? 0 :
254 cost =
static_cast<int>(
261 alt_list->
Insert(ch_buff, cost,
262 static_cast<void *>(srch_nodes[node_idx]));
265 if (best_cost < 0 || cost < best_cost) {
266 best_presorted_node_idx_ = node_idx;
280 if (col < 0 || col >= col_cnt_ || !col_)
287 if (col_cnt_ < 1 || !col_ || !col_[col_cnt_ - 1])
290 int node_cnt = col_[col_cnt_ - 1]->
NodeCount();
292 if (node_cnt < 1 || !srch_nodes || !srch_nodes[0])
294 return srch_nodes[0];
329 int *char_cnt,
char_32 **str32,
330 Boxa **char_boxes)
const {
345 return BackTrack(srch_obj, srch_node, char_cnt, str32, char_boxes);
353 int *char_cnt,
char_32 **str32,
354 Boxa **char_boxes)
const {
366 if (char_boxes && *char_boxes) {
367 boxaDestroy(char_boxes);
371 chars = SplitByNode(srch_obj, srch_node, char_cnt, char_boxes);
383 Boxa **char_boxes)
const {
399 boxaDestroy(char_boxes);
400 *char_boxes = boxaCreate(*char_cnt);
401 if (*char_boxes ==
NULL)
406 CharSamp **chars =
new CharSamp *[*char_cnt];
409 boxaDestroy(char_boxes);
413 int ch_idx = *char_cnt - 1;
414 int seg_pt_cnt = srch_obj->
SegPtCnt();
416 while (srch_node && ch_idx >= 0) {
418 SearchNode *parent_node = srch_node->
ParentNode();
421 int st_col = !parent_node ? 0 : parent_node->
ColIdx() + 1;
422 int st_seg_pt = st_col <= 0 ? -1 : st_col - 1;
423 int end_col = srch_node->
ColIdx();
424 int end_seg_pt = end_col >= seg_pt_cnt ? seg_pt_cnt : end_col;
427 CharSamp *samp = srch_obj->
CharSample(st_seg_pt, end_seg_pt);
433 chars[ch_idx] = samp;
436 Box *char_box = boxCreate(samp->Left(), samp->Top(),
437 samp->Width(), samp->Height());
442 boxaAddBox(*char_boxes, char_box, L_INSERT);
444 srch_node = parent_node;
450 boxaDestroy(char_boxes);
456 int char_boxa_size = boxaGetCount(*char_boxes);
457 int limit = char_boxa_size / 2;
458 for (
int i = 0; i < limit; ++i) {
460 int box2_idx = char_boxa_size - 1 - i;
461 Box *box1 = boxaGetBox(*char_boxes, box1_idx, L_CLONE);
462 Box *box2 = boxaGetBox(*char_boxes, box2_idx, L_CLONE);
463 boxaReplaceBox(*char_boxes, box2_idx, box1);
464 boxaReplaceBox(*char_boxes, box1_idx, box2);
bool Insert(char_32 *char_ptr, int cost, void *tag=NULL)
double WordUnigramWgt() const
LangModel * LangMod() const
int MaxSegPerChar() const
virtual LangModEdge ** GetEdges(CharAltList *alt_list, LangModEdge *parent_edge, int *edge_cnt)=0
virtual int SpaceCost(int seg_pt)=0
virtual LangModEdge * Root()=0
int Cost(CharSamp **samp_array, int samp_cnt) const
SearchNode * BestNode() const
WordUnigrams * WordUnigramsObj() const
int Cost(const char_32 *str, CharSet *char_set) const
double CharBigramWgt() const
CharBigrams * Bigrams() const
SearchNode * AddNode(LangModEdge *edge, int score, SearchNode *parent, CubeRecoContext *cntxt)
int Cost(const char_32 *str32, LangModel *lang_mod, CharSet *char_set) const
int SizeCost(SearchObject *srch_obj, SearchNode *node, char_32 **str32=NULL) const
const char_32 * NodeString()
TuningParams * Params() const
virtual CharSamp * CharSample(int start_pt, int end_pt)=0
virtual int ClassID() const =0
void SetLabel(char_32 label)
char_32 * Alt(int alt) const
BeamSearch(CubeRecoContext *cntxt, bool word_mode=true)
LangModEdge * LangModelEdge()
WordSizeModel * SizeModel() const
SearchNode * ParentNode()
CharSet * CharacterSet() const
SearchNode ** Nodes() const
int ClassCost(int class_id) const
virtual CharAltList * RecognizeSegment(int start_pt, int end_pt)=0
virtual bool IsEOW() const =0
virtual int NoSpaceCost(int seg_pt)=0
WordAltList * Search(SearchObject *srch_obj, LangModel *lang_mod=NULL)
CharSamp ** BackTrack(SearchObject *srch_obj, int node_index, int *char_cnt, char_32 **str32, Boxa **char_boxes) const
SearchColumn * Column(int col_idx) const