tesseract  4.0.0-1-g2a2b
tesseract::RecodeBeamSearch Class Reference

#include <recodebeam.h>

Public Member Functions

 RecodeBeamSearch (const UnicharCompress &recoder, int null_char, bool simple_text, Dict *dict)
 
void Decode (const NetworkIO &output, double dict_ratio, double cert_offset, double worst_dict_cert, const UNICHARSET *charset, int lstm_choice_mode=0)
 
void Decode (const GENERIC_2D_ARRAY< float > &output, double dict_ratio, double cert_offset, double worst_dict_cert, const UNICHARSET *charset)
 
void ExtractBestPathAsLabels (GenericVector< int > *labels, GenericVector< int > *xcoords) const
 
void ExtractBestPathAsUnicharIds (bool debug, const UNICHARSET *unicharset, GenericVector< int > *unichar_ids, GenericVector< float > *certs, GenericVector< float > *ratings, GenericVector< int > *xcoords) const
 
void ExtractBestPathAsWords (const TBOX &line_box, float scale_factor, bool debug, const UNICHARSET *unicharset, PointerVector< WERD_RES > *words, int lstm_choice_mode=0)
 
void DebugBeams (const UNICHARSET &unicharset) const
 

Static Public Member Functions

static int LengthFromBeamsIndex (int index)
 
static NodeContinuation ContinuationFromBeamsIndex (int index)
 
static bool IsDawgFromBeamsIndex (int index)
 
static int BeamIndex (bool is_dawg, NodeContinuation cont, int length)
 

Public Attributes

std::vector< std::vector< std::pair< const char *, float > > > timesteps
 

Static Public Attributes

static const float kMinCertainty = -20.0f
 
static const int kNumLengths = RecodedCharID::kMaxCodeLen + 1
 
static const int kNumBeams = 2 * NC_COUNT * kNumLengths
 

Detailed Description

Definition at line 179 of file recodebeam.h.

Constructor & Destructor Documentation

◆ RecodeBeamSearch()

tesseract::RecodeBeamSearch::RecodeBeamSearch ( const UnicharCompress &  recoder,
int  null_char,
bool  simple_text,
Dict *  dict 
)

Definition at line 68 of file recodebeam.cpp.

70  : recoder_(recoder),
71  beam_size_(0),
72  top_code_(-1),
73  second_code_(-1),
74  dict_(dict),
75  space_delimited_(true),
76  is_simple_text_(simple_text),
77  null_char_(null_char) {
78  if (dict_ != nullptr && !dict_->IsSpaceDelimitedLang()) space_delimited_ = false;
79 }
bool IsSpaceDelimitedLang() const
Returns true if the language is space-delimited (i.e. not Chinese, Japanese, or Thai).
Definition: dict.cpp:857

Member Function Documentation

◆ BeamIndex()

static int tesseract::RecodeBeamSearch::BeamIndex ( bool  is_dawg,
NodeContinuation  cont,
int  length 
)
inline static

Definition at line 237 of file recodebeam.h.

237  {
238  return (is_dawg * NC_COUNT + cont) * kNumLengths + length;
239  }
static const int kNumLengths
Definition: recodebeam.h:224

◆ ContinuationFromBeamsIndex()

static NodeContinuation tesseract::RecodeBeamSearch::ContinuationFromBeamsIndex ( int  index)
inline static

Definition at line 230 of file recodebeam.h.

230  {
231  return static_cast<NodeContinuation>((index / kNumLengths) % NC_COUNT);
232  }
static const int kNumLengths
Definition: recodebeam.h:224
NodeContinuation
Definition: recodebeam.h:72

◆ DebugBeams()

void tesseract::RecodeBeamSearch::DebugBeams ( const UNICHARSET &  unicharset) const

Definition at line 317 of file recodebeam.cpp.

317  {
318  for (int p = 0; p < beam_size_; ++p) {
319  for (int d = 0; d < 2; ++d) {
320  for (int c = 0; c < NC_COUNT; ++c) {
321  NodeContinuation cont = static_cast<NodeContinuation>(c);
322  int index = BeamIndex(d, cont, 0);
323  if (beam_[p]->beams_[index].empty()) continue;
324  // Print all the best scoring nodes for each unichar found.
325  tprintf("Position %d: %s+%s beam\n", p, d ? "Dict" : "Non-Dict",
326  kNodeContNames[c]);
327  DebugBeamPos(unicharset, beam_[p]->beams_[index]);
328  }
329  }
330  }
331 }
static int BeamIndex(bool is_dawg, NodeContinuation cont, int length)
Definition: recodebeam.h:237
NodeContinuation
Definition: recodebeam.h:72
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
const char * kNodeContNames[]
Definition: recodebeam.cpp:45

◆ Decode() [1/2]

void tesseract::RecodeBeamSearch::Decode ( const NetworkIO &  output,
double  dict_ratio,
double  cert_offset,
double  worst_dict_cert,
const UNICHARSET *  charset,
int  lstm_choice_mode = 0 
)

Definition at line 82 of file recodebeam.cpp.

84  {
85  beam_size_ = 0;
86  int width = output.Width();
87  if (lstm_choice_mode)
88  timesteps.clear();
89  for (int t = 0; t < width; ++t) {
90  ComputeTopN(output.f(t), output.NumFeatures(), kBeamWidths[0]);
91  DecodeStep(output.f(t), t, dict_ratio, cert_offset, worst_dict_cert,
92  charset);
93  if (lstm_choice_mode) {
94  SaveMostCertainChoices(output.f(t), output.NumFeatures(), charset, t);
95  }
96  }
97 }
std::vector< std::vector< std::pair< const char *, float > > > timesteps
Definition: recodebeam.h:216

◆ Decode() [2/2]

void tesseract::RecodeBeamSearch::Decode ( const GENERIC_2D_ARRAY< float > &  output,
double  dict_ratio,
double  cert_offset,
double  worst_dict_cert,
const UNICHARSET *  charset 
)

Definition at line 98 of file recodebeam.cpp.

101  {
102  beam_size_ = 0;
103  int width = output.dim1();
104  for (int t = 0; t < width; ++t) {
105  ComputeTopN(output[t], output.dim2(), kBeamWidths[0]);
106  DecodeStep(output[t], t, dict_ratio, cert_offset, worst_dict_cert, charset);
107  }
108 }
int dim1() const
Definition: matrix.h:206
int dim2() const
Definition: matrix.h:207

◆ ExtractBestPathAsLabels()

void tesseract::RecodeBeamSearch::ExtractBestPathAsLabels ( GenericVector< int > *  labels,
GenericVector< int > *  xcoords 
) const

Definition at line 140 of file recodebeam.cpp.

141  {
142  labels->truncate(0);
143  xcoords->truncate(0);
145  ExtractBestPaths(&best_nodes, nullptr);
146  // Now just run CTC on the best nodes.
147  int t = 0;
148  int width = best_nodes.size();
149  while (t < width) {
150  int label = best_nodes[t]->code;
151  if (label != null_char_) {
152  labels->push_back(label);
153  xcoords->push_back(t);
154  }
155  while (++t < width && !is_simple_text_ && best_nodes[t]->code == label) {
156  }
157  }
158  xcoords->push_back(width);
159 }
int size() const
Definition: genericvector.h:71
int push_back(T object)
void truncate(int size)

◆ ExtractBestPathAsUnicharIds()

void tesseract::RecodeBeamSearch::ExtractBestPathAsUnicharIds ( bool  debug,
const UNICHARSET *  unicharset,
GenericVector< int > *  unichar_ids,
GenericVector< float > *  certs,
GenericVector< float > *  ratings,
GenericVector< int > *  xcoords 
) const

Definition at line 163 of file recodebeam.cpp.

166  {
168  ExtractBestPaths(&best_nodes, nullptr);
169  ExtractPathAsUnicharIds(best_nodes, unichar_ids, certs, ratings, xcoords);
170  if (debug) {
171  DebugPath(unicharset, best_nodes);
172  DebugUnicharPath(unicharset, best_nodes, *unichar_ids, *certs, *ratings,
173  *xcoords);
174  }
175 }

◆ ExtractBestPathAsWords()

void tesseract::RecodeBeamSearch::ExtractBestPathAsWords ( const TBOX &  line_box,
float  scale_factor,
bool  debug,
const UNICHARSET *  unicharset,
PointerVector< WERD_RES > *  words,
int  lstm_choice_mode = 0 
)

Definition at line 178 of file recodebeam.cpp.

182  {
183  words->truncate(0);
184  GenericVector<int> unichar_ids;
185  GenericVector<float> certs;
186  GenericVector<float> ratings;
187  GenericVector<int> xcoords;
190  std::deque<std::pair<int,int>> best_choices;
191  ExtractBestPaths(&best_nodes, &second_nodes);
192  if (debug) {
193  DebugPath(unicharset, best_nodes);
194  ExtractPathAsUnicharIds(second_nodes, &unichar_ids, &certs, &ratings,
195  &xcoords);
196  tprintf("\nSecond choice path:\n");
197  DebugUnicharPath(unicharset, second_nodes, unichar_ids, certs, ratings,
198  xcoords);
199  }
200  int current_char;
201  int timestepEnd = 0;
202  //if lstm choice mode is required in granularity level 2 it stores the x
203  //Coordinates of every chosen character to match the alternative choices to it
204  if (lstm_choice_mode == 2) {
205  ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings,
206  &xcoords, &best_choices);
207  if (best_choices.size() > 0) {
208  current_char = best_choices.front().first;
209  timestepEnd = best_choices.front().second;
210  best_choices.pop_front();
211  }
212  } else {
213  ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings,
214  &xcoords);
215  }
216  int num_ids = unichar_ids.size();
217  if (debug) {
218  DebugUnicharPath(unicharset, best_nodes, unichar_ids, certs, ratings,
219  xcoords);
220  }
221  // Convert labels to unichar-ids.
222  int word_end = 0;
223  float prev_space_cert = 0.0f;
224  for (int word_start = 0; word_start < num_ids; word_start = word_end) {
225  for (word_end = word_start + 1; word_end < num_ids; ++word_end) {
226  // A word is terminated when a space character or start_of_word flag is
227  // hit. We also want to force a separate word for every non
228  // space-delimited character when not in a dictionary context.
229  if (unichar_ids[word_end] == UNICHAR_SPACE) break;
230  int index = xcoords[word_end];
231  if (best_nodes[index]->start_of_word) break;
232  if (best_nodes[index]->permuter == TOP_CHOICE_PERM &&
233  (!unicharset->IsSpaceDelimited(unichar_ids[word_end]) ||
234  !unicharset->IsSpaceDelimited(unichar_ids[word_end - 1])))
235  break;
236  }
237  float space_cert = 0.0f;
238  if (word_end < num_ids && unichar_ids[word_end] == UNICHAR_SPACE)
239  space_cert = certs[word_end];
240  bool leading_space =
241  word_start > 0 && unichar_ids[word_start - 1] == UNICHAR_SPACE;
242  // Create a WERD_RES for the output word.
243  WERD_RES* word_res = InitializeWord(
244  leading_space, line_box, word_start, word_end,
245  std::min(space_cert, prev_space_cert), unicharset, xcoords, scale_factor);
246  if (lstm_choice_mode == 1) {
247  for (size_t i = timestepEnd; i < xcoords[word_end]; i++) {
248  word_res->timesteps.push_back(timesteps[i]);
249  }
250  timestepEnd = xcoords[word_end];
251  } else if (lstm_choice_mode == 2) {
252  float sum = 0;
253  std::vector<std::pair<const char*, float>> choice_pairs;
254  for (size_t i = timestepEnd; i < xcoords[word_end]; i++) {
255  for (std::pair<const char*, float> choice : timesteps[i]) {
256  if (std::strcmp(choice.first, "") != 0) {
257  sum += choice.second;
258  choice_pairs.push_back(choice);
259  }
260  }
261  if ((best_choices.size() > 0 && i == best_choices.front().second - 1)
262  || i == xcoords[word_end]-1) {
263  std::map<const char*, float> summed_propabilities;
264  for (auto it = choice_pairs.begin(); it != choice_pairs.end(); ++it) {
265  summed_propabilities[it->first] += it->second;
266  }
267  std::vector<std::pair<const char*, float>> accumulated_timestep;
268  accumulated_timestep.push_back(std::pair<const char*,float>
269  (unicharset->id_to_unichar_ext
270  (current_char), 2.0));
271  int pos;
272  for (auto it = summed_propabilities.begin();
273  it != summed_propabilities.end(); ++it) {
274  if(sum == 0) break;
275  it->second/=sum;
276  pos = 0;
277  while (accumulated_timestep.size() > pos
278  && accumulated_timestep[pos].second > it->second) {
279  pos++;
280  }
281  accumulated_timestep.insert(accumulated_timestep.begin() + pos,
282  std::pair<const char*,float>(it->first,
283  it->second));
284  }
285  if (best_choices.size() > 0) {
286  current_char = best_choices.front().first;
287  best_choices.pop_front();
288  }
289  choice_pairs.clear();
290  word_res->timesteps.push_back(accumulated_timestep);
291  sum = 0;
292  }
293  }
294  timestepEnd = xcoords[word_end];
295  }
296  for (int i = word_start; i < word_end; ++i) {
297  BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST;
298  BLOB_CHOICE_IT bc_it(choices);
299  BLOB_CHOICE* choice = new BLOB_CHOICE(
300  unichar_ids[i], ratings[i], certs[i], -1, 1.0f,
301  static_cast<float>(INT16_MAX), 0.0f, BCC_STATIC_CLASSIFIER);
302  int col = i - word_start;
303  choice->set_matrix_cell(col, col);
304  bc_it.add_after_then_move(choice);
305  word_res->ratings->put(col, col, choices);
306  }
307  int index = xcoords[word_end - 1];
308  word_res->FakeWordFromRatings(best_nodes[index]->permuter);
309  words->push_back(word_res);
310  prev_space_cert = space_cert;
311  if (word_end < num_ids && unichar_ids[word_end] == UNICHAR_SPACE)
312  ++word_end;
313  }
314 }
int size() const
Definition: genericvector.h:71
void FakeWordFromRatings(PermuterType permuter)
Definition: pageres.cpp:904
bool IsSpaceDelimited(UNICHAR_ID unichar_id) const
Definition: unicharset.h:647
void set_matrix_cell(int col, int row)
Definition: ratngs.h:157
std::vector< std::vector< std::pair< const char *, float > > > timesteps
Definition: recodebeam.h:216
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
void put(ICOORD pos, const T &thing)
Definition: matrix.h:220
const char * id_to_unichar_ext(UNICHAR_ID id) const
Definition: unicharset.cpp:298
int push_back(T object)
MATRIX * ratings
Definition: pageres.h:231
std::vector< std::vector< std::pair< const char *, float > > > timesteps
Definition: pageres.h:224

◆ IsDawgFromBeamsIndex()

static bool tesseract::RecodeBeamSearch::IsDawgFromBeamsIndex ( int  index)
inline static

Definition at line 233 of file recodebeam.h.

233  {
234  return index / (kNumLengths * NC_COUNT) > 0;
235  }
static const int kNumLengths
Definition: recodebeam.h:224

◆ LengthFromBeamsIndex()

static int tesseract::RecodeBeamSearch::LengthFromBeamsIndex ( int  index)
inline static

Definition at line 229 of file recodebeam.h.

229 { return index % kNumLengths; }
static const int kNumLengths
Definition: recodebeam.h:224

Member Data Documentation

◆ kMinCertainty

const float tesseract::RecodeBeamSearch::kMinCertainty = -20.0f
static

Definition at line 222 of file recodebeam.h.

◆ kNumBeams

const int tesseract::RecodeBeamSearch::kNumBeams = 2 * NC_COUNT * kNumLengths
static

Definition at line 227 of file recodebeam.h.

◆ kNumLengths

const int tesseract::RecodeBeamSearch::kNumLengths = RecodedCharID::kMaxCodeLen + 1
static

Definition at line 224 of file recodebeam.h.

◆ timesteps

std::vector< std::vector<std::pair<const char*, float> > > tesseract::RecodeBeamSearch::timesteps

Definition at line 216 of file recodebeam.h.


The documentation for this class was generated from the following files: