tesseract  5.0.0-alpha-619-ge9db
lstmrecognizer.cpp
Go to the documentation of this file.
1 // File: lstmrecognizer.cpp
3 // Description: Top-level line recognizer class for LSTM-based networks.
4 // Author: Ray Smith
5 // Created: Thu May 02 10:59:06 PST 2013
6 //
7 // (C) Copyright 2013, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
18 
19 // Include automatically generated configuration file if running autoconf.
20 #ifdef HAVE_CONFIG_H
21 # include "config_auto.h"
22 #endif
23 
24 #include "lstmrecognizer.h"
25 
26 #include "allheaders.h"
27 #include "callcpp.h"
28 #include "dict.h"
29 #include "genericheap.h"
30 #include <tesseract/helpers.h>
31 #include "imagedata.h"
32 #include "input.h"
33 #include "lstm.h"
34 #include "normalis.h"
35 #include "pageres.h"
36 #include "ratngs.h"
37 #include "recodebeam.h"
38 #include "scrollview.h"
39 #include "statistc.h"
40 #include "tprintf.h"
41 
42 #include <unordered_set>
43 #include <vector>
44 
45 namespace tesseract {
46 
// Default ratio between dictionary and non-dictionary words. Passed as the
// dict_ratio argument of RecodeBeamSearch::Decode when recognizing a line.
constexpr double kDictRatio = 2.25;
// Default certainty offset that gives the dictionary a chance. Passed as the
// cert_offset argument of RecodeBeamSearch::Decode when recognizing a line.
constexpr double kCertOffset = -0.085;
51 
52 LSTMRecognizer::LSTMRecognizer(const STRING language_data_path_prefix)
54  ccutil_.language_data_path_prefix = language_data_path_prefix;
55 }
56 
58  : network_(nullptr),
59  training_flags_(0),
60  training_iteration_(0),
61  sample_iteration_(0),
63  learning_rate_(0.0f),
64  momentum_(0.0f),
65  adam_beta_(0.0f),
66  dict_(nullptr),
67  search_(nullptr),
68  debug_win_(nullptr) {}
69 
71  delete network_;
72  delete dict_;
73  delete search_;
74 }
75 
76 // Loads a model from mgr, including the dictionary only if lang is not null.
77 bool LSTMRecognizer::Load(const ParamsVectors* params, const char* lang,
78  TessdataManager* mgr) {
79  TFile fp;
80  if (!mgr->GetComponent(TESSDATA_LSTM, &fp)) return false;
81  if (!DeSerialize(mgr, &fp)) return false;
82  if (lang == nullptr) return true;
83  // Allow it to run without a dictionary.
84  LoadDictionary(params, lang, mgr);
85  return true;
86 }
87 
88 // Writes to the given file. Returns false in case of error.
89 bool LSTMRecognizer::Serialize(const TessdataManager* mgr, TFile* fp) const {
90  bool include_charsets = mgr == nullptr ||
93  if (!network_->Serialize(fp)) return false;
94  if (include_charsets && !GetUnicharset().save_to_file(fp)) return false;
95  if (!network_str_.Serialize(fp)) return false;
96  if (!fp->Serialize(&training_flags_)) return false;
97  if (!fp->Serialize(&training_iteration_)) return false;
98  if (!fp->Serialize(&sample_iteration_)) return false;
99  if (!fp->Serialize(&null_char_)) return false;
100  if (!fp->Serialize(&adam_beta_)) return false;
101  if (!fp->Serialize(&learning_rate_)) return false;
102  if (!fp->Serialize(&momentum_)) return false;
103  if (include_charsets && IsRecoding() && !recoder_.Serialize(fp)) return false;
104  return true;
105 }
106 
107 // Reads from the given file. Returns false in case of error.
109  delete network_;
111  if (network_ == nullptr) return false;
112  bool include_charsets = mgr == nullptr ||
115  if (include_charsets && !ccutil_.unicharset.load_from_file(fp, false))
116  return false;
117  if (!network_str_.DeSerialize(fp)) return false;
118  if (!fp->DeSerialize(&training_flags_)) return false;
119  if (!fp->DeSerialize(&training_iteration_)) return false;
120  if (!fp->DeSerialize(&sample_iteration_)) return false;
121  if (!fp->DeSerialize(&null_char_)) return false;
122  if (!fp->DeSerialize(&adam_beta_)) return false;
123  if (!fp->DeSerialize(&learning_rate_)) return false;
124  if (!fp->DeSerialize(&momentum_)) return false;
125  if (include_charsets && !LoadRecoder(fp)) return false;
126  if (!include_charsets && !LoadCharsets(mgr)) return false;
129  return true;
130 }
131 
132 // Loads the charsets from mgr.
134  TFile fp;
135  if (!mgr->GetComponent(TESSDATA_LSTM_UNICHARSET, &fp)) return false;
136  if (!ccutil_.unicharset.load_from_file(&fp, false)) return false;
137  if (!mgr->GetComponent(TESSDATA_LSTM_RECODER, &fp)) return false;
138  if (!LoadRecoder(&fp)) return false;
139  return true;
140 }
141 
142 // Loads the Recoder.
144  if (IsRecoding()) {
145  if (!recoder_.DeSerialize(fp)) return false;
146  RecodedCharID code;
148  if (code(0) != UNICHAR_SPACE) {
149  tprintf("Space was garbled in recoding!!\n");
150  return false;
151  }
152  } else {
155  }
156  return true;
157 }
158 
159 // Loads the dictionary if possible from the traineddata file.
160 // Prints a warning message, and returns false but otherwise fails silently
161 // and continues to work without it if loading fails.
162 // Note that dictionary load is independent from DeSerialize, but dependent
163 // on the unicharset matching. This enables training to deserialize a model
164 // from checkpoint or restore without having to go back and reload the
165 // dictionary.
166 // Some parameters have to be passed in (from langdata/config/api via Tesseract)
168  const char* lang, TessdataManager* mgr) {
169  delete dict_;
170  dict_ = new Dict(&ccutil_);
171  dict_->user_words_file.ResetFrom(params);
172  dict_->user_words_suffix.ResetFrom(params);
173  dict_->user_patterns_file.ResetFrom(params);
174  dict_->user_patterns_suffix.ResetFrom(params);
176  dict_->LoadLSTM(lang, mgr);
177  if (dict_->FinishLoad()) return true; // Success.
178  tprintf("Failed to load any lstm-specific dictionaries for lang %s!!\n",
179  lang);
180  delete dict_;
181  dict_ = nullptr;
182  return false;
183 }
184 
185 // Recognizes the line image, contained within image_data, returning the
186 // ratings matrix and matching box_word for each WERD_RES in the output.
187 void LSTMRecognizer::RecognizeLine(const ImageData& image_data, bool invert,
188  bool debug, double worst_dict_cert,
189  const TBOX& line_box,
191  int lstm_choice_mode,
192  int lstm_choice_amount) {
193  NetworkIO outputs;
194  float scale_factor;
195  NetworkIO inputs;
196  if (!RecognizeLine(image_data, invert, debug, false, false, &scale_factor,
197  &inputs, &outputs))
198  return;
199  if (search_ == nullptr) {
200  search_ =
202  }
203  search_->excludedUnichars.clear();
204  search_->Decode(outputs, kDictRatio, kCertOffset, worst_dict_cert,
205  &GetUnicharset(), lstm_choice_mode);
206  search_->ExtractBestPathAsWords(line_box, scale_factor, debug,
207  &GetUnicharset(), words, lstm_choice_mode);
208  if (lstm_choice_mode){
210  for (int i = 0; i < lstm_choice_amount; ++i) {
212  worst_dict_cert, &GetUnicharset(),
213  lstm_choice_mode);
215  }
217  int char_it = 0;
218  for (int i = 0; i < words->size(); ++i) {
219  for (int j = 0; j < words->get(i)->end; ++j) {
220  if (char_it < search_->ctc_choices.size())
221  words->get(i)->CTC_symbol_choices.push_back(
222  search_->ctc_choices[char_it]);
223  if (char_it < search_->segmentedTimesteps.size())
224  words->get(i)->segmented_timesteps.push_back(
225  search_->segmentedTimesteps[char_it]);
226  ++char_it;
227  }
228  words->get(i)->timesteps = search_->combineSegmentedTimesteps(
229  &words->get(i)->segmented_timesteps);
230  }
231  search_->segmentedTimesteps.clear();
232  search_->ctc_choices.clear();
233  search_->excludedUnichars.clear();
234  }
235 }
236 
237 // Helper computes min and mean best results in the output.
238 void LSTMRecognizer::OutputStats(const NetworkIO& outputs, float* min_output,
239  float* mean_output, float* sd) {
240  const int kOutputScale = INT8_MAX;
241  STATS stats(0, kOutputScale + 1);
242  for (int t = 0; t < outputs.Width(); ++t) {
243  int best_label = outputs.BestLabel(t, nullptr);
244  if (best_label != null_char_) {
245  float best_output = outputs.f(t)[best_label];
246  stats.add(static_cast<int>(kOutputScale * best_output), 1);
247  }
248  }
249  // If the output is all nulls it could be that the photometric interpretation
250  // is wrong, so make it look bad, so the other way can win, even if not great.
251  if (stats.get_total() == 0) {
252  *min_output = 0.0f;
253  *mean_output = 0.0f;
254  *sd = 1.0f;
255  } else {
256  *min_output = static_cast<float>(stats.min_bucket()) / kOutputScale;
257  *mean_output = stats.mean() / kOutputScale;
258  *sd = stats.sd() / kOutputScale;
259  }
260 }
261 
262 // Runs the network forward on the line image in image_data, filling in
263 // scale_factor, inputs and outputs. Returns false if the line cannot be used.
264 bool LSTMRecognizer::RecognizeLine(const ImageData& image_data, bool invert,
265  bool debug, bool re_invert, bool upside_down,
266  float* scale_factor, NetworkIO* inputs,
267  NetworkIO* outputs) {
268  // Maximum width of image to train on.
269  const int kMaxImageWidth = 2560;
270  // This ensures consistent recognition results.
271  SetRandomSeed();
272  int min_width = network_->XScaleFactor();
273  Pix* pix = Input::PrepareLSTMInputs(image_data, network_, min_width,
274  &randomizer_, scale_factor);
275  if (pix == nullptr) {
276  tprintf("Line cannot be recognized!!\n");
277  return false;
278  }
279  if (network_->IsTraining() && pixGetWidth(pix) > kMaxImageWidth) {
280  tprintf("Image too large to learn!! Size = %dx%d\n", pixGetWidth(pix),
281  pixGetHeight(pix));
282  pixDestroy(&pix);
283  return false;
284  }
285  if (upside_down) pixRotate180(pix, pix);
286  // Reduction factor from image to coords.
287  *scale_factor = min_width / *scale_factor;
288  inputs->set_int_mode(IsIntMode());
289  SetRandomSeed();
291  network_->Forward(debug, *inputs, nullptr, &scratch_space_, outputs);
292  // Check for auto inversion.
293  float pos_min, pos_mean, pos_sd;
294  OutputStats(*outputs, &pos_min, &pos_mean, &pos_sd);
295  if (invert && pos_min < 0.5) {
296  // Run again inverted and see if it is any better.
297  NetworkIO inv_inputs, inv_outputs;
298  inv_inputs.set_int_mode(IsIntMode());
299  SetRandomSeed();
300  pixInvert(pix, pix);
302  &inv_inputs);
303  network_->Forward(debug, inv_inputs, nullptr, &scratch_space_,
304  &inv_outputs);
305  float inv_min, inv_mean, inv_sd;
306  OutputStats(inv_outputs, &inv_min, &inv_mean, &inv_sd);
307  if (inv_min > pos_min && inv_mean > pos_mean && inv_sd < pos_sd) {
308  // Inverted did better. Use inverted data.
309  if (debug) {
310  tprintf("Inverting image: old min=%g, mean=%g, sd=%g, inv %g,%g,%g\n",
311  pos_min, pos_mean, pos_sd, inv_min, inv_mean, inv_sd);
312  }
313  *outputs = inv_outputs;
314  *inputs = inv_inputs;
315  } else if (re_invert) {
316  // Inverting was not an improvement, so undo and run again, so the
317  // outputs match the best forward result.
318  SetRandomSeed();
319  network_->Forward(debug, *inputs, nullptr, &scratch_space_, outputs);
320  }
321  }
322  pixDestroy(&pix);
323  if (debug) {
324  GenericVector<int> labels, coords;
325  LabelsFromOutputs(*outputs, &labels, &coords);
326  DisplayForward(*inputs, labels, coords, "LSTMForward", &debug_win_);
327  DebugActivationPath(*outputs, labels, coords);
328  }
329  return true;
330 }
331 
332 // Converts an array of labels to utf-8, whether or not the labels are
333 // augmented with character boundaries.
335  STRING result;
336  int end = 1;
337  for (int start = 0; start < labels.size(); start = end) {
338  if (labels[start] == null_char_) {
339  end = start + 1;
340  } else {
341  result += DecodeLabel(labels, start, &end, nullptr);
342  }
343  }
344  return result;
345 }
346 
347 // Displays the forward results in a window with the characters and
348 // boundaries as determined by the labels and label_coords.
350  const GenericVector<int>& labels,
351  const GenericVector<int>& label_coords,
352  const char* window_name,
353  ScrollView** window) {
354 #ifndef GRAPHICS_DISABLED // do nothing if there's no graphics
355  Pix* input_pix = inputs.ToPix();
356  Network::ClearWindow(false, window_name, pixGetWidth(input_pix),
357  pixGetHeight(input_pix), window);
358  int line_height = Network::DisplayImage(input_pix, *window);
359  DisplayLSTMOutput(labels, label_coords, line_height, *window);
360 #endif // GRAPHICS_DISABLED
361 }
362 
363 // Displays the labels and cuts at the corresponding xcoords.
364 // Size of labels should match xcoords.
366  const GenericVector<int>& xcoords,
367  int height, ScrollView* window) {
368 #ifndef GRAPHICS_DISABLED // do nothing if there's no graphics
369  int x_scale = network_->XScaleFactor();
370  window->TextAttributes("Arial", height / 4, false, false, false);
371  int end = 1;
372  for (int start = 0; start < labels.size(); start = end) {
373  int xpos = xcoords[start] * x_scale;
374  if (labels[start] == null_char_) {
375  end = start + 1;
376  window->Pen(ScrollView::RED);
377  } else {
378  window->Pen(ScrollView::GREEN);
379  const char* str = DecodeLabel(labels, start, &end, nullptr);
380  if (*str == '\\') str = "\\\\";
381  xpos = xcoords[(start + end) / 2] * x_scale;
382  window->Text(xpos, height, str);
383  }
384  window->Line(xpos, 0, xpos, height * 3 / 2);
385  }
386  window->Update();
387 #endif // GRAPHICS_DISABLED
388 }
389 
390 // Prints debug output detailing the activation path that is implied by the
391 // label_coords.
393  const GenericVector<int>& labels,
394  const GenericVector<int>& xcoords) {
395  if (xcoords[0] > 0)
396  DebugActivationRange(outputs, "<null>", null_char_, 0, xcoords[0]);
397  int end = 1;
398  for (int start = 0; start < labels.size(); start = end) {
399  if (labels[start] == null_char_) {
400  end = start + 1;
401  DebugActivationRange(outputs, "<null>", null_char_, xcoords[start],
402  xcoords[end]);
403  continue;
404  } else {
405  int decoded;
406  const char* label = DecodeLabel(labels, start, &end, &decoded);
407  DebugActivationRange(outputs, label, labels[start], xcoords[start],
408  xcoords[start + 1]);
409  for (int i = start + 1; i < end; ++i) {
410  DebugActivationRange(outputs, DecodeSingleLabel(labels[i]), labels[i],
411  xcoords[i], xcoords[i + 1]);
412  }
413  }
414  }
415 }
416 
417 // Prints debug output detailing activations and 2nd choice over a range
418 // of positions.
420  const char* label, int best_choice,
421  int x_start, int x_end) {
422  tprintf("%s=%d On [%d, %d), scores=", label, best_choice, x_start, x_end);
423  double max_score = 0.0;
424  double mean_score = 0.0;
425  const int width = x_end - x_start;
426  for (int x = x_start; x < x_end; ++x) {
427  const float* line = outputs.f(x);
428  const double score = line[best_choice] * 100.0;
429  if (score > max_score) max_score = score;
430  mean_score += score / width;
431  int best_c = 0;
432  double best_score = 0.0;
433  for (int c = 0; c < outputs.NumFeatures(); ++c) {
434  if (c != best_choice && line[c] > best_score) {
435  best_c = c;
436  best_score = line[c];
437  }
438  }
439  tprintf(" %.3g(%s=%d=%.3g)", score, DecodeSingleLabel(best_c), best_c,
440  best_score * 100.0);
441  }
442  tprintf(", Mean=%g, max=%g\n", mean_score, max_score);
443 }
444 
// Helper decides whether the null_char should be used at timestep t.
// Returns true when the null activation reaches null_thr. Otherwise the
// result of BestLabel(t, null_char, null_char, nullptr) is consulted
// (presumably the best label other than null_char -- confirm against
// NetworkIO): if that is not UNICHAR_SPACE the answer is false, and if it is
// space, null is used only when its activation exceeds that of space.
#if 0  // TODO: unused, remove if still unused after 2020.
static bool NullIsBest(const NetworkIO& output, float null_thr,
                       int null_char, int t) {
  const float null_activation = output.f(t)[null_char];
  if (null_activation >= null_thr) return true;
  const int alternative = output.BestLabel(t, null_char, null_char, nullptr);
  if (alternative != UNICHAR_SPACE) return false;
  return null_activation > output.f(t)[UNICHAR_SPACE];
}
#endif
457 
458 // Converts the network output to a sequence of labels. Outputs labels, scores
459 // and start xcoords of each char, and each null_char_, with an additional
460 // final xcoord for the end of the output.
461 // The conversion method is determined by internal state.
463  GenericVector<int>* labels,
464  GenericVector<int>* xcoords) {
465  if (SimpleTextOutput()) {
466  LabelsViaSimpleText(outputs, labels, xcoords);
467  } else {
468  LabelsViaReEncode(outputs, labels, xcoords);
469  }
470 }
471 
472 // As LabelsViaCTC except that this function constructs the best path that
473 // contains only legal sequences of subcodes for CJK.
475  GenericVector<int>* labels,
476  GenericVector<int>* xcoords) {
477  if (search_ == nullptr) {
478  search_ =
480  }
481  search_->Decode(output, 1.0, 0.0, RecodeBeamSearch::kMinCertainty, nullptr);
482  search_->ExtractBestPathAsLabels(labels, xcoords);
483 }
484 
485 // Converts the network output to a sequence of labels, with scores, using
486 // the simple character model (each position is a char, and the null_char_ is
487 // mainly intended for tail padding.)
489  GenericVector<int>* labels,
490  GenericVector<int>* xcoords) {
491  labels->truncate(0);
492  xcoords->truncate(0);
493  const int width = output.Width();
494  for (int t = 0; t < width; ++t) {
495  float score = 0.0f;
496  const int label = output.BestLabel(t, &score);
497  if (label != null_char_) {
498  labels->push_back(label);
499  xcoords->push_back(t);
500  }
501  }
502  xcoords->push_back(width);
503 }
504 
505 // Returns a string corresponding to the label starting at start. Sets *end
506 // to the next start and if non-null, *decoded to the unichar id.
508  int start, int* end, int* decoded) {
509  *end = start + 1;
510  if (IsRecoding()) {
511  // Decode labels via recoder_.
512  RecodedCharID code;
513  if (labels[start] == null_char_) {
514  if (decoded != nullptr) {
515  code.Set(0, null_char_);
516  *decoded = recoder_.DecodeUnichar(code);
517  }
518  return "<null>";
519  }
520  int index = start;
521  while (index < labels.size() &&
523  code.Set(code.length(), labels[index++]);
524  while (index < labels.size() && labels[index] == null_char_) ++index;
525  int uni_id = recoder_.DecodeUnichar(code);
526  // If the next label isn't a valid first code, then we need to continue
527  // extending even if we have a valid uni_id from this prefix.
528  if (uni_id != INVALID_UNICHAR_ID &&
529  (index == labels.size() ||
531  recoder_.IsValidFirstCode(labels[index]))) {
532  *end = index;
533  if (decoded != nullptr) *decoded = uni_id;
534  if (uni_id == UNICHAR_SPACE) return " ";
535  return GetUnicharset().get_normed_unichar(uni_id);
536  }
537  }
538  return "<Undecodable>";
539  } else {
540  if (decoded != nullptr) *decoded = labels[start];
541  if (labels[start] == null_char_) return "<null>";
542  if (labels[start] == UNICHAR_SPACE) return " ";
543  return GetUnicharset().get_normed_unichar(labels[start]);
544  }
545 }
546 
547 // Returns a string corresponding to a given single label id, falling back to
548 // a default of ".." for part of a multi-label unichar-id.
549 const char* LSTMRecognizer::DecodeSingleLabel(int label) {
550  if (label == null_char_) return "<null>";
551  if (IsRecoding()) {
552  // Decode label via recoder_.
553  RecodedCharID code;
554  code.Set(0, label);
555  label = recoder_.DecodeUnichar(code);
556  if (label == INVALID_UNICHAR_ID) return ".."; // Part of a bigger code.
557  }
558  if (label == UNICHAR_SPACE) return " ";
559  return GetUnicharset().get_normed_unichar(label);
560 }
561 
562 } // namespace tesseract.
UNICHARSET::load_from_file
bool load_from_file(const char *const filename, bool skip_fragments)
Definition: unicharset.h:378
tesseract::RecodeBeamSearch::kMinCertainty
static constexpr float kMinCertainty
Definition: recodebeam.h:252
STATS::get_total
int32_t get_total() const
Definition: statistc.h:83
STATS::mean
double mean() const
Definition: statistc.cpp:119
tesseract::LSTMRecognizer::learning_rate_
float learning_rate_
Definition: lstmrecognizer.h:283
ScrollView
Definition: scrollview.h:97
tesseract::RecodeBeamSearch::excludedUnichars
std::vector< std::unordered_set< int > > excludedUnichars
Definition: recodebeam.h:245
normalis.h
tesseract::LSTMRecognizer::dict_
Dict * dict_
Definition: lstmrecognizer.h:292
STATS::min_bucket
int32_t min_bucket() const
Definition: statistc.cpp:187
tesseract::Network::SetRandomizer
virtual void SetRandomizer(TRand *randomizer)
Definition: network.cpp:138
tesseract::LSTMRecognizer::DebugActivationPath
void DebugActivationPath(const NetworkIO &outputs, const GenericVector< int > &labels, const GenericVector< int > &xcoords)
Definition: lstmrecognizer.cpp:392
pageres.h
dict.h
tesseract::RecodeBeamSearch::ctc_choices
std::vector< std::vector< std::pair< const char *, float > > > ctc_choices
Definition: recodebeam.h:243
tesseract::LSTMRecognizer::LabelsViaSimpleText
void LabelsViaSimpleText(const NetworkIO &output, GenericVector< int > *labels, GenericVector< int > *xcoords)
Definition: lstmrecognizer.cpp:488
tesseract::RecodeBeamSearch
Definition: recodebeam.h:180
tesseract::TessdataManager
Definition: tessdatamanager.h:126
tesseract::UnicharCompress::DeSerialize
bool DeSerialize(TFile *fp)
Definition: unicharcompress.cpp:305
tesseract::NetworkIO::set_int_mode
void set_int_mode(bool is_quantized)
Definition: networkio.h:130
tesseract::LSTMRecognizer::IsIntMode
bool IsIntMode() const
Definition: lstmrecognizer.h:70
tesseract::LSTMRecognizer::search_
RecodeBeamSearch * search_
Definition: lstmrecognizer.h:294
tesseract::Network::XScaleFactor
virtual int XScaleFactor() const
Definition: network.h:209
tesseract::LSTMRecognizer::LoadDictionary
bool LoadDictionary(const ParamsVectors *params, const char *lang, TessdataManager *mgr)
Definition: lstmrecognizer.cpp:167
tesseract::LSTMRecognizer::randomizer_
TRand randomizer_
Definition: lstmrecognizer.h:289
tesseract::PointerVector< WERD_RES >
STRING
Definition: strngs.h:45
tesseract::NetworkIO::Width
int Width() const
Definition: networkio.h:107
recodebeam.h
ScrollView::Pen
void Pen(Color color)
Definition: scrollview.cpp:717
tesseract::LSTMRecognizer::sample_iteration_
int32_t sample_iteration_
Definition: lstmrecognizer.h:278
tesseract::RecodeBeamSearch::Decode
void Decode(const NetworkIO &output, double dict_ratio, double cert_offset, double worst_dict_cert, const UNICHARSET *charset, int lstm_choice_mode=0)
Definition: recodebeam.cpp:78
tesseract::LSTMRecognizer::network_str_
STRING network_str_
Definition: lstmrecognizer.h:271
tesseract::Dict::user_patterns_file
char * user_patterns_file
Definition: dict.h:582
tesseract::LSTMRecognizer::DebugActivationRange
void DebugActivationRange(const NetworkIO &outputs, const char *label, int best_choice, int x_start, int x_end)
Definition: lstmrecognizer.cpp:419
tesseract::LSTMRecognizer::LabelsFromOutputs
void LabelsFromOutputs(const NetworkIO &outputs, GenericVector< int > *labels, GenericVector< int > *xcoords)
Definition: lstmrecognizer.cpp:462
tesseract::Dict::GlobalDawgCache
static TESS_API DawgCache * GlobalDawgCache()
Definition: dict.cpp:184
tesseract::kCertOffset
const double kCertOffset
Definition: lstmrecognizer.cpp:50
tesseract::LSTMRecognizer::DisplayForward
void DisplayForward(const NetworkIO &inputs, const GenericVector< int > &labels, const GenericVector< int > &label_coords, const char *window_name, ScrollView **window)
Definition: lstmrecognizer.cpp:349
tesseract::CCUtil::language_data_path_prefix
STRING language_data_path_prefix
Definition: ccutil.h:56
tesseract::Input::PreparePixInput
static void PreparePixInput(const StaticShape &shape, const Pix *pix, TRand *randomizer, NetworkIO *input)
Definition: input.cpp:111
tesseract::LSTMRecognizer::GetUnicharset
const UNICHARSET & GetUnicharset() const
Definition: lstmrecognizer.h:132
tesseract::ImageData
Definition: imagedata.h:104
UNICHARSET::get_normed_unichar
const char * get_normed_unichar(UNICHAR_ID unichar_id) const
Definition: unicharset.h:818
tesseract::Input::PrepareLSTMInputs
static Pix * PrepareLSTMInputs(const ImageData &image_data, const Network *network, int min_width, TRand *randomizer, float *image_scale)
Definition: input.cpp:83
STRING::DeSerialize
bool DeSerialize(bool swap, FILE *fp)
Definition: strngs.cpp:157
tesseract::CCUtil::unicharset
UNICHARSET unicharset
Definition: ccutil.h:57
tesseract::UnicharCompress::IsValidFirstCode
bool IsValidFirstCode(int code) const
Definition: unicharcompress.h:170
tesseract::LSTMRecognizer::LSTMRecognizer
LSTMRecognizer()
Definition: lstmrecognizer.cpp:57
STRING::Serialize
bool Serialize(FILE *fp) const
Definition: strngs.cpp:144
UNICHARSET::save_to_file
bool save_to_file(const char *const filename) const
Definition: unicharset.h:350
tesseract::LSTMRecognizer::LoadRecoder
bool LoadRecoder(TFile *fp)
Definition: lstmrecognizer.cpp:143
tesseract::LSTMRecognizer::ccutil_
CCUtil ccutil_
Definition: lstmrecognizer.h:264
tesseract::Network::IsTraining
bool IsTraining() const
Definition: network.h:115
ratngs.h
tesseract::Network::CacheXScaleFactor
virtual void CacheXScaleFactor(int factor)
Definition: network.h:215
tesseract::LSTMRecognizer::LoadCharsets
bool LoadCharsets(const TessdataManager *mgr)
Definition: lstmrecognizer.cpp:133
statistc.h
tesseract::Dict::user_words_suffix
char * user_words_suffix
Definition: dict.h:580
tesseract::LSTMRecognizer::DeSerialize
bool DeSerialize(const TessdataManager *mgr, TFile *fp)
Definition: lstmrecognizer.cpp:108
UNICHAR_BROKEN
Definition: unicharset.h:36
GenericVector::push_back
int push_back(T object)
Definition: genericvector.h:799
tesseract::LSTMRecognizer::RecognizeLine
void RecognizeLine(const ImageData &image_data, bool invert, bool debug, double worst_dict_cert, const TBOX &line_box, PointerVector< WERD_RES > *words, int lstm_choice_mode=0, int lstm_choice_amount=5)
Definition: lstmrecognizer.cpp:187
tesseract::LSTMRecognizer::scratch_space_
NetworkScratch scratch_space_
Definition: lstmrecognizer.h:290
tesseract::LSTMRecognizer::training_flags_
int32_t training_flags_
Definition: lstmrecognizer.h:274
STATS::sd
double sd() const
Definition: statistc.cpp:134
tesseract::Network::CreateFromFile
static Network * CreateFromFile(TFile *fp)
Definition: network.cpp:187
tesseract::RecodeBeamSearch::extractSymbolChoices
void extractSymbolChoices(const UNICHARSET *unicharset)
Definition: recodebeam.cpp:395
tesseract::UnicharCompress::DecodeUnichar
int DecodeUnichar(const RecodedCharID &code) const
Definition: unicharcompress.cpp:291
tesseract::LSTMRecognizer::SetRandomSeed
void SetRandomSeed()
Definition: lstmrecognizer.h:217
tesseract::NetworkIO::ToPix
Pix * ToPix() const
Definition: networkio.cpp:286
tesseract::LSTMRecognizer::momentum_
float momentum_
Definition: lstmrecognizer.h:284
tesseract::TFile::DeSerialize
bool DeSerialize(char *data, size_t count=1)
Definition: serialis.cpp:117
tesseract::LSTMRecognizer::debug_win_
ScrollView * debug_win_
Definition: lstmrecognizer.h:298
tesseract::TFile::Serialize
bool Serialize(const char *data, size_t count=1)
Definition: serialis.cpp:161
tesseract::RecodedCharID::Set
void Set(int index, int value)
Definition: unicharcompress.h:44
tesseract::TessdataManager::GetComponent
bool GetComponent(TessdataType type, TFile *fp)
Definition: tessdatamanager.cpp:216
UNICHAR_SPACE
Definition: unicharset.h:34
tesseract::TESSDATA_LSTM_RECODER
Definition: tessdatamanager.h:79
tesseract::NetworkIO::f
float * f(int t)
Definition: networkio.h:115
tesseract::Network::InputShape
virtual StaticShape InputShape() const
Definition: network.h:127
tesseract::TFile
Definition: serialis.h:75
tesseract::Dict::SetupForLoad
void SetupForLoad(DawgCache *dawg_cache)
Definition: dict.cpp:192
tesseract::NetworkIO
Definition: networkio.h:39
tesseract::ParamsVectors
Definition: params.h:56
tesseract::LSTMRecognizer::Serialize
bool Serialize(const TessdataManager *mgr, TFile *fp) const
Definition: lstmrecognizer.cpp:89
tesseract::LSTMRecognizer::recoder_
UnicharCompress recoder_
Definition: lstmrecognizer.h:268
tesseract::NetworkIO::BestLabel
int BestLabel(int t, float *score) const
Definition: networkio.h:161
lstmrecognizer.h
tesseract::LSTMRecognizer::training_iteration_
int32_t training_iteration_
Definition: lstmrecognizer.h:276
helpers.h
tesseract::TessdataManager::IsComponentAvailable
bool IsComponentAvailable(TessdataType type) const
Definition: tessdatamanager.h:161
tesseract
Definition: baseapi.h:65
null_char_
int null_char_
Definition: unicharcompress_test.cc:168
tesseract::LSTMRecognizer::OutputStats
void OutputStats(const NetworkIO &outputs, float *min_output, float *mean_output, float *sd)
Definition: lstmrecognizer.cpp:238
ScrollView::RED
Definition: scrollview.h:104
tesseract::LSTMRecognizer::SimpleTextOutput
bool SimpleTextOutput() const
Definition: lstmrecognizer.h:69
tesseract::kDictRatio
const double kDictRatio
Definition: lstmrecognizer.cpp:48
tesseract::RecodeBeamSearch::DecodeSecondaryBeams
void DecodeSecondaryBeams(const NetworkIO &output, double dict_ratio, double cert_offset, double worst_dict_cert, const UNICHARSET *charset, int lstm_choice_mode=0)
Definition: recodebeam.cpp:105
STATS
Definition: statistc.h:30
lstm.h
tesseract::LSTMRecognizer::null_char_
int32_t null_char_
Definition: lstmrecognizer.h:281
tprintf.h
callcpp.h
tesseract::RecodedCharID::length
int length() const
Definition: unicharcompress.h:57
tesseract::RecodedCharID
Definition: unicharcompress.h:34
tesseract::RecodeBeamSearch::segmentTimestepsByCharacters
void segmentTimestepsByCharacters()
Definition: recodebeam.cpp:156
GenericVector< int >
tesseract::Dict
Definition: dict.h:91
tesseract::Dict::LoadLSTM
void LoadLSTM(const STRING &lang, TessdataManager *data_file)
Definition: dict.cpp:291
tesseract::Dict::FinishLoad
bool FinishLoad()
Definition: dict.cpp:351
tesseract::LSTMRecognizer::LabelsViaReEncode
void LabelsViaReEncode(const NetworkIO &output, GenericVector< int > *labels, GenericVector< int > *xcoords)
Definition: lstmrecognizer.cpp:474
tesseract::LSTMRecognizer::~LSTMRecognizer
~LSTMRecognizer()
Definition: lstmrecognizer.cpp:70
imagedata.h
tesseract::RecodedCharID::kMaxCodeLen
static const int kMaxCodeLen
Definition: unicharcompress.h:37
tesseract::Network::ClearWindow
static void ClearWindow(bool tess_coords, const char *window_name, int width, int height, ScrollView **window)
Definition: network.cpp:312
ScrollView::TextAttributes
void TextAttributes(const char *font, int pixel_size, bool bold, bool italic, bool underlined)
Definition: scrollview.cpp:634
GenericVector::truncate
void truncate(int size)
Definition: genericvector.h:132
tesseract::LSTMRecognizer::DecodeSingleLabel
const char * DecodeSingleLabel(int label)
Definition: lstmrecognizer.cpp:549
GenericVector::get
T & get(int index) const
Definition: genericvector.h:716
tesseract::LSTMRecognizer
Definition: lstmrecognizer.h:53
STATS::add
void add(int32_t value, int32_t count)
Definition: statistc.cpp:87
ScrollView::GREEN
Definition: scrollview.h:106
tesseract::LSTMRecognizer::IsRecoding
bool IsRecoding() const
Definition: lstmrecognizer.h:72
tesseract::RecodeBeamSearch::combineSegmentedTimesteps
std::vector< std::vector< std::pair< const char *, float > > > combineSegmentedTimesteps(std::vector< std::vector< std::vector< std::pair< const char *, float >>>> *segmentedTimesteps)
Definition: recodebeam.cpp:166
tesseract::UnicharCompress::SetupPassThrough
void SetupPassThrough(const UNICHARSET &unicharset)
Definition: unicharcompress.cpp:216
ScrollView::Line
void Line(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:531
tesseract::Network::DisplayImage
static int DisplayImage(Pix *pix, ScrollView *window)
Definition: network.cpp:335
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
tesseract::RecodeBeamSearch::ExtractBestPathAsWords
void ExtractBestPathAsWords(const TBOX &line_box, float scale_factor, bool debug, const UNICHARSET *unicharset, PointerVector< WERD_RES > *words, int lstm_choice_mode=0)
Definition: recodebeam.cpp:230
tesseract::LSTMRecognizer::network_
Network * network_
Definition: lstmrecognizer.h:261
tesseract::LSTMRecognizer::DisplayLSTMOutput
void DisplayLSTMOutput(const GenericVector< int > &labels, const GenericVector< int > &xcoords, int height, ScrollView *window)
Definition: lstmrecognizer.cpp:365
tesseract::LSTMRecognizer::DecodeLabel
const char * DecodeLabel(const GenericVector< int > &labels, int start, int *end, int *decoded)
Definition: lstmrecognizer.cpp:507
ScrollView::Update
static void Update()
Definition: scrollview.cpp:708
tesseract::Network::Serialize
virtual bool Serialize(TFile *fp) const
Definition: network.cpp:151
tesseract::UnicharCompress::EncodeUnichar
int EncodeUnichar(int unichar_id, RecodedCharID *code) const
Definition: unicharcompress.cpp:283
ScrollView::Text
void Text(int x, int y, const char *mystring)
Definition: scrollview.cpp:651
tesseract::NetworkIO::NumFeatures
int NumFeatures() const
Definition: networkio.h:111
tesseract::TF_COMPRESS_UNICHARSET
Definition: lstmrecognizer.h:48
tesseract::LSTMRecognizer::Load
bool Load(const ParamsVectors *params, const char *lang, TessdataManager *mgr)
Definition: lstmrecognizer.cpp:77
tesseract::TESSDATA_LSTM
Definition: tessdatamanager.h:74
GenericVector::size
int size() const
Definition: genericvector.h:71
tesseract::Dict::user_words_file
char * user_words_file
Definition: dict.h:578
tesseract::LSTMRecognizer::adam_beta_
float adam_beta_
Definition: lstmrecognizer.h:286
scrollview.h
tesseract::TESSDATA_LSTM_UNICHARSET
Definition: tessdatamanager.h:78
input.h
genericheap.h
tesseract::Dict::user_patterns_suffix
char * user_patterns_suffix
Definition: dict.h:584
tesseract::RecodeBeamSearch::ExtractBestPathAsLabels
void ExtractBestPathAsLabels(GenericVector< int > *labels, GenericVector< int > *xcoords) const
Definition: recodebeam.cpp:192
tesseract::RecodeBeamSearch::segmentedTimesteps
std::vector< std::vector< std::vector< std::pair< const char *, float > > > > segmentedTimesteps
Definition: recodebeam.h:241
tesseract::Network::Forward
virtual void Forward(bool debug, const NetworkIO &input, const TransposedArray *input_transpose, NetworkScratch *scratch, NetworkIO *output)=0
tesseract::UnicharCompress::Serialize
bool Serialize(TFile *fp) const
Definition: unicharcompress.cpp:300
TBOX
Definition: rect.h:33
tesseract::LSTMRecognizer::DecodeLabels
STRING DecodeLabels(const GenericVector< int > &labels)
Definition: lstmrecognizer.cpp:334