tesseract  4.0.0-1-g2a2b
lstmrecognizer.cpp
Go to the documentation of this file.
1 // File: lstmrecognizer.cpp
3 // Description: Top-level line recognizer class for LSTM-based networks.
4 // Author: Ray Smith
5 // Created: Thu May 02 10:59:06 PST 2013
6 //
7 // (C) Copyright 2013, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
18 
19 // Include automatically generated configuration file if running autoconf.
20 #ifdef HAVE_CONFIG_H
21 #include "config_auto.h"
22 #endif
23 
24 #include "lstmrecognizer.h"
25 
26 #include "allheaders.h"
27 #include "callcpp.h"
28 #include "dict.h"
29 #include "genericheap.h"
30 #include "helpers.h"
31 #include "imagedata.h"
32 #include "input.h"
33 #include "lstm.h"
34 #include "normalis.h"
35 #include "pageres.h"
36 #include "ratngs.h"
37 #include "recodebeam.h"
38 #include "scrollview.h"
39 #include "statistc.h"
40 #include "tprintf.h"
41 
42 namespace tesseract {
43 
44 // Default ratio between dict and non-dict words.
45 const double kDictRatio = 2.25;
46 // Default certainty offset to give the dictionary a chance.
47 const double kCertOffset = -0.085;
48 
50  : network_(nullptr),
51  training_flags_(0),
52  training_iteration_(0),
53  sample_iteration_(0),
54  null_char_(UNICHAR_BROKEN),
55  learning_rate_(0.0f),
56  momentum_(0.0f),
57  adam_beta_(0.0f),
58  dict_(nullptr),
59  search_(nullptr),
60  debug_win_(nullptr) {}
61 
63  delete network_;
64  delete dict_;
65  delete search_;
66 }
67 
68 // Loads a model from mgr, including the dictionary only if lang is not null.
69 bool LSTMRecognizer::Load(const char* lang, TessdataManager* mgr) {
70  TFile fp;
71  if (!mgr->GetComponent(TESSDATA_LSTM, &fp)) return false;
72  if (!DeSerialize(mgr, &fp)) return false;
73  if (lang == nullptr) return true;
74  // Allow it to run without a dictionary.
75  LoadDictionary(lang, mgr);
76  return true;
77 }
78 
79 // Writes to the given file. Returns false in case of error.
80 bool LSTMRecognizer::Serialize(const TessdataManager* mgr, TFile* fp) const {
81  bool include_charsets = mgr == nullptr ||
84  if (!network_->Serialize(fp)) return false;
85  if (include_charsets && !GetUnicharset().save_to_file(fp)) return false;
86  if (!network_str_.Serialize(fp)) return false;
87  if (!fp->Serialize(&training_flags_)) return false;
88  if (!fp->Serialize(&training_iteration_)) return false;
89  if (!fp->Serialize(&sample_iteration_)) return false;
90  if (!fp->Serialize(&null_char_)) return false;
91  if (!fp->Serialize(&adam_beta_)) return false;
92  if (!fp->Serialize(&learning_rate_)) return false;
93  if (!fp->Serialize(&momentum_)) return false;
94  if (include_charsets && IsRecoding() && !recoder_.Serialize(fp)) return false;
95  return true;
96 }
97 
98 // Reads from the given file. Returns false in case of error.
100  delete network_;
102  if (network_ == nullptr) return false;
103  bool include_charsets = mgr == nullptr ||
106  if (include_charsets && !ccutil_.unicharset.load_from_file(fp, false))
107  return false;
108  if (!network_str_.DeSerialize(fp)) return false;
109  if (!fp->DeSerialize(&training_flags_)) return false;
110  if (!fp->DeSerialize(&training_iteration_)) return false;
111  if (!fp->DeSerialize(&sample_iteration_)) return false;
112  if (!fp->DeSerialize(&null_char_)) return false;
113  if (!fp->DeSerialize(&adam_beta_)) return false;
114  if (!fp->DeSerialize(&learning_rate_)) return false;
115  if (!fp->DeSerialize(&momentum_)) return false;
116  if (include_charsets && !LoadRecoder(fp)) return false;
117  if (!include_charsets && !LoadCharsets(mgr)) return false;
120  return true;
121 }
122 
123 // Loads the charsets from mgr.
125  TFile fp;
126  if (!mgr->GetComponent(TESSDATA_LSTM_UNICHARSET, &fp)) return false;
127  if (!ccutil_.unicharset.load_from_file(&fp, false)) return false;
128  if (!mgr->GetComponent(TESSDATA_LSTM_RECODER, &fp)) return false;
129  if (!LoadRecoder(&fp)) return false;
130  return true;
131 }
132 
133 // Loads the Recoder.
135  if (IsRecoding()) {
136  if (!recoder_.DeSerialize(fp)) return false;
137  RecodedCharID code;
139  if (code(0) != UNICHAR_SPACE) {
140  tprintf("Space was garbled in recoding!!\n");
141  return false;
142  }
143  } else {
146  }
147  return true;
148 }
149 
150 // Loads the dictionary if possible from the traineddata file.
151 // If loading fails, prints a warning message and returns false; the
152 // recognizer is left without a dictionary and continues to work without it.
153 // Note that dictionary load is independent from DeSerialize, but dependent
154 // on the unicharset matching. This enables training to deserialize a model
155 // from checkpoint or restore without having to go back and reload the
156 // dictionary.
157 bool LSTMRecognizer::LoadDictionary(const char* lang, TessdataManager* mgr) {
158  delete dict_;
159  dict_ = new Dict(&ccutil_);
161  dict_->LoadLSTM(lang, mgr);
162  if (dict_->FinishLoad()) return true; // Success.
163  tprintf("Failed to load any lstm-specific dictionaries for lang %s!!\n",
164  lang);
165  delete dict_;
166  dict_ = nullptr;
167  return false;
168 }
169 
170 // Recognizes the line image, contained within image_data, returning the
171 // ratings matrix and matching box_word for each WERD_RES in the output.
172 void LSTMRecognizer::RecognizeLine(const ImageData& image_data, bool invert,
173  bool debug, double worst_dict_cert,
174  const TBOX& line_box,
176  int lstm_choice_mode) {
177  NetworkIO outputs;
178  float scale_factor;
179  NetworkIO inputs;
180  if (!RecognizeLine(image_data, invert, debug, false, false, &scale_factor,
181  &inputs, &outputs))
182  return;
183  if (search_ == nullptr) {
184  search_ =
186  }
187  search_->Decode(outputs, kDictRatio, kCertOffset, worst_dict_cert,
188  &GetUnicharset(), lstm_choice_mode);
189  search_->ExtractBestPathAsWords(line_box, scale_factor, debug,
190  &GetUnicharset(), words, lstm_choice_mode);
191 }
192 
193 // Helper computes min and mean best results in the output.
194 void LSTMRecognizer::OutputStats(const NetworkIO& outputs, float* min_output,
195  float* mean_output, float* sd) {
196  const int kOutputScale = INT8_MAX;
197  STATS stats(0, kOutputScale + 1);
198  for (int t = 0; t < outputs.Width(); ++t) {
199  int best_label = outputs.BestLabel(t, nullptr);
200  if (best_label != null_char_) {
201  float best_output = outputs.f(t)[best_label];
202  stats.add(static_cast<int>(kOutputScale * best_output), 1);
203  }
204  }
205  // If the output is all nulls it could be that the photometric interpretation
206  // is wrong, so make it look bad, so the other way can win, even if not great.
207  if (stats.get_total() == 0) {
208  *min_output = 0.0f;
209  *mean_output = 0.0f;
210  *sd = 1.0f;
211  } else {
212  *min_output = static_cast<float>(stats.min_bucket()) / kOutputScale;
213  *mean_output = stats.mean() / kOutputScale;
214  *sd = stats.sd() / kOutputScale;
215  }
216 }
217 
218 // Runs the network forward on image_data, filling inputs and outputs and
219 // setting *scale_factor. Returns false if the line cannot be recognized.
220 bool LSTMRecognizer::RecognizeLine(const ImageData& image_data, bool invert,
221  bool debug, bool re_invert, bool upside_down,
222  float* scale_factor, NetworkIO* inputs,
223  NetworkIO* outputs) {
224  // Maximum width of image to train on.
225  const int kMaxImageWidth = 2560;
226  // This ensures consistent recognition results.
227  SetRandomSeed();
228  int min_width = network_->XScaleFactor();
229  Pix* pix = Input::PrepareLSTMInputs(image_data, network_, min_width,
230  &randomizer_, scale_factor);
231  if (pix == nullptr) {
232  tprintf("Line cannot be recognized!!\n");
233  return false;
234  }
235  if (network_->IsTraining() && pixGetWidth(pix) > kMaxImageWidth) {
236  tprintf("Image too large to learn!! Size = %dx%d\n", pixGetWidth(pix),
237  pixGetHeight(pix));
238  pixDestroy(&pix);
239  return false;
240  }
241  if (upside_down) pixRotate180(pix, pix);
242  // Reduction factor from image to coords.
243  *scale_factor = min_width / *scale_factor;
244  inputs->set_int_mode(IsIntMode());
245  SetRandomSeed();
247  network_->Forward(debug, *inputs, nullptr, &scratch_space_, outputs);
248  // Check for auto inversion.
249  float pos_min, pos_mean, pos_sd;
250  OutputStats(*outputs, &pos_min, &pos_mean, &pos_sd);
251  if (invert && pos_min < 0.5) {
252  // Run again inverted and see if it is any better.
253  NetworkIO inv_inputs, inv_outputs;
254  inv_inputs.set_int_mode(IsIntMode());
255  SetRandomSeed();
256  pixInvert(pix, pix);
258  &inv_inputs);
259  network_->Forward(debug, inv_inputs, nullptr, &scratch_space_, &inv_outputs);
260  float inv_min, inv_mean, inv_sd;
261  OutputStats(inv_outputs, &inv_min, &inv_mean, &inv_sd);
262  if (inv_min > pos_min && inv_mean > pos_mean && inv_sd < pos_sd) {
263  // Inverted did better. Use inverted data.
264  if (debug) {
265  tprintf("Inverting image: old min=%g, mean=%g, sd=%g, inv %g,%g,%g\n",
266  pos_min, pos_mean, pos_sd, inv_min, inv_mean, inv_sd);
267  }
268  *outputs = inv_outputs;
269  *inputs = inv_inputs;
270  } else if (re_invert) {
271  // Inverting was not an improvement, so undo and run again, so the
272  // outputs match the best forward result.
273  SetRandomSeed();
274  network_->Forward(debug, *inputs, nullptr, &scratch_space_, outputs);
275  }
276  }
277  pixDestroy(&pix);
278  if (debug) {
279  GenericVector<int> labels, coords;
280  LabelsFromOutputs(*outputs, &labels, &coords);
281  DisplayForward(*inputs, labels, coords, "LSTMForward", &debug_win_);
282  DebugActivationPath(*outputs, labels, coords);
283  }
284  return true;
285 }
286 
287 // Converts an array of labels to utf-8, whether or not the labels are
288 // augmented with character boundaries.
290  STRING result;
291  int end = 1;
292  for (int start = 0; start < labels.size(); start = end) {
293  if (labels[start] == null_char_) {
294  end = start + 1;
295  } else {
296  result += DecodeLabel(labels, start, &end, nullptr);
297  }
298  }
299  return result;
300 }
301 
302 // Displays the forward results in a window with the characters and
303 // boundaries as determined by the labels and label_coords.
305  const GenericVector<int>& labels,
306  const GenericVector<int>& label_coords,
307  const char* window_name,
308  ScrollView** window) {
309 #ifndef GRAPHICS_DISABLED // do nothing if there's no graphics
310  Pix* input_pix = inputs.ToPix();
311  Network::ClearWindow(false, window_name, pixGetWidth(input_pix),
312  pixGetHeight(input_pix), window);
313  int line_height = Network::DisplayImage(input_pix, *window);
314  DisplayLSTMOutput(labels, label_coords, line_height, *window);
315 #endif // GRAPHICS_DISABLED
316 }
317 
318 // Displays the labels and cuts at the corresponding xcoords.
319 // Size of labels should match xcoords.
321  const GenericVector<int>& xcoords,
322  int height, ScrollView* window) {
323 #ifndef GRAPHICS_DISABLED // do nothing if there's no graphics
324  int x_scale = network_->XScaleFactor();
325  window->TextAttributes("Arial", height / 4, false, false, false);
326  int end = 1;
327  for (int start = 0; start < labels.size(); start = end) {
328  int xpos = xcoords[start] * x_scale;
329  if (labels[start] == null_char_) {
330  end = start + 1;
331  window->Pen(ScrollView::RED);
332  } else {
333  window->Pen(ScrollView::GREEN);
334  const char* str = DecodeLabel(labels, start, &end, nullptr);
335  if (*str == '\\') str = "\\\\";
336  xpos = xcoords[(start + end) / 2] * x_scale;
337  window->Text(xpos, height, str);
338  }
339  window->Line(xpos, 0, xpos, height * 3 / 2);
340  }
341  window->Update();
342 #endif // GRAPHICS_DISABLED
343 }
344 
345 // Prints debug output detailing the activation path that is implied by the
346 // label_coords.
348  const GenericVector<int>& labels,
349  const GenericVector<int>& xcoords) {
350  if (xcoords[0] > 0)
351  DebugActivationRange(outputs, "<null>", null_char_, 0, xcoords[0]);
352  int end = 1;
353  for (int start = 0; start < labels.size(); start = end) {
354  if (labels[start] == null_char_) {
355  end = start + 1;
356  DebugActivationRange(outputs, "<null>", null_char_, xcoords[start],
357  xcoords[end]);
358  continue;
359  } else {
360  int decoded;
361  const char* label = DecodeLabel(labels, start, &end, &decoded);
362  DebugActivationRange(outputs, label, labels[start], xcoords[start],
363  xcoords[start + 1]);
364  for (int i = start + 1; i < end; ++i) {
365  DebugActivationRange(outputs, DecodeSingleLabel(labels[i]), labels[i],
366  xcoords[i], xcoords[i + 1]);
367  }
368  }
369  }
370 }
371 
372 // Prints debug output detailing activations and 2nd choice over a range
373 // of positions.
375  const char* label, int best_choice,
376  int x_start, int x_end) {
377  tprintf("%s=%d On [%d, %d), scores=", label, best_choice, x_start, x_end);
378  double max_score = 0.0;
379  double mean_score = 0.0;
380  const int width = x_end - x_start;
381  for (int x = x_start; x < x_end; ++x) {
382  const float* line = outputs.f(x);
383  const double score = line[best_choice] * 100.0;
384  if (score > max_score) max_score = score;
385  mean_score += score / width;
386  int best_c = 0;
387  double best_score = 0.0;
388  for (int c = 0; c < outputs.NumFeatures(); ++c) {
389  if (c != best_choice && line[c] > best_score) {
390  best_c = c;
391  best_score = line[c];
392  }
393  }
394  tprintf(" %.3g(%s=%d=%.3g)", score, DecodeSingleLabel(best_c), best_c,
395  best_score * 100.0);
396  }
397  tprintf(", Mean=%g, max=%g\n", mean_score, max_score);
398 }
399 
// Helper returns true if the score of null_char at t reaches null_thr.
// Failing that, the null is still accepted only when the alternative label
// reported by BestLabel is a space and the null outscores that space, in
// which case we will use the null anyway.
#if 0 // TODO: unused, remove if still unused after 2020.
static bool NullIsBest(const NetworkIO& output, float null_thr,
                       int null_char, int t) {
  const float null_score = output.f(t)[null_char];
  if (null_score >= null_thr) return true;
  const int runner_up = output.BestLabel(t, null_char, null_char, nullptr);
  return runner_up == UNICHAR_SPACE &&
         null_score > output.f(t)[UNICHAR_SPACE];
}
#endif
412 
413 // Converts the network output to a sequence of labels. Outputs labels, scores
414 // and start xcoords of each char, and each null_char_, with an additional
415 // final xcoord for the end of the output.
416 // The conversion method is determined by internal state.
418  GenericVector<int>* labels,
419  GenericVector<int>* xcoords) {
420  if (SimpleTextOutput()) {
421  LabelsViaSimpleText(outputs, labels, xcoords);
422  } else {
423  LabelsViaReEncode(outputs, labels, xcoords);
424  }
425 }
426 
427 // As LabelsViaCTC except that this function constructs the best path that
428 // contains only legal sequences of subcodes for CJK.
430  GenericVector<int>* labels,
431  GenericVector<int>* xcoords) {
432  if (search_ == nullptr) {
433  search_ =
435  }
436  search_->Decode(output, 1.0, 0.0, RecodeBeamSearch::kMinCertainty, nullptr);
437  search_->ExtractBestPathAsLabels(labels, xcoords);
438 }
439 
440 // Converts the network output to a sequence of labels, with scores, using
441 // the simple character model (each position is a char, and the null_char_ is
442 // mainly intended for tail padding.)
444  GenericVector<int>* labels,
445  GenericVector<int>* xcoords) {
446  labels->truncate(0);
447  xcoords->truncate(0);
448  const int width = output.Width();
449  for (int t = 0; t < width; ++t) {
450  float score = 0.0f;
451  const int label = output.BestLabel(t, &score);
452  if (label != null_char_) {
453  labels->push_back(label);
454  xcoords->push_back(t);
455  }
456  }
457  xcoords->push_back(width);
458 }
459 
460 // Returns a string corresponding to the label starting at start. Sets *end
461 // to the next start and if non-null, *decoded to the unichar id.
463  int start, int* end, int* decoded) {
464  *end = start + 1;
465  if (IsRecoding()) {
466  // Decode labels via recoder_.
467  RecodedCharID code;
468  if (labels[start] == null_char_) {
469  if (decoded != nullptr) {
470  code.Set(0, null_char_);
471  *decoded = recoder_.DecodeUnichar(code);
472  }
473  return "<null>";
474  }
475  int index = start;
476  while (index < labels.size() &&
478  code.Set(code.length(), labels[index++]);
479  while (index < labels.size() && labels[index] == null_char_) ++index;
480  int uni_id = recoder_.DecodeUnichar(code);
481  // If the next label isn't a valid first code, then we need to continue
482  // extending even if we have a valid uni_id from this prefix.
483  if (uni_id != INVALID_UNICHAR_ID &&
484  (index == labels.size() ||
486  recoder_.IsValidFirstCode(labels[index]))) {
487  *end = index;
488  if (decoded != nullptr) *decoded = uni_id;
489  if (uni_id == UNICHAR_SPACE) return " ";
490  return GetUnicharset().get_normed_unichar(uni_id);
491  }
492  }
493  return "<Undecodable>";
494  } else {
495  if (decoded != nullptr) *decoded = labels[start];
496  if (labels[start] == null_char_) return "<null>";
497  if (labels[start] == UNICHAR_SPACE) return " ";
498  return GetUnicharset().get_normed_unichar(labels[start]);
499  }
500 }
501 
502 // Returns a string corresponding to a given single label id, falling back to
503 // a default of ".." for part of a multi-label unichar-id.
504 const char* LSTMRecognizer::DecodeSingleLabel(int label) {
505  if (label == null_char_) return "<null>";
506  if (IsRecoding()) {
507  // Decode label via recoder_.
508  RecodedCharID code;
509  code.Set(0, label);
510  label = recoder_.DecodeUnichar(code);
511  if (label == INVALID_UNICHAR_ID) return ".."; // Part of a bigger code.
512  }
513  if (label == UNICHAR_SPACE) return " ";
514  return GetUnicharset().get_normed_unichar(label);
515 }
516 
517 } // namespace tesseract.
bool Serialize(FILE *fp) const
Definition: strngs.cpp:148
void set_int_mode(bool is_quantized)
Definition: networkio.h:130
const UNICHARSET & GetUnicharset() const
static DawgCache * GlobalDawgCache()
Definition: dict.cpp:193
int BestLabel(int t, float *score) const
Definition: networkio.h:161
void ExtractBestPathAsLabels(GenericVector< int > *labels, GenericVector< int > *xcoords) const
Definition: recodebeam.cpp:140
bool LoadCharsets(const TessdataManager *mgr)
bool LoadDictionary(const char *lang, TessdataManager *mgr)
virtual void CacheXScaleFactor(int factor)
Definition: network.h:215
static const int kMaxCodeLen
void TextAttributes(const char *font, int pixel_size, bool bold, bool italic, bool underlined)
Definition: scrollview.cpp:637
int size() const
Definition: genericvector.h:71
static Network * CreateFromFile(TFile *fp)
Definition: network.cpp:199
const char * DecodeSingleLabel(int label)
int DecodeUnichar(const RecodedCharID &code) const
bool GetComponent(TessdataType type, TFile *fp)
const char * get_normed_unichar(UNICHAR_ID unichar_id) const
Definition: unicharset.h:823
void DebugActivationRange(const NetworkIO &outputs, const char *label, int best_choice, int x_start, int x_end)
void Decode(const NetworkIO &output, double dict_ratio, double cert_offset, double worst_dict_cert, const UNICHARSET *charset, int lstm_choice_mode=0)
Definition: recodebeam.cpp:82
int32_t min_bucket() const
Definition: statistc.cpp:205
bool save_to_file(const char *const filename) const
Definition: unicharset.h:345
int EncodeUnichar(int unichar_id, RecodedCharID *code) const
virtual void Forward(bool debug, const NetworkIO &input, const TransposedArray *input_transpose, NetworkScratch *scratch, NetworkIO *output)
Definition: network.h:262
bool DeSerialize(bool swap, FILE *fp)
Definition: strngs.cpp:161
STRING DecodeLabels(const GenericVector< int > &labels)
bool DeSerialize(char *data, size_t count=1)
Definition: serialis.cpp:103
Definition: rect.h:34
const char * DecodeLabel(const GenericVector< int > &labels, int start, int *end, int *decoded)
virtual void SetRandomizer(TRand *randomizer)
Definition: network.cpp:138
Definition: statistc.h:33
static void Update()
Definition: scrollview.cpp:711
NetworkScratch scratch_space_
const double kCertOffset
void Set(int index, int value)
virtual int XScaleFactor() const
Definition: network.h:209
virtual StaticShape InputShape() const
Definition: network.h:127
void DebugActivationPath(const NetworkIO &outputs, const GenericVector< int > &labels, const GenericVector< int > &xcoords)
virtual bool Serialize(TFile *fp) const
Definition: network.cpp:151
UNICHARSET unicharset
Definition: ccutil.h:68
void DisplayForward(const NetworkIO &inputs, const GenericVector< int > &labels, const GenericVector< int > &label_coords, const char *window_name, ScrollView **window)
void OutputStats(const NetworkIO &outputs, float *min_output, float *mean_output, float *sd)
double mean() const
Definition: statistc.cpp:134
void Text(int x, int y, const char *mystring)
Definition: scrollview.cpp:654
bool DeSerialize(const TessdataManager *mgr, TFile *fp)
bool Load(const char *lang, TessdataManager *mgr)
static void ClearWindow(bool tess_coords, const char *window_name, int width, int height, ScrollView **window)
Definition: network.cpp:306
void LabelsViaReEncode(const NetworkIO &output, GenericVector< int > *labels, GenericVector< int > *xcoords)
void RecognizeLine(const ImageData &image_data, bool invert, bool debug, double worst_dict_cert, const TBOX &line_box, PointerVector< WERD_RES > *words, int lstm_choice_mode=0)
Pix * ToPix() const
Definition: networkio.cpp:291
bool FinishLoad()
Definition: dict.cpp:323
bool Serialize(const char *data, size_t count=1)
Definition: serialis.cpp:147
void DisplayLSTMOutput(const GenericVector< int > &labels, const GenericVector< int > &xcoords, int height, ScrollView *window)
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
void LoadLSTM(const STRING &lang, TessdataManager *data_file)
Definition: dict.cpp:302
void SetupPassThrough(const UNICHARSET &unicharset)
void SetupForLoad(DawgCache *dawg_cache)
Definition: dict.cpp:201
void add(int32_t value, int32_t count)
Definition: statistc.cpp:100
int push_back(T object)
const double kDictRatio
float * f(int t)
Definition: networkio.h:115
static int DisplayImage(Pix *pix, ScrollView *window)
Definition: network.cpp:329
static void PreparePixInput(const StaticShape &shape, const Pix *pix, TRand *randomizer, NetworkIO *input)
Definition: input.cpp:112
static const float kMinCertainty
Definition: recodebeam.h:222
Definition: strngs.h:45
void LabelsViaSimpleText(const NetworkIO &output, GenericVector< int > *labels, GenericVector< int > *xcoords)
double sd() const
Definition: statistc.cpp:150
bool IsTraining() const
Definition: network.h:115
void truncate(int size)
bool IsValidFirstCode(int code) const
void ExtractBestPathAsWords(const TBOX &line_box, float scale_factor, bool debug, const UNICHARSET *unicharset, PointerVector< WERD_RES > *words, int lstm_choice_mode=0)
Definition: recodebeam.cpp:178
void Pen(Color color)
Definition: scrollview.cpp:722
bool load_from_file(const char *const filename, bool skip_fragments)
Definition: unicharset.h:383
bool Serialize(const TessdataManager *mgr, TFile *fp) const
int Width() const
Definition: networkio.h:107
bool Serialize(TFile *fp) const
int32_t get_total() const
Definition: statistc.h:86
void Line(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:534
RecodeBeamSearch * search_
int NumFeatures() const
Definition: networkio.h:111
static Pix * PrepareLSTMInputs(const ImageData &image_data, const Network *network, int min_width, TRand *randomizer, float *image_scale)
Definition: input.cpp:84
void LabelsFromOutputs(const NetworkIO &outputs, GenericVector< int > *labels, GenericVector< int > *xcoords)
bool IsComponentAvailable(TessdataType type) const