tesseract  5.0.0-alpha-619-ge9db
tessedit.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: tessedit.cpp (Formerly tessedit.c)
3  * Description: (Previously) Main program for merge of tess and editor.
4  * Now just code to load the language model and various
5  * engine-specific data files.
6  * Author: Ray Smith
7  *
8  * (C) Copyright 1992, Hewlett-Packard Ltd.
9  ** Licensed under the Apache License, Version 2.0 (the "License");
10  ** you may not use this file except in compliance with the License.
11  ** You may obtain a copy of the License at
12  ** http://www.apache.org/licenses/LICENSE-2.0
13  ** Unless required by applicable law or agreed to in writing, software
14  ** distributed under the License is distributed on an "AS IS" BASIS,
15  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  ** See the License for the specific language governing permissions and
17  ** limitations under the License.
18  *
19  **********************************************************************/
20 
21 // Include automatically generated configuration file if running autoconf.
22 #ifdef HAVE_CONFIG_H
23 # include "config_auto.h"
24 #endif
25 
26 #include "control.h"
27 # include "matchdefs.h"
28 #include "pageres.h"
29 #include "params.h"
30 #include "stopper.h"
31 #include "tesseractclass.h"
32 #include "tessvars.h"
33 #include "tprintf.h"
34 #ifndef DISABLED_LEGACY_ENGINE
35 # include "chop.h"
36 # include "intmatcher.h"
37 # include "reject.h"
38 #endif
39 #ifndef ANDROID_BUILD
40 # include "lstmrecognizer.h"
41 #endif
42 
43 namespace tesseract {
44 
45 // Read a "config" file containing a set of variable, value pairs.
46 // Searches the standard places: tessdata/configs, tessdata/tessconfigs
47 // and also accepts a relative or absolute path name.
48 void Tesseract::read_config_file(const char* filename,
49  SetParamConstraint constraint) {
50  STRING path = datadir;
51  path += "configs/";
52  path += filename;
53  FILE* fp;
54  if ((fp = fopen(path.c_str(), "rb")) != nullptr) {
55  fclose(fp);
56  } else {
57  path = datadir;
58  path += "tessconfigs/";
59  path += filename;
60  if ((fp = fopen(path.c_str(), "rb")) != nullptr) {
61  fclose(fp);
62  } else {
63  path = filename;
64  }
65  }
66  ParamUtils::ReadParamsFile(path.c_str(), constraint, this->params());
67 }
68 
69 // Returns false if a unicharset file for the specified language was not found
70 // or was invalid.
// Also returns false if the traineddata file cannot be opened, if the
// unicharset is larger than MAX_NUM_CLASSES, or if a params model component
// is present but fails to load.
71 // This function initializes TessdataManager. After TessdataManager is
72 // no longer needed, TessdataManager::End() should be called.
73 //
74 // This function sets tessedit_ocr_engine_mode to the given OcrEngineMode oem, unless
75 // it is OEM_DEFAULT, in which case the value of the variable will be obtained
76 // from the language-specific config file (stored in [lang].traineddata), from
77 // the config files specified on the command line or left as the default
78 // OEM_TESSERACT_ONLY if none of the configs specify this variable.
//
// Order of parameter sources, later ones overriding earlier ones: the
// language config inside the traineddata, then each file in configs, then the
// vars_vec/vars_values pairs, and finally an explicit (non-default) oem.
//
// NOTE(review): this listing is an incomplete extract. Its embedded line
// numbers skip 79 (the line that should carry the function name), 90-92, 108,
// 110, 112, 120, 126, 153, 162, 171, 173-174, 182, 188, 213, 218-219 and 225,
// so several statements and conditions are not visible here. Recover them
// from the real source file before changing any logic in this function.
80  const char* arg0, const char* textbase, const char* language,
81  OcrEngineMode oem, char** configs, int configs_size,
82  const GenericVector<STRING>* vars_vec,
83  const GenericVector<STRING>* vars_values, bool set_only_non_debug_params,
84  TessdataManager* mgr) {
85  // Set the basename, compute the data directory.
86  main_setup(arg0, textbase);
87 
88  // Set the language data path prefix
// A null language falls back to "eng".
89  lang = language != nullptr ? language : "eng";
93 
94  // Initialize TessdataManager.
// Init is only attempted when the manager is not already loaded.
95  STRING tessdata_path = language_data_path_prefix + kTrainedDataSuffix;
96  if (!mgr->is_loaded() && !mgr->Init(tessdata_path.c_str())) {
97  tprintf("Error opening data file %s\n", tessdata_path.c_str());
98  tprintf(
99  "Please make sure the TESSDATA_PREFIX environment variable is set"
100  " to your \"tessdata\" directory.\n");
101  return false;
102  }
103 #ifndef DISABLED_LEGACY_ENGINE
104  if (oem == OEM_DEFAULT) {
105  // Set the engine mode from availability, which can then be overridden by
106  // the config file when we read it below.
// NOTE(review): the three branch bodies are among the lines missing from
// this extract.
107  if (!mgr->IsLSTMAvailable()) {
109  } else if (!mgr->IsBaseAvailable()) {
111  } else {
113  }
114  }
115 #endif // ndef DISABLED_LEGACY_ENGINE
116 
117  // If a language specific config file (lang.config) exists, load it in.
118  TFile fp;
119  if (mgr->GetComponent(TESSDATA_LANG_CONFIG, &fp)) {
121  this->params());
122  }
123 
124  SetParamConstraint set_params_constraint =
125  set_only_non_debug_params ? SET_PARAM_CONSTRAINT_NON_DEBUG_ONLY
127  // Load tesseract variables from config files. This is done after loading
128  // language-specific variables from [lang].traineddata file, so that custom
129  // config files can override values in [lang].traineddata file.
130  for (int i = 0; i < configs_size; ++i) {
131  read_config_file(configs[i], set_params_constraint);
132  }
133 
134  // Set params specified in vars_vec (done after setting params from config
135  // files, so that params in vars_vec can override those from files).
// The loop is bounded by vars_vec->size() and indexes vars_values with the
// same index; the two vectors are not checked here for equal length.
136  if (vars_vec != nullptr && vars_values != nullptr) {
137  for (int i = 0; i < vars_vec->size(); ++i) {
138  if (!ParamUtils::SetParam((*vars_vec)[i].c_str(),
139  (*vars_values)[i].c_str(),
140  set_params_constraint, this->params())) {
141  tprintf("Warning: The parameter '%s' was not found.\n", (*vars_vec)[i].c_str());
142  }
143  }
144  }
145 
// Optionally dump the resulting parameter values to the file named by
// tessedit_write_params_to_file.
146  if (!tessedit_write_params_to_file.empty()) {
147  FILE* params_file = fopen(tessedit_write_params_to_file.c_str(), "wb");
148  if (params_file != nullptr) {
149  ParamUtils::PrintParams(params_file, this->params());
150  fclose(params_file);
151  } else {
152  tprintf("Failed to open %s for writing params.\n",
154  }
155  }
156 
157  // Determine which ocr engine(s) should be loaded and used for recognition.
158  if (oem != OEM_DEFAULT) tessedit_ocr_engine_mode.set_value(oem);
159 
160  // If we are only loading the config file (and so not planning on doing any
161  // recognition) then there's nothing else do here.
// NOTE(review): the condition guarding this early return is on a line that is
// missing from this extract.
163  return true;
164  }
165 
166 // The various OcrEngineMode settings (see tesseract/publictypes.h) determine which
167 // engine-specific data files need to be loaded.
168 // If LSTM_ONLY is requested, the base Tesseract files are *Not* required.
169 #ifndef ANDROID_BUILD
170 # ifdef DISABLED_LEGACY_ENGINE
172 # else
175 # endif // ndef DISABLED_LEGACY_ENGINE
176  if (mgr->IsComponentAvailable(TESSDATA_LSTM)) {
177  lstm_recognizer_ = new LSTMRecognizer(language_data_path_prefix);
178  ASSERT_HOST(lstm_recognizer_->Load(
179  this->params(), lstm_use_matrix ? language : nullptr, mgr));
180  } else {
181  tprintf("Error: LSTM requested, but not present!! Loading tesseract.\n");
183  }
184  }
185 #endif // ndef ANDROID_BUILD
186 
187  // Load the unicharset
189  // Avoid requiring a unicharset when we aren't running base tesseract.
190 #ifndef ANDROID_BUILD
191  unicharset.CopyFrom(lstm_recognizer_->GetUnicharset());
192 #endif // ndef ANDROID_BUILD
193  }
194 #ifndef DISABLED_LEGACY_ENGINE
195  else if (!mgr->GetComponent(TESSDATA_UNICHARSET, &fp) ||
196  !unicharset.load_from_file(&fp, false)) {
197  tprintf("Error: Tesseract (legacy) engine requested, but components are "
198  "not present in %s!!\n", tessdata_path.c_str());
199  return false;
200  }
201 #endif // ndef DISABLED_LEGACY_ENGINE
202  if (unicharset.size() > MAX_NUM_CLASSES) {
203  tprintf("Error: Size of unicharset is greater than MAX_NUM_CLASSES\n");
204  return false;
205  }
206  right_to_left_ = unicharset.major_right_to_left();
207 
208 #ifndef DISABLED_LEGACY_ENGINE
209 
210  // Setup initial unichar ambigs table and read universal ambigs.
// encoder_unicharset is a copy of unicharset taken before the ambigs are
// loaded; the loaders below receive both the copy and a pointer to unicharset.
211  UNICHARSET encoder_unicharset;
212  encoder_unicharset.CopyFrom(unicharset);
214  unichar_ambigs.LoadUniversal(encoder_unicharset, &unicharset);
215 
216  if (!tessedit_ambigs_training && mgr->GetComponent(TESSDATA_AMBIGS, &fp)) {
217  unichar_ambigs.LoadUnicharAmbigs(encoder_unicharset, &fp,
220  }
221 
222  // Init ParamsModel.
223  // Load pass1 and pass2 weights (for now these two sets are the same, but in
224  // the future separate sets of weights can be generated).
// NOTE(review): the loop header that declares p is on a line missing from
// this extract.
226  ++p) {
227  language_model_->getParamsModel().SetPass(
228  static_cast<ParamsModel::PassEnum>(p));
229  if (mgr->GetComponent(TESSDATA_PARAMS_MODEL, &fp)) {
230  if (!language_model_->getParamsModel().LoadFromFp(lang.c_str(), &fp)) {
231  return false;
232  }
233  }
234  }
235 #endif // ndef DISABLED_LEGACY_ENGINE
236 
237  return true;
238 }
239 
240 // Helper returns true if the given string is in the vector of strings.
241 static bool IsStrInList(const STRING& str,
242  const GenericVector<STRING>& str_list) {
243  for (int i = 0; i < str_list.size(); ++i) {
244  if (str_list[i] == str) return true;
245  }
246  return false;
247 }
248 
249 // Parse a string of the form [~]<lang>[+[~]<lang>]*.
250 // Langs with no prefix get appended to to_load, provided they
251 // are not in there already.
252 // Langs with ~ prefix get appended to not_to_load, provided they are not in
253 // there already.
254 void Tesseract::ParseLanguageString(const char* lang_str,
255  GenericVector<STRING>* to_load,
256  GenericVector<STRING>* not_to_load) {
257  STRING remains(lang_str);
258  while (remains.length() > 0) {
259  // Find the start of the lang code and which vector to add to.
260  const char* start = remains.c_str();
261  while (*start == '+') ++start;
262  GenericVector<STRING>* target = to_load;
263  if (*start == '~') {
264  target = not_to_load;
265  ++start;
266  }
267  // Find the index of the end of the lang code in string start.
268  int end = strlen(start);
269  const char* plus = strchr(start, '+');
270  if (plus != nullptr && plus - start < end) end = plus - start;
271  STRING lang_code(start);
272  lang_code.truncate_at(end);
273  STRING next(start + end);
274  remains = next;
275  // Check whether lang_code is already in the target vector and add.
276  if (!IsStrInList(lang_code, *target)) {
277  target->push_back(lang_code);
278  }
279  }
280 }
281 
282 // Initialize for potentially a set of languages defined by the language
283 // string and recursively any additional languages required by any language
284 // traineddata file (via tessedit_load_sublangs in its config) that is loaded.
285 // See init_tesseract_internal for args.
// The first language that loads successfully is loaded into this instance;
// every further one gets its own Tesseract object stored in sub_langs_.
// Languages listed in the not-to-load set (the ~ prefix) are skipped.
// Returns -1 if no language could be loaded into this instance, 0 otherwise;
// a sub-language that fails to load is reported and dropped without changing
// the return value.
//
// NOTE(review): this listing is an incomplete extract. Its embedded line
// numbers skip 351 and 365, so the condition opening the params-model
// if/else and one statement before the final #endif are not visible here.
286 int Tesseract::init_tesseract(const char* arg0, const char* textbase,
287  const char* language, OcrEngineMode oem,
288  char** configs, int configs_size,
289  const GenericVector<STRING>* vars_vec,
290  const GenericVector<STRING>* vars_values,
291  bool set_only_non_debug_params,
292  TessdataManager* mgr) {
293  GenericVector<STRING> langs_to_load;
294  GenericVector<STRING> langs_not_to_load;
295  ParseLanguageString(language, &langs_to_load, &langs_not_to_load);
296 
// Drop any sub-language instances left over from a previous call.
297  sub_langs_.delete_data_pointers();
298  sub_langs_.clear();
299  // Find the first loadable lang and load into this.
300  // Add any languages that this language requires
301  bool loaded_primary = false;
302  // Load the rest into sub_langs_.
// langs_to_load can grow while this loop runs: the ParseLanguageString calls
// below append the sub-languages requested by each successfully loaded
// language, and the loop bound is re-read on every iteration.
303  for (int lang_index = 0; lang_index < langs_to_load.size(); ++lang_index) {
304  if (!IsStrInList(langs_to_load[lang_index], langs_not_to_load)) {
305  const char* lang_str = langs_to_load[lang_index].c_str();
306  Tesseract* tess_to_init;
307  if (!loaded_primary) {
308  tess_to_init = this;
309  } else {
310  tess_to_init = new Tesseract;
311  }
312 
313  int result = tess_to_init->init_tesseract_internal(
314  arg0, textbase, lang_str, oem, configs, configs_size, vars_vec,
315  vars_values, set_only_non_debug_params, mgr);
316  // Forget that language, but keep any reader we were given.
317  mgr->Clear();
318 
319  if (!loaded_primary) {
320  if (result < 0) {
321  tprintf("Failed loading language '%s'\n", lang_str);
322  } else {
323  ParseLanguageString(tess_to_init->tessedit_load_sublangs.c_str(),
324  &langs_to_load, &langs_not_to_load);
325  loaded_primary = true;
326  }
327  } else {
328  if (result < 0) {
329  tprintf("Failed loading language '%s'\n", lang_str);
// The failed instance was allocated above with new and is not stored in
// sub_langs_, so it is released here.
330  delete tess_to_init;
331  } else {
332  sub_langs_.push_back(tess_to_init);
333  // Add any languages that this language requires
334  ParseLanguageString(tess_to_init->tessedit_load_sublangs.c_str(),
335  &langs_to_load, &langs_not_to_load);
336  }
337  }
338  }
339  }
340  if (!loaded_primary) {
341  tprintf("Tesseract couldn't load any languages!\n");
342  return -1; // Couldn't load any language!
343  }
344 #ifndef DISABLED_LEGACY_ENGINE
345  if (!sub_langs_.empty()) {
346  // In multilingual mode word ratings have to be directly comparable,
347  // so use the same language model weights for all languages:
348  // use the primary language's params model if
349  // tessedit_use_primary_params_model is set,
350  // otherwise use default language model weights.
// NOTE(review): the if that opens this branch is on the missing line 351; per
// the comment above it presumably tests tessedit_use_primary_params_model.
352  for (int s = 0; s < sub_langs_.size(); ++s) {
353  sub_langs_[s]->language_model_->getParamsModel().Copy(
354  this->language_model_->getParamsModel());
355  }
356  tprintf("Using params model of the primary language\n");
357  } else {
358  this->language_model_->getParamsModel().Clear();
359  for (int s = 0; s < sub_langs_.size(); ++s) {
360  sub_langs_[s]->language_model_->getParamsModel().Clear();
361  }
362  }
363  }
364 
366 #endif // ndef DISABLED_LEGACY_ENGINE
367  return 0;
368 }
369 
370 // Common initialization for a single language.
371 // arg0 is the datapath for the tessdata directory, which could be the
372 // path of the tessdata directory with no trailing /, or (if tessdata
373 // lives in the same directory as the executable) the path of the executable,
374 // hence the name arg0.
375 // textbase is an optional output file basename (used only for training)
376 // language is the language code to load.
377 // oem controls which engine(s) will operate on the image
378 // configs (argv) is an array of config filenames to load variables from.
379 // May be nullptr.
380 // configs_size (argc) is the number of elements in configs.
381 // vars_vec is an optional vector of variables to set.
382 // vars_values is an optional corresponding vector of values for the variables
383 // in vars_vec.
384 // If set_only_non_debug_params is true, parameters are applied under the
385 // SET_PARAM_CONSTRAINT_NON_DEBUG_ONLY constraint (see init_tesseract_lang_data).
// mgr is the TessdataManager handed on to init_tesseract_lang_data and, when
// the classifier and dictionary are initialized, to program_editup.
// Returns -1 if init_tesseract_lang_data fails, 0 otherwise.
//
// NOTE(review): this listing is an incomplete extract. Its embedded line
// numbers skip 398 (the condition guarding the first "return 0") and 403
// (where the local named init_tesseract used below is presumably declared).
386 int Tesseract::init_tesseract_internal(const char* arg0, const char* textbase,
387  const char* language, OcrEngineMode oem,
388  char** configs, int configs_size,
389  const GenericVector<STRING>* vars_vec,
390  const GenericVector<STRING>* vars_values,
391  bool set_only_non_debug_params,
392  TessdataManager* mgr) {
393  if (!init_tesseract_lang_data(arg0, textbase, language, oem, configs,
394  configs_size, vars_vec, vars_values,
395  set_only_non_debug_params, mgr)) {
396  return -1;
397  }
399  return 0;
400  }
401  // If only LSTM will be used, skip loading Tesseract classifier's
402  // pre-trained templates and dictionary.
// Both program_editup arguments are mgr when init_tesseract is true and
// nullptr otherwise.
404  program_editup(textbase, init_tesseract ? mgr : nullptr,
405  init_tesseract ? mgr : nullptr);
406  return 0; // Normal exit
407 }
408 
409 #ifndef DISABLED_LEGACY_ENGINE
410 
411 // Helper builds the all_fonts table by adding new fonts from new_fonts.
412 static void CollectFonts(const UnicityTable<FontInfo>& new_fonts,
413  UnicityTable<FontInfo>* all_fonts) {
414  for (int i = 0; i < new_fonts.size(); ++i) {
415  // UnicityTable uniques as we go.
416  all_fonts->push_back(new_fonts.get(i));
417  }
418 }
419 
420 // Helper assigns an id to lang_fonts using the index in all_fonts table.
421 static void AssignIds(const UnicityTable<FontInfo>& all_fonts,
422  UnicityTable<FontInfo>* lang_fonts) {
423  for (int i = 0; i < lang_fonts->size(); ++i) {
424  int index = all_fonts.get_id(lang_fonts->get(i));
425  lang_fonts->get_mutable(i)->universal_id = index;
426  }
427 }
428 
429 // Set the universal_id member of each font to be unique among all
430 // instances of the same font loaded.
// Builds a temporary table holding the fonts of this instance and of every
// entry in sub_langs_, writes each font's id in that table back into the
// per-language font tables, and records the table's size in font_table_size_.
//
// NOTE(review): this listing is an incomplete extract. Its embedded line
// numbers skip 431, the line that should carry this function's signature
// (the listing's cross-reference index names it
// Tesseract::SetupUniversalFontIds()).
432  // Note that we can get away with bitwise copying FontInfo in
433  // all_fonts, as it is a temporary structure and we avoid setting the
434  // delete callback.
435  using namespace std::placeholders; // for _1, _2
436  UnicityTable<FontInfo> all_fonts;
437  all_fonts.set_compare_callback(std::bind(CompareFontInfo, _1, _2));
438 
439  // Create the universal ID table.
// This instance's fonts are collected first, then each sub-language's.
440  CollectFonts(get_fontinfo_table(), &all_fonts);
441  for (int i = 0; i < sub_langs_.size(); ++i) {
442  CollectFonts(sub_langs_[i]->get_fontinfo_table(), &all_fonts);
443  }
444  // Assign ids from the table to each font table.
445  AssignIds(all_fonts, &get_fontinfo_table());
446  for (int i = 0; i < sub_langs_.size(); ++i) {
447  AssignIds(all_fonts, &sub_langs_[i]->get_fontinfo_table());
448  }
449  font_table_size_ = all_fonts.size();
450 }
451 
452 // init the LM component
// Loads the language data for one language with OEM_TESSERACT_ONLY, no config
// files, no variable overrides and set_only_non_debug_params false, then
// loads the dictionary for lang through mgr.
// Returns -1 if init_tesseract_lang_data fails, 0 otherwise.
//
// NOTE(review): this listing is an incomplete extract. Its embedded line
// numbers skip 458, so one statement between the failure return and
// getDict().Load is not visible here.
453 int Tesseract::init_tesseract_lm(const char* arg0, const char* textbase,
454  const char* language, TessdataManager* mgr) {
455  if (!init_tesseract_lang_data(arg0, textbase, language, OEM_TESSERACT_ONLY,
456  nullptr, 0, nullptr, nullptr, false, mgr))
457  return -1;
459  getDict().Load(lang, mgr);
// The value returned by FinishLoad is not checked.
460  getDict().FinishLoad();
461  return 0;
462 }
463 
464 #endif // ndef DISABLED_LEGACY_ENGINE
465 
467 
468 /* Define command type identifiers */
/* NOTE(review): this listing is an incomplete extract. Its embedded line
 * numbers skip 471 and 474, so the first and last enumerators are not
 * visible here; the listing's cross-reference index names
 * ACTION_1_CMD_EVENT and ACTION_2_CMD_EVENT, which presumably belong in
 * those positions. */
469 
470 enum CMD_EVENTS {
472  RECOG_WERDS,
473  RECOG_PSEUDO,
475 };
476 } // namespace tesseract
UNICHARSET::load_from_file
bool load_from_file(const char *const filename, bool skip_fragments)
Definition: unicharset.h:378
tesseract::ParamUtils::ReadParamsFile
static bool ReadParamsFile(const char *file, SetParamConstraint constraint, ParamsVectors *member_params)
Definition: params.cpp:39
tesseract::Tesseract::init_tesseract
int init_tesseract(const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params, TessdataManager *mgr)
Definition: tessedit.cpp:302
tesseract::CCUtil::datadir
STRING datadir
Definition: ccutil.h:53
tesseract::CCUtil::use_ambigs_for_adaption
bool use_ambigs_for_adaption
Definition: ccutil.h:73
pageres.h
tesseract::CMD_EVENTS
CMD_EVENTS
Definition: tessedit.cpp:486
tessvars.h
tesseract::Wordrec::end_recog
int end_recog()
Definition: tface.cpp:76
tesseract::OEM_TESSERACT_LSTM_COMBINED
Definition: publictypes.h:268
tesseractclass.h
ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:87
tesseract::TESSDATA_PARAMS_MODEL
Definition: tessdatamanager.h:73
control.h
params.h
chop.h
tesseract::Tesseract
Definition: tesseractclass.h:172
tesseract::SET_PARAM_CONSTRAINT_NON_DEBUG_ONLY
Definition: params.h:52
tesseract::RECOG_WERDS
Definition: tessedit.cpp:488
STRING
Definition: strngs.h:45
tesseract::SetParamConstraint
SetParamConstraint
Definition: params.h:49
tesseract::UnicharAmbigs::LoadUnicharAmbigs
void LoadUnicharAmbigs(const UNICHARSET &encoder_set, TFile *ambigs_file, int debug_level, bool use_ambigs_for_adaption, UNICHARSET *unicharset)
Definition: ambigs.cpp:75
tesseract::RECOG_PSEUDO
Definition: tessedit.cpp:489
tesseract::Dict::Load
void Load(const STRING &lang, TessdataManager *data_file)
Definition: dict.cpp:210
tesseract::ParamUtils::PrintParams
static void PrintParams(FILE *fp, const ParamsVectors *member_params)
Definition: params.cpp:168
tesseract::OEM_LSTM_ONLY
Definition: publictypes.h:267
tesseract::Tesseract::read_config_file
void read_config_file(const char *filename, SetParamConstraint constraint)
Definition: tessedit.cpp:64
stopper.h
tesseract::Wordrec::program_editup
void program_editup(const char *textbase, TessdataManager *init_classifier, TessdataManager *init_dict)
Definition: tface.cpp:54
UnicityTable::get_id
int get_id(T object) const
Definition: unicity_table.h:156
tesseract::Dict::GlobalDawgCache
static TESS_API DawgCache * GlobalDawgCache()
Definition: dict.cpp:184
tesseract::CCUtil::language_data_path_prefix
STRING language_data_path_prefix
Definition: ccutil.h:56
tesseract::LSTMRecognizer::GetUnicharset
const UNICHARSET & GetUnicharset() const
Definition: lstmrecognizer.h:132
tesseract::FontInfo::universal_id
int32_t universal_id
Definition: fontinfo.h:123
tesseract::CCUtil::unicharset
UNICHARSET unicharset
Definition: ccutil.h:57
UnicityTable::push_back
int push_back(T object)
Add an element in the table.
Definition: unicity_table.h:168
tesseract::UnicharAmbigs::LoadUniversal
void LoadUniversal(const UNICHARSET &encoder_set, UNICHARSET *unicharset)
Definition: ambigs.cpp:68
tesseract::TESSDATA_UNICHARSET
Definition: tessdatamanager.h:58
tesseract::Classify::get_fontinfo_table
UnicityTable< FontInfo > & get_fontinfo_table()
Definition: classify.h:386
tesseract::OcrEngineMode
OcrEngineMode
Definition: publictypes.h:265
tesseract::Tesseract::init_tesseract_lm
int init_tesseract_lm(const char *arg0, const char *textbase, const char *language, TessdataManager *mgr)
Definition: tessedit.cpp:469
UNICHARSET::major_right_to_left
bool major_right_to_left() const
Definition: unicharset.cpp:952
GenericVector::push_back
int push_back(T object)
Definition: genericvector.h:799
tesseract::ACTION_2_CMD_EVENT
Definition: tessedit.cpp:490
tesseract::ParamsModel::PTRAIN_PASS1
Definition: params_model.h:35
UnicityTable::set_compare_callback
void set_compare_callback(std::function< bool(const T &, const T &)> cb)
Definition: unicity_table.h:74
STRING::c_str
const char * c_str() const
Definition: strngs.cpp:192
MAX_NUM_CLASSES
#define MAX_NUM_CLASSES
Definition: matchdefs.h:29
tesseract::CCUtil::ambigs_debug_level
int ambigs_debug_level
Definition: ccutil.h:71
tesseract::Tesseract::end_tesseract
void end_tesseract()
Definition: tessedit.cpp:482
tesseract::Tesseract::tessedit_load_sublangs
char * tessedit_load_sublangs
Definition: tesseractclass.h:1051
tesseract::OEM_DEFAULT
Definition: publictypes.h:271
UNICHARSET
Definition: unicharset.h:145
tesseract::Dict::SetupForLoad
void SetupForLoad(DawgCache *dawg_cache)
Definition: dict.cpp:192
tesseract::Tesseract::getDict
Dict & getDict() override
Definition: tesseractclass.cpp:564
tesseract::Tesseract::tessedit_ambigs_training
bool tessedit_ambigs_training
Definition: tesseractclass.h:809
tesseract::Tesseract::init_tesseract_lang_data
bool init_tesseract_lang_data(const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params, TessdataManager *mgr)
Definition: tessedit.cpp:95
tesseract::CCUtil::main_setup
void main_setup(const char *argv0, const char *basename)
CCUtil::main_setup - set location of tessdata and name of image.
Definition: mainblk.cpp:58
tesseract::CCUtil::lang
STRING lang
Definition: ccutil.h:55
lstmrecognizer.h
UnicityTable::get
const T & get(int id) const
Return the object from an id.
Definition: unicity_table.h:140
tesseract::TESSDATA_AMBIGS
Definition: tessdatamanager.h:59
tesseract
Definition: baseapi.h:65
tesseract::CCUtil::params
ParamsVectors * params()
Definition: ccutil.h:51
tesseract::Tesseract::tessedit_init_config_only
bool tessedit_init_config_only
Definition: tesseractclass.h:1064
tesseract::Tesseract::init_tesseract_internal
int init_tesseract_internal(const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params, TessdataManager *mgr)
Definition: tessedit.cpp:402
tesseract::UnicharAmbigs::InitUnicharAmbigs
void InitUnicharAmbigs(const UNICHARSET &unicharset, bool use_ambigs_for_adaption)
Definition: ambigs.cpp:54
tprintf.h
GenericVector< STRING >
tesseract::TESSDATA_LANG_CONFIG
Definition: tessdatamanager.h:57
UnicityTable
Definition: fontinfo.h:30
tesseract::ParamsModel::PTRAIN_NUM_PASSES
Definition: params_model.h:38
tesseract::CCUtil::unichar_ambigs
UnicharAmbigs unichar_ambigs
Definition: ccutil.h:59
reject.h
UnicityTable::size
int size() const
Return the size used.
Definition: unicity_table.h:127
tesseract::Tesseract::Tesseract
Tesseract()
Definition: tesseractclass.cpp:52
tesseract::Dict::FinishLoad
bool FinishLoad()
Definition: dict.cpp:351
tesseract::ParamUtils::ReadParamsFromFp
static bool ReadParamsFromFp(SetParamConstraint constraint, TFile *fp, ParamsVectors *member_params)
Definition: params.cpp:50
UnicityTable::get_mutable
T * get_mutable(int id)
Definition: unicity_table.h:145
tesseract::Tesseract::SetupUniversalFontIds
void SetupUniversalFontIds()
Definition: tessedit.cpp:447
tesseract::CompareFontInfo
bool CompareFontInfo(const FontInfo &fi1, const FontInfo &fi2)
Definition: fontinfo.cpp:122
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
tesseract::ParamUtils::SetParam
static bool SetParam(const char *name, const char *value, SetParamConstraint constraint, ParamsVectors *member_params)
Definition: params.cpp:79
tesseract::SET_PARAM_CONSTRAINT_NONE
Definition: params.h:50
tesseract::Tesseract::lstm_use_matrix
bool lstm_use_matrix
Definition: tesseractclass.h:895
tesseract::Wordrec::language_model_
std::unique_ptr< LanguageModel > language_model_
Definition: wordrec.h:471
tesseract::LSTMRecognizer::Load
bool Load(const ParamsVectors *params, const char *lang, TessdataManager *mgr)
Definition: lstmrecognizer.cpp:77
tesseract::TESSDATA_LSTM
Definition: tessdatamanager.h:74
tesseract::Tesseract::tessedit_use_primary_params_model
bool tessedit_use_primary_params_model
Definition: tesseractclass.h:1053
GenericVector::size
int size() const
Definition: genericvector.h:71
tesseract::OEM_TESSERACT_ONLY
Definition: publictypes.h:266
tesseract::Tesseract::tessedit_ocr_engine_mode
int tessedit_ocr_engine_mode
Definition: tesseractclass.h:802
matchdefs.h
tesseract::Tesseract::tessedit_write_params_to_file
char * tessedit_write_params_to_file
Definition: tesseractclass.h:819
UNICHARSET::size
int size() const
Definition: unicharset.h:341
UNICHARSET::CopyFrom
void CopyFrom(const UNICHARSET &src)
Definition: unicharset.cpp:447
tesseract::Tesseract::ParseLanguageString
void ParseLanguageString(const char *lang_str, GenericVector< STRING > *to_load, GenericVector< STRING > *not_to_load)
Definition: tessedit.cpp:270
tesseract::ACTION_1_CMD_EVENT
Definition: tessedit.cpp:487
intmatcher.h