tesseract  5.0.0-alpha-619-ge9db
dawg_cache.cpp
Go to the documentation of this file.
1 // File: dawg_cache.cpp
3 // Description: A class that knows about loading and caching dawgs.
4 // Author: David Eger
5 //
6 // (C) Copyright 2012, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
18 
19 #include "dawg_cache.h"
20 
21 #include "dawg.h"
22 #include "object_cache.h"
23 #include <tesseract/strngs.h>
24 #include "tessdatamanager.h"
25 
26 namespace tesseract {
27 
28 struct DawgLoader {
29  DawgLoader(const STRING &lang, TessdataType tessdata_dawg_type,
30  int dawg_debug_level, TessdataManager *data_file)
31  : lang_(lang),
32  data_file_(data_file),
33  tessdata_dawg_type_(tessdata_dawg_type),
34  dawg_debug_level_(dawg_debug_level) {}
35 
36  Dawg *Load();
37 
42 };
43 
45  TessdataType tessdata_dawg_type,
46  int debug_level, TessdataManager *data_file) {
47  STRING data_id = data_file->GetDataFileName();
48  data_id += kTessdataFileSuffixes[tessdata_dawg_type];
49  DawgLoader loader(lang, tessdata_dawg_type, debug_level, data_file);
50  return dawgs_.Get(data_id, std::bind(&DawgLoader::Load, &loader));
51 }
52 
54  TFile fp;
55  if (!data_file_->GetComponent(tessdata_dawg_type_, &fp)) return nullptr;
56  DawgType dawg_type;
57  PermuterType perm_type;
58  switch (tessdata_dawg_type_) {
59  case TESSDATA_PUNC_DAWG:
61  dawg_type = DAWG_TYPE_PUNCTUATION;
62  perm_type = PUNC_PERM;
63  break;
66  dawg_type = DAWG_TYPE_WORD;
67  perm_type = SYSTEM_DAWG_PERM;
68  break;
71  dawg_type = DAWG_TYPE_NUMBER;
72  perm_type = NUMBER_PERM;
73  break;
75  dawg_type = DAWG_TYPE_WORD; // doesn't actually matter
76  perm_type = COMPOUND_PERM; // doesn't actually matter
77  break;
79  dawg_type = DAWG_TYPE_WORD;
80  perm_type = SYSTEM_DAWG_PERM;
81  break;
82  case TESSDATA_FREQ_DAWG:
83  dawg_type = DAWG_TYPE_WORD;
84  perm_type = FREQ_DAWG_PERM;
85  break;
86  default:
87  return nullptr;
88  }
89  auto *retval =
90  new SquishedDawg(dawg_type, lang_, perm_type, dawg_debug_level_);
91  if (retval->Load(&fp)) return retval;
92  delete retval;
93  return nullptr;
94 }
95 
96 } // namespace tesseract
strngs.h
tesseract::DAWG_TYPE_PUNCTUATION
Definition: dawg.h:67
tesseract::SquishedDawg
Definition: dawg.h:405
tesseract::TESSDATA_SYSTEM_DAWG
Definition: tessdatamanager.h:64
tesseract::TessdataManager
Definition: tessdatamanager.h:126
tesseract::TESSDATA_BIGRAM_DAWG
Definition: tessdatamanager.h:71
dawg_cache.h
SYSTEM_DAWG_PERM
Definition: ratngs.h:239
PermuterType
PermuterType
Definition: ratngs.h:230
tesseract::DAWG_TYPE_NUMBER
Definition: dawg.h:69
STRING
Definition: strngs.h:45
COMPOUND_PERM
Definition: ratngs.h:243
tesseract::DawgLoader::tessdata_dawg_type_
TessdataType tessdata_dawg_type_
Definition: dawg_cache.cpp:40
tesseract::TESSDATA_LSTM_SYSTEM_DAWG
Definition: tessdatamanager.h:76
tesseract::DAWG_TYPE_WORD
Definition: dawg.h:68
tesseract::DawgLoader::dawg_debug_level_
int dawg_debug_level_
Definition: dawg_cache.cpp:41
tesseract::TESSDATA_PUNC_DAWG
Definition: tessdatamanager.h:63
tesseract::DawgLoader::data_file_
TessdataManager * data_file_
Definition: dawg_cache.cpp:39
dawg.h
tesseract::DawgCache::GetSquishedDawg
Dawg * GetSquishedDawg(const STRING &lang, TessdataType tessdata_dawg_type, int debug_level, TessdataManager *data_file)
Definition: dawg_cache.cpp:44
tesseract::TessdataManager::GetComponent
bool GetComponent(TessdataType type, TFile *fp)
Definition: tessdatamanager.cpp:216
tesseract::TESSDATA_LSTM_NUMBER_DAWG
Definition: tessdatamanager.h:77
tesseract::TFile
Definition: serialis.h:75
tesseract
Definition: baseapi.h:65
tesseract::TessdataType
TessdataType
Definition: tessdatamanager.h:56
tesseract::DawgType
DawgType
Definition: dawg.h:66
tesseract::DawgLoader::lang_
STRING lang_
Definition: dawg_cache.cpp:38
tesseract::TESSDATA_UNAMBIG_DAWG
Definition: tessdatamanager.h:72
tesseract::TESSDATA_LSTM_PUNC_DAWG
Definition: tessdatamanager.h:75
tesseract::Dawg
Definition: dawg.h:113
tesseract::DawgLoader::Load
Dawg * Load()
Definition: dawg_cache.cpp:53
tesseract::TessdataManager::GetDataFileName
const STRING & GetDataFileName() const
Definition: tessdatamanager.h:186
tesseract::DawgLoader::DawgLoader
DawgLoader(const STRING &lang, TessdataType tessdata_dawg_type, int dawg_debug_level, TessdataManager *data_file)
Definition: dawg_cache.cpp:29
PUNC_PERM
Definition: ratngs.h:232
tesseract::DawgLoader
Definition: dawg_cache.cpp:28
object_cache.h
FREQ_DAWG_PERM
Definition: ratngs.h:242
tessdatamanager.h
tesseract::TESSDATA_NUMBER_DAWG
Definition: tessdatamanager.h:65
NUMBER_PERM
Definition: ratngs.h:237
tesseract::TESSDATA_FREQ_DAWG
Definition: tessdatamanager.h:66