tesseract
5.0.0-alpha-619-ge9db
dawg_cache.cpp
Go to the documentation of this file.
1
// File: dawg_cache.cpp
3
// Description: A class that knows about loading and caching dawgs.
4
// Author: David Eger
5
//
6
// (C) Copyright 2012, Google Inc.
7
// Licensed under the Apache License, Version 2.0 (the "License");
8
// you may not use this file except in compliance with the License.
9
// You may obtain a copy of the License at
10
// http://www.apache.org/licenses/LICENSE-2.0
11
// Unless required by applicable law or agreed to in writing, software
12
// distributed under the License is distributed on an "AS IS" BASIS,
13
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
// See the License for the specific language governing permissions and
15
// limitations under the License.
16
//
18
19
#include "
dawg_cache.h
"
20
21
#include "
dawg.h
"
22
#include "
object_cache.h
"
23
#include <
tesseract/strngs.h
>
24
#include "
tessdatamanager.h
"
25
26
namespace
tesseract
{
27
28
struct
DawgLoader
{
29
DawgLoader
(
const
STRING
&lang,
TessdataType
tessdata_dawg_type,
30
int
dawg_debug_level,
TessdataManager
*data_file)
31
:
lang_
(lang),
32
data_file_
(data_file),
33
tessdata_dawg_type_
(tessdata_dawg_type),
34
dawg_debug_level_
(dawg_debug_level) {}
35
36
Dawg
*
Load
();
37
38
STRING
lang_
;
39
TessdataManager
*
data_file_
;
40
TessdataType
tessdata_dawg_type_
;
41
int
dawg_debug_level_
;
42
};
43
44
// Looks up (and, through the loader callback, creates) the squished dawg of
// the requested type for the given data file.
//
// lang, tessdata_dawg_type and debug_level are forwarded to a DawgLoader;
// data_file supplies both the cache key prefix and the component data.
// data_file is dereferenced unconditionally, so it must not be null.
// Returns whatever dawgs_.Get() yields for the computed key.
Dawg *DawgCache::GetSquishedDawg(const STRING &lang,
                                 TessdataType tessdata_dawg_type,
                                 int debug_level, TessdataManager *data_file) {
  // Cache key: the data file name followed by the component's file suffix.
  STRING cache_key = data_file->GetDataFileName();
  cache_key += kTessdataFileSuffixes[tessdata_dawg_type];

  // The loader lives on this stack frame and is captured by reference, so the
  // callback is only valid for the duration of the Get() call.
  // NOTE(review): presumably Get() runs the callback synchronously on a cache
  // miss and does not retain it -- confirm in object_cache.h.
  DawgLoader loader(lang, tessdata_dawg_type, debug_level, data_file);
  return dawgs_.Get(cache_key, [&loader]() { return loader.Load(); });
}
52
53
// Fetches the component selected by tessdata_dawg_type_ from data_file_ and
// deserializes it into a new SquishedDawg.
//
// Returns the newly allocated dawg on success. Returns nullptr when the
// component cannot be obtained, when tessdata_dawg_type_ is not one of the
// dawg types handled below, or when deserialization fails (in which case the
// partially built object is deleted first).
Dawg *DawgLoader::Load() {
  TFile component;
  if (!data_file_->GetComponent(tessdata_dawg_type_, &component)) {
    return nullptr;
  }

  // Translate the tessdata component type into the dawg and permuter types
  // expected by the SquishedDawg constructor.
  DawgType kind;
  PermuterType permuter;
  switch (tessdata_dawg_type_) {
    case TESSDATA_PUNC_DAWG:
    case TESSDATA_LSTM_PUNC_DAWG:
      kind = DAWG_TYPE_PUNCTUATION;
      permuter = PUNC_PERM;
      break;
    case TESSDATA_NUMBER_DAWG:
    case TESSDATA_LSTM_NUMBER_DAWG:
      kind = DAWG_TYPE_NUMBER;
      permuter = NUMBER_PERM;
      break;
    case TESSDATA_SYSTEM_DAWG:
    case TESSDATA_LSTM_SYSTEM_DAWG:
    case TESSDATA_UNAMBIG_DAWG:
      kind = DAWG_TYPE_WORD;
      permuter = SYSTEM_DAWG_PERM;
      break;
    case TESSDATA_FREQ_DAWG:
      kind = DAWG_TYPE_WORD;
      permuter = FREQ_DAWG_PERM;
      break;
    case TESSDATA_BIGRAM_DAWG:
      // Per the original author, neither value actually matters for bigrams.
      kind = DAWG_TYPE_WORD;
      permuter = COMPOUND_PERM;
      break;
    default:
      // Not a dawg component this loader knows how to build.
      return nullptr;
  }

  auto *dawg = new SquishedDawg(kind, lang_, permuter, dawg_debug_level_);
  if (!dawg->Load(&component)) {
    // Deserialization failed: release the object and report failure.
    delete dawg;
    dawg = nullptr;
  }
  return dawg;
}
95
96
}
// namespace tesseract
strngs.h
tesseract::DAWG_TYPE_PUNCTUATION
Definition:
dawg.h:67
tesseract::SquishedDawg
Definition:
dawg.h:405
tesseract::TESSDATA_SYSTEM_DAWG
Definition:
tessdatamanager.h:64
tesseract::TessdataManager
Definition:
tessdatamanager.h:126
tesseract::TESSDATA_BIGRAM_DAWG
Definition:
tessdatamanager.h:71
dawg_cache.h
SYSTEM_DAWG_PERM
Definition:
ratngs.h:239
PermuterType
PermuterType
Definition:
ratngs.h:230
tesseract::DAWG_TYPE_NUMBER
Definition:
dawg.h:69
STRING
Definition:
strngs.h:45
COMPOUND_PERM
Definition:
ratngs.h:243
tesseract::DawgLoader::tessdata_dawg_type_
TessdataType tessdata_dawg_type_
Definition:
dawg_cache.cpp:40
tesseract::TESSDATA_LSTM_SYSTEM_DAWG
Definition:
tessdatamanager.h:76
tesseract::DAWG_TYPE_WORD
Definition:
dawg.h:68
tesseract::DawgLoader::dawg_debug_level_
int dawg_debug_level_
Definition:
dawg_cache.cpp:41
tesseract::TESSDATA_PUNC_DAWG
Definition:
tessdatamanager.h:63
tesseract::DawgLoader::data_file_
TessdataManager * data_file_
Definition:
dawg_cache.cpp:39
dawg.h
tesseract::DawgCache::GetSquishedDawg
Dawg * GetSquishedDawg(const STRING &lang, TessdataType tessdata_dawg_type, int debug_level, TessdataManager *data_file)
Definition:
dawg_cache.cpp:44
tesseract::TessdataManager::GetComponent
bool GetComponent(TessdataType type, TFile *fp)
Definition:
tessdatamanager.cpp:216
tesseract::TESSDATA_LSTM_NUMBER_DAWG
Definition:
tessdatamanager.h:77
tesseract::TFile
Definition:
serialis.h:75
tesseract
Definition:
baseapi.h:65
tesseract::TessdataType
TessdataType
Definition:
tessdatamanager.h:56
tesseract::DawgType
DawgType
Definition:
dawg.h:66
tesseract::DawgLoader::lang_
STRING lang_
Definition:
dawg_cache.cpp:38
tesseract::TESSDATA_UNAMBIG_DAWG
Definition:
tessdatamanager.h:72
tesseract::TESSDATA_LSTM_PUNC_DAWG
Definition:
tessdatamanager.h:75
tesseract::Dawg
Definition:
dawg.h:113
tesseract::DawgLoader::Load
Dawg * Load()
Definition:
dawg_cache.cpp:53
tesseract::TessdataManager::GetDataFileName
const STRING & GetDataFileName() const
Definition:
tessdatamanager.h:186
tesseract::DawgLoader::DawgLoader
DawgLoader(const STRING &lang, TessdataType tessdata_dawg_type, int dawg_debug_level, TessdataManager *data_file)
Definition:
dawg_cache.cpp:29
PUNC_PERM
Definition:
ratngs.h:232
tesseract::DawgLoader
Definition:
dawg_cache.cpp:28
object_cache.h
FREQ_DAWG_PERM
Definition:
ratngs.h:242
tessdatamanager.h
tesseract::TESSDATA_NUMBER_DAWG
Definition:
tessdatamanager.h:65
NUMBER_PERM
Definition:
ratngs.h:237
tesseract::TESSDATA_FREQ_DAWG
Definition:
tessdatamanager.h:66
src
dict
dawg_cache.cpp
Generated on Thu Jan 30 2020 14:22:20 for tesseract by
1.8.16