tesseract
5.0.0-alpha-619-ge9db
renderer.h
Go to the documentation of this file.
1
// File: renderer.h
3
// Description: Rendering interface to inject into TessBaseAPI
4
//
5
// (C) Copyright 2011, Google Inc.
6
// Licensed under the Apache License, Version 2.0 (the "License");
7
// you may not use this file except in compliance with the License.
8
// You may obtain a copy of the License at
9
// http://www.apache.org/licenses/LICENSE-2.0
10
// Unless required by applicable law or agreed to in writing, software
11
// distributed under the License is distributed on an "AS IS" BASIS,
12
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
// See the License for the specific language governing permissions and
14
// limitations under the License.
15
//
17
18
#ifndef TESSERACT_API_RENDERER_H_
19
#define TESSERACT_API_RENDERER_H_
20
21
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
22
// complexity of includes here. Use forward declarations wherever possible
23
// and hide includes of complex types in baseapi.cpp.
24
#include <string>
// for std::string
25
26
#include "
genericvector.h
"
27
#include "
platform.h
"
28
#include "
strngs.h
"
// for STRING
29
30
struct
Pix;
31
32
namespace
tesseract
{
33
34
class
TessBaseAPI
;
35
49
class
TESS_API
TessResultRenderer
{
50
public
:
51
virtual
~
TessResultRenderer
();
52
53
// Takes ownership of pointer so must be new'd instance.
54
// Renderers aren't ordered, but appends the sequences of next parameter
55
// and existing next(). The renderers should be unique across both lists.
56
void
insert(
TessResultRenderer
* next);
57
58
// Returns the next renderer or nullptr.
59
TessResultRenderer
*
next
() {
60
return
next_;
61
}
62
68
bool
BeginDocument(
const
char
* title);
69
78
bool
AddImage(
TessBaseAPI
* api);
79
84
bool
EndDocument();
85
86
const
char
*
file_extension
()
const
{
87
return
file_extension_;
88
}
89
const
char
*
title
()
const
{
90
return
title_.c_str();
91
}
92
93
// Is everything fine? Otherwise something went wrong.
94
bool
happy
() {
95
return
happy_;
96
}
97
107
int
imagenum
()
const
{
108
return
imagenum_;
109
}
110
111
protected
:
122
TessResultRenderer
(
const
char
* outputbase,
const
char
* extension);
123
124
// Hook for specialized handling in BeginDocument()
125
virtual
bool
BeginDocumentHandler();
126
127
// This must be overridden to render the OCR'd results
128
virtual
bool
AddImageHandler(
TessBaseAPI
* api) = 0;
129
130
// Hook for specialized handling in EndDocument()
131
virtual
bool
EndDocumentHandler();
132
133
// Renderers can call this to append '\0' terminated strings into
134
// the output string returned by GetOutput.
135
// This method will grow the output buffer if needed.
136
void
AppendString(
const
char
* s);
137
138
// Renderers can call this to append binary byte sequences into
139
// the output string returned by GetOutput. Note that s is not necessarily
140
// '\0' terminated (and can contain '\0' within it).
141
// This method will grow the output buffer if needed.
142
void
AppendData(
const
char
* s,
int
len);
143
144
private
:
145
const
char
* file_extension_;
// standard extension for generated output
146
STRING
title_;
// title of document being rendered
147
int
imagenum_;
// index of last image added
148
149
FILE* fout_;
// output file pointer
150
TessResultRenderer
* next_;
// Can link multiple renderers together
151
bool
happy_;
// I get grumpy when the disk fills up, etc.
152
};
153
157
// Concrete TessResultRenderer that overrides only the mandatory
// AddImageHandler() hook.
// NOTE(review): judging by the class name this emits plain text; the
// implementation is not part of this header — confirm there.
class TESS_API TessTextRenderer : public TessResultRenderer {
 public:
  // outputbase: presumably the output file name without extension — TODO
  // confirm against the implementation.
  explicit TessTextRenderer(const char* outputbase);

 protected:
  // Renders the results held by api.
  bool AddImageHandler(TessBaseAPI* api) override;
};
164
168
// Concrete TessResultRenderer that overrides all three document hooks and
// carries a flag controlling whether font information is printed.
// NOTE(review): judging by the class name this emits hOCR; the
// implementation is not part of this header — confirm there.
class TESS_API TessHOcrRenderer : public TessResultRenderer {
 public:
  // font_info selects whether font information is printed (see font_info_).
  explicit TessHOcrRenderer(const char* outputbase, bool font_info);

  // Variant without an explicit font_info argument; the value used for
  // font_info_ is chosen by the implementation (not visible here).
  explicit TessHOcrRenderer(const char* outputbase);

 protected:
  // Hook invoked for the start of a document.
  bool BeginDocumentHandler() override;

  // Renders the results held by api.
  bool AddImageHandler(TessBaseAPI* api) override;

  // Hook invoked for the end of a document.
  bool EndDocumentHandler() override;

 private:
  bool font_info_;  // whether to print font information
};
181
185
// Concrete TessResultRenderer that overrides all three document hooks.
// NOTE(review): judging by the class name this emits ALTO; the
// implementation is not part of this header — confirm there.
class TESS_API TessAltoRenderer : public TessResultRenderer {
 public:
  // outputbase: presumably the output file name without extension — TODO
  // confirm against the implementation.
  explicit TessAltoRenderer(const char* outputbase);

 protected:
  // Hook invoked for the start of a document.
  bool BeginDocumentHandler() override;

  // Renders the results held by api.
  bool AddImageHandler(TessBaseAPI* api) override;

  // Hook invoked for the end of a document.
  bool EndDocumentHandler() override;
};
194
198
// Concrete TessResultRenderer that overrides all three document hooks and
// carries a flag controlling whether font information is printed.
// NOTE(review): judging by the class name this emits TSV; the
// implementation is not part of this header — confirm there.
class TESS_API TessTsvRenderer : public TessResultRenderer {
 public:
  // font_info selects whether font information is printed (see font_info_).
  explicit TessTsvRenderer(const char* outputbase, bool font_info);

  // Variant without an explicit font_info argument; the value used for
  // font_info_ is chosen by the implementation (not visible here).
  explicit TessTsvRenderer(const char* outputbase);

 protected:
  // Hook invoked for the start of a document.
  bool BeginDocumentHandler() override;

  // Renders the results held by api.
  bool AddImageHandler(TessBaseAPI* api) override;

  // Hook invoked for the end of a document.
  bool EndDocumentHandler() override;

 private:
  bool font_info_;  // whether to print font information
};
211
215
// Concrete TessResultRenderer for PDF output. It overrides all three
// document hooks and keeps per-document bookkeeping (object counter, object
// offsets, page object numbers) so pages can be produced one at a time.
class TESS_API TessPDFRenderer : public TessResultRenderer {
 public:
  // datadir points at the TESSDATA location; it is required because a
  // custom PDF font is loaded from there.
  // textonly (default false): images are skipped when set (see textonly_).
  TessPDFRenderer(const char* outputbase,
                  const char* datadir,
                  bool textonly = false);

 protected:
  // Hook invoked for the start of a document.
  bool BeginDocumentHandler() override;

  // Renders the results held by api.
  bool AddImageHandler(TessBaseAPI* api) override;

  // Hook invoked for the end of a document.
  bool EndDocumentHandler() override;

 private:
  // Holding every image in memory at once is undesirable, so PDFs are
  // produced one page at a time while some metadata is recorded along the
  // way. At the end that metadata is used to emit everything that cannot
  // easily be handled in a streaming fashion.
  long int obj_;                    // counter for PDF objects
  GenericVector<long int> offsets_; // offset of every PDF object in bytes
  GenericVector<long int> pages_;   // object number for every /Page object
  std::string datadir_;             // where to find the custom font
  bool textonly_;                   // skip images if set

  // Bookkeeping only. DIY = Do It Yourself.
  void AppendPDFObjectDIY(size_t objectsize);

  // Bookkeeping + emit data.
  void AppendPDFObject(const char* data);

  // Create the /Contents object for an entire page.
  // NOTE(review): returns a raw char*; who frees it is decided by the
  // implementation, which is not visible here — confirm.
  char* GetPDFTextObjects(TessBaseAPI* api, double width, double height);

  // Turn an image into a PDF object. Only transcode if we have to.
  static bool imageToPDFObj(Pix* pix,
                            const char* filename,
                            long int objnum,
                            char** pdf_object,
                            long int* pdf_object_size,
                            int jpg_quality);
};
249
253
// Concrete TessResultRenderer that overrides only the mandatory
// AddImageHandler() hook.
// NOTE(review): judging by the class name this emits the UNLV format; the
// implementation is not part of this header — confirm there.
class TESS_API TessUnlvRenderer : public TessResultRenderer {
 public:
  // outputbase: presumably the output file name without extension — TODO
  // confirm against the implementation.
  explicit TessUnlvRenderer(const char* outputbase);

 protected:
  // Renders the results held by api.
  bool AddImageHandler(TessBaseAPI* api) override;
};
260
264
// Concrete TessResultRenderer that overrides only the mandatory
// AddImageHandler() hook.
// NOTE(review): judging by the class name this emits box data for LSTM
// training; the implementation is not part of this header — confirm there.
class TESS_API TessLSTMBoxRenderer : public TessResultRenderer {
 public:
  // outputbase: presumably the output file name without extension — TODO
  // confirm against the implementation.
  explicit TessLSTMBoxRenderer(const char* outputbase);

 protected:
  // Renders the results held by api.
  bool AddImageHandler(TessBaseAPI* api) override;
};
271
275
// Concrete TessResultRenderer that overrides only the mandatory
// AddImageHandler() hook.
// NOTE(review): judging by the class name this emits box text; the
// implementation is not part of this header — confirm there.
class TESS_API TessBoxTextRenderer : public TessResultRenderer {
 public:
  // outputbase: presumably the output file name without extension — TODO
  // confirm against the implementation.
  explicit TessBoxTextRenderer(const char* outputbase);

 protected:
  // Renders the results held by api.
  bool AddImageHandler(TessBaseAPI* api) override;
};
282
286
// Concrete TessResultRenderer that overrides only the mandatory
// AddImageHandler() hook.
// NOTE(review): judging by the class name this emits WordStr box data; the
// implementation is not part of this header — confirm there.
class TESS_API TessWordStrBoxRenderer : public TessResultRenderer {
 public:
  // outputbase: presumably the output file name without extension — TODO
  // confirm against the implementation.
  explicit TessWordStrBoxRenderer(const char* outputbase);

 protected:
  // Renders the results held by api.
  bool AddImageHandler(TessBaseAPI* api) override;
};
293
294
#ifndef DISABLED_LEGACY_ENGINE
295
299
// Concrete TessResultRenderer that overrides only the mandatory
// AddImageHandler() hook. Declared only when DISABLED_LEGACY_ENGINE is not
// defined (see the surrounding preprocessor guard).
// NOTE(review): judging by the class name this emits OSD (orientation and
// script detection) results; the implementation is not part of this header
// — confirm there.
class TESS_API TessOsdRenderer : public TessResultRenderer {
 public:
  // outputbase: presumably the output file name without extension — TODO
  // confirm against the implementation.
  explicit TessOsdRenderer(const char* outputbase);

 protected:
  // Renders the results held by api.
  bool AddImageHandler(TessBaseAPI* api) override;
};
306
307
#endif // ndef DISABLED_LEGACY_ENGINE
308
309
}
// namespace tesseract.
310
311
#endif // TESSERACT_API_RENDERER_H_
string
std::string string
Definition:
equationdetect_test.cc:21
strngs.h
tesseract::TessTextRenderer
Definition:
renderer.h:157
tesseract::TessResultRenderer::next
TessResultRenderer * next()
Definition:
renderer.h:59
platform.h
STRING
Definition:
strngs.h:45
tesseract::TessOsdRenderer
Definition:
renderer.h:299
tesseract::TessBoxTextRenderer
Definition:
renderer.h:275
tesseract::TessPDFRenderer
Definition:
renderer.h:215
genericvector.h
tesseract::TessAltoRenderer
Definition:
renderer.h:185
tesseract::TessBaseAPI
Definition:
baseapi.h:98
tesseract
Definition:
baseapi.h:65
tesseract::TessLSTMBoxRenderer
Definition:
renderer.h:264
GenericVector< long int >
tesseract::TessResultRenderer::title
const char * title() const
Definition:
renderer.h:89
tesseract::TessResultRenderer
Definition:
renderer.h:49
tesseract::TessWordStrBoxRenderer
Definition:
renderer.h:286
tesseract::TessUnlvRenderer
Definition:
renderer.h:253
TESS_API
#define TESS_API
Definition:
platform.h:54
tesseract::TessTsvRenderer
Definition:
renderer.h:198
tesseract::TessResultRenderer::file_extension
const char * file_extension() const
Definition:
renderer.h:86
tesseract::TessResultRenderer::imagenum
int imagenum() const
Definition:
renderer.h:107
tesseract::TessResultRenderer::happy
bool happy()
Definition:
renderer.h:94
TessBaseAPI
struct TessBaseAPI TessBaseAPI
Definition:
capi.h:72
TessResultRenderer
struct TessResultRenderer TessResultRenderer
Definition:
capi.h:71
tesseract::TessHOcrRenderer
Definition:
renderer.h:168
include
tesseract
renderer.h
Generated on Thu Jan 30 2020 14:22:19 for tesseract by
1.8.16