tesseract  5.0.0-alpha-619-ge9db
renderer.h
Go to the documentation of this file.
1 // File: renderer.h
3 // Description: Rendering interface to inject into TessBaseAPI
4 //
5 // (C) Copyright 2011, Google Inc.
6 // Licensed under the Apache License, Version 2.0 (the "License");
7 // you may not use this file except in compliance with the License.
8 // You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
17 
18 #ifndef TESSERACT_API_RENDERER_H_
19 #define TESSERACT_API_RENDERER_H_
20 
21 // To avoid collision with other typenames include the ABSOLUTE MINIMUM
22 // complexity of includes here. Use forward declarations wherever possible
23 // and hide includes of complex types in baseapi.cpp.
24 #include <string> // for std::string
25 
26 #include "genericvector.h"
27 #include "platform.h"
28 #include "strngs.h" // for STRING
29 
30 struct Pix;
31 
32 namespace tesseract {
33 
34 class TessBaseAPI;
35 
50  public:
51  virtual ~TessResultRenderer();
52 
53  // Takes ownership of the pointer, so it must be a new'd instance.
54  // Renderers aren't ordered, but this joins the sequence headed by the next
55  // parameter with the existing next() sequence. Renderers must be unique across both lists.
56  void insert(TessResultRenderer* next);
57 
58  // Returns the next renderer or nullptr.
59  TessResultRenderer* next() {
60  return next_;
61  }
62 
68  bool BeginDocument(const char* title);
69 
78  bool AddImage(TessBaseAPI* api);
79 
84  bool EndDocument();
85 
86  const char* file_extension() const {
87  return file_extension_;
88  }
89  const char* title() const {
90  return title_.c_str();
91  }
92 
93  // Returns true while everything is fine; false means something went wrong.
94  bool happy() {
95  return happy_;
96  }
97 
107  int imagenum() const {
108  return imagenum_;
109  }
110 
111  protected:
122  TessResultRenderer(const char* outputbase, const char* extension);
123 
124  // Hook for specialized handling in BeginDocument()
125  virtual bool BeginDocumentHandler();
126 
127  // This must be overridden to render the OCR'd results
128  virtual bool AddImageHandler(TessBaseAPI* api) = 0;
129 
130  // Hook for specialized handling in EndDocument()
131  virtual bool EndDocumentHandler();
132 
133  // Renderers can call this to append '\0' terminated strings into
134  // the output string returned by GetOutput.
135  // This method will grow the output buffer if needed.
136  void AppendString(const char* s);
137 
138  // Renderers can call this to append binary byte sequences into
139  // the output string returned by GetOutput. Note that s is not necessarily
140  // '\0' terminated (and can contain '\0' within it).
141  // This method will grow the output buffer if needed.
142  void AppendData(const char* s, int len);
143 
144  private:
145  const char* file_extension_; // standard extension for generated output
146  STRING title_; // title of document being rendered
147  int imagenum_; // index of last image added
148 
149  FILE* fout_; // output file pointer
150  TessResultRenderer* next_; // Can link multiple renderers together
151  bool happy_; // I get grumpy when the disk fills up, etc.
152 };
153 
158  public:
159  explicit TessTextRenderer(const char* outputbase);
160 
161  protected:
162  bool AddImageHandler(TessBaseAPI* api) override;
163 };
164 
169  public:
170  explicit TessHOcrRenderer(const char* outputbase, bool font_info);
171  explicit TessHOcrRenderer(const char* outputbase);
172 
173  protected:
174  bool BeginDocumentHandler() override;
175  bool AddImageHandler(TessBaseAPI* api) override;
176  bool EndDocumentHandler() override;
177 
178  private:
179  bool font_info_; // whether to print font information
180 };
181 
186  public:
187  explicit TessAltoRenderer(const char* outputbase);
188 
189  protected:
190  bool BeginDocumentHandler() override;
191  bool AddImageHandler(TessBaseAPI* api) override;
192  bool EndDocumentHandler() override;
193 };
194 
199  public:
200  explicit TessTsvRenderer(const char* outputbase, bool font_info);
201  explicit TessTsvRenderer(const char* outputbase);
202 
203  protected:
204  bool BeginDocumentHandler() override;
205  bool AddImageHandler(TessBaseAPI* api) override;
206  bool EndDocumentHandler() override;
207 
208  private:
209  bool font_info_; // whether to print font information
210 };
211 
216  public:
217  // datadir is the location of the TESSDATA. We need it because
218  // we load a custom PDF font from this location.
219  TessPDFRenderer(const char* outputbase, const char* datadir,
220  bool textonly = false);
221 
222  protected:
223  bool BeginDocumentHandler() override;
224  bool AddImageHandler(TessBaseAPI* api) override;
225  bool EndDocumentHandler() override;
226 
227  private:
228  // We don't want to have every image in memory at once,
229  // so we store some metadata as we go along producing
230  // PDFs one page at a time. At the end, that metadata is
231  // used to make everything that isn't easily handled in a
232  // streaming fashion.
233  long int obj_; // counter for PDF objects
234  GenericVector<long int> offsets_; // offset of every PDF object in bytes
235  GenericVector<long int> pages_; // object number for every /Page object
236  std::string datadir_; // where to find the custom font
237  bool textonly_; // skip images if set
238  // Bookkeeping only. DIY = Do It Yourself.
239  void AppendPDFObjectDIY(size_t objectsize);
240  // Bookkeeping + emit data.
241  void AppendPDFObject(const char* data);
242  // Create the /Contents object for an entire page.
243  char* GetPDFTextObjects(TessBaseAPI* api, double width, double height);
244  // Turn an image into a PDF object. Only transcode if we have to.
245  static bool imageToPDFObj(Pix* pix, const char* filename, long int objnum,
246  char** pdf_object, long int* pdf_object_size,
247  int jpg_quality);
248 };
249 
254  public:
255  explicit TessUnlvRenderer(const char* outputbase);
256 
257  protected:
258  bool AddImageHandler(TessBaseAPI* api) override;
259 };
260 
265  public:
266  explicit TessLSTMBoxRenderer(const char* outputbase);
267 
268  protected:
269  bool AddImageHandler(TessBaseAPI* api) override;
270 };
271 
276  public:
277  explicit TessBoxTextRenderer(const char* outputbase);
278 
279  protected:
280  bool AddImageHandler(TessBaseAPI* api) override;
281 };
282 
287  public:
288  explicit TessWordStrBoxRenderer(const char* outputbase);
289 
290  protected:
291  bool AddImageHandler(TessBaseAPI* api) override;
292 };
293 
294 #ifndef DISABLED_LEGACY_ENGINE
295 
300  public:
301  explicit TessOsdRenderer(const char* outputbase);
302 
303  protected:
304  bool AddImageHandler(TessBaseAPI* api) override;
305 };
306 
307 #endif // ndef DISABLED_LEGACY_ENGINE
308 
309 } // namespace tesseract.
310 
311 #endif // TESSERACT_API_RENDERER_H_
string
std::string string
Definition: equationdetect_test.cc:21
strngs.h
tesseract::TessTextRenderer
Definition: renderer.h:157
tesseract::TessResultRenderer::next
TessResultRenderer * next()
Definition: renderer.h:59
platform.h
STRING
Definition: strngs.h:45
tesseract::TessOsdRenderer
Definition: renderer.h:299
tesseract::TessBoxTextRenderer
Definition: renderer.h:275
tesseract::TessPDFRenderer
Definition: renderer.h:215
genericvector.h
tesseract::TessAltoRenderer
Definition: renderer.h:185
tesseract::TessBaseAPI
Definition: baseapi.h:98
tesseract
Definition: baseapi.h:65
tesseract::TessLSTMBoxRenderer
Definition: renderer.h:264
GenericVector< long int >
tesseract::TessResultRenderer::title
const char * title() const
Definition: renderer.h:89
tesseract::TessResultRenderer
Definition: renderer.h:49
tesseract::TessWordStrBoxRenderer
Definition: renderer.h:286
tesseract::TessUnlvRenderer
Definition: renderer.h:253
TESS_API
#define TESS_API
Definition: platform.h:54
tesseract::TessTsvRenderer
Definition: renderer.h:198
tesseract::TessResultRenderer::file_extension
const char * file_extension() const
Definition: renderer.h:86
tesseract::TessResultRenderer::imagenum
int imagenum() const
Definition: renderer.h:107
tesseract::TessResultRenderer::happy
bool happy()
Definition: renderer.h:94
TessBaseAPI
struct TessBaseAPI TessBaseAPI
Definition: capi.h:72
TessResultRenderer
struct TessResultRenderer TessResultRenderer
Definition: capi.h:71
tesseract::TessHOcrRenderer
Definition: renderer.h:168