tesseract  5.0.0-alpha-619-ge9db
renderer.cpp
Go to the documentation of this file.
1 // File: renderer.cpp
3 // Description: Rendering interface to inject into TessBaseAPI
4 //
5 // (C) Copyright 2011, Google Inc.
6 // Licensed under the Apache License, Version 2.0 (the "License");
7 // you may not use this file except in compliance with the License.
8 // You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
17 
18 #ifdef HAVE_CONFIG_H
19 #include "config_auto.h"
20 #endif
21 
22 #include <cstring>
23 #include <memory> // std::unique_ptr
24 #include <tesseract/baseapi.h>
26 #include <tesseract/renderer.h>
27 
28 namespace tesseract {
29 
30 /**********************************************************************
31  * Base Renderer interface implementation
32  **********************************************************************/
33 TessResultRenderer::TessResultRenderer(const char *outputbase,
34  const char* extension)
35  : file_extension_(extension),
36  title_(""), imagenum_(-1),
37  fout_(stdout),
38  next_(nullptr),
39  happy_(true) {
40  if (strcmp(outputbase, "-") && strcmp(outputbase, "stdout")) {
41  STRING outfile = STRING(outputbase) + STRING(".") + STRING(file_extension_);
42  fout_ = fopen(outfile.c_str(), "wb");
43  if (fout_ == nullptr) {
44  happy_ = false;
45  }
46  }
47 }
48 
50  if (fout_ != nullptr) {
51  if (fout_ != stdout)
52  fclose(fout_);
53  else
54  clearerr(fout_);
55  }
56  delete next_;
57 }
58 
60  if (next == nullptr) return;
61 
62  TessResultRenderer* remainder = next_;
63  next_ = next;
64  if (remainder) {
65  while (next->next_ != nullptr) {
66  next = next->next_;
67  }
68  next->next_ = remainder;
69  }
70 }
71 
72 bool TessResultRenderer::BeginDocument(const char* title) {
73  if (!happy_) return false;
74  title_ = title;
75  imagenum_ = -1;
76  bool ok = BeginDocumentHandler();
77  if (next_) {
78  ok = next_->BeginDocument(title) && ok;
79  }
80  return ok;
81 }
82 
84  if (!happy_) return false;
85  ++imagenum_;
86  bool ok = AddImageHandler(api);
87  if (next_) {
88  ok = next_->AddImage(api) && ok;
89  }
90  return ok;
91 }
92 
94  if (!happy_) return false;
95  bool ok = EndDocumentHandler();
96  if (next_) {
97  ok = next_->EndDocument() && ok;
98  }
99  return ok;
100 }
101 
102 void TessResultRenderer::AppendString(const char* s) {
103  AppendData(s, strlen(s));
104 }
105 
106 void TessResultRenderer::AppendData(const char* s, int len) {
107  if (!tesseract::Serialize(fout_, s, len)) happy_ = false;
108  fflush(fout_);
109 }
110 
112  return happy_;
113 }
114 
116  return happy_;
117 }
118 
119 
120 /**********************************************************************
121  * UTF8 Text Renderer interface implementation
122  **********************************************************************/
123 TessTextRenderer::TessTextRenderer(const char *outputbase)
124  : TessResultRenderer(outputbase, "txt") {
125 }
126 
128  const std::unique_ptr<const char[]> utf8(api->GetUTF8Text());
129  if (utf8 == nullptr) {
130  return false;
131  }
132 
133  AppendString(utf8.get());
134 
135  const char* pageSeparator = api->GetStringVariable("page_separator");
136  if (pageSeparator != nullptr && *pageSeparator != '\0') {
137  AppendString(pageSeparator);
138  }
139 
140  return true;
141 }
142 
143 /**********************************************************************
144  * TSV Text Renderer interface implementation
145  **********************************************************************/
146 TessTsvRenderer::TessTsvRenderer(const char* outputbase)
147  : TessResultRenderer(outputbase, "tsv") {
148  font_info_ = false;
149 }
150 
151 TessTsvRenderer::TessTsvRenderer(const char* outputbase, bool font_info)
152  : TessResultRenderer(outputbase, "tsv") {
153  font_info_ = font_info;
154 }
155 
157  // Output TSV column headings
158  AppendString(
159  "level\tpage_num\tblock_num\tpar_num\tline_num\tword_"
160  "num\tleft\ttop\twidth\theight\tconf\ttext\n");
161  return true;
162 }
163 
164 bool TessTsvRenderer::EndDocumentHandler() { return true; }
165 
167  const std::unique_ptr<const char[]> tsv(api->GetTSVText(imagenum()));
168  if (tsv == nullptr) return false;
169 
170  AppendString(tsv.get());
171 
172  return true;
173 }
174 
175 /**********************************************************************
176  * UNLV Text Renderer interface implementation
177  **********************************************************************/
178 TessUnlvRenderer::TessUnlvRenderer(const char *outputbase)
179  : TessResultRenderer(outputbase, "unlv") {
180 }
181 
183  const std::unique_ptr<const char[]> unlv(api->GetUNLVText());
184  if (unlv == nullptr) return false;
185 
186  AppendString(unlv.get());
187 
188  return true;
189 }
190 
191 /**********************************************************************
192  * BoxText Renderer interface implementation
193  **********************************************************************/
194 TessBoxTextRenderer::TessBoxTextRenderer(const char *outputbase)
195  : TessResultRenderer(outputbase, "box") {
196 }
197 
199  const std::unique_ptr<const char[]> text(api->GetBoxText(imagenum()));
200  if (text == nullptr) return false;
201 
202  AppendString(text.get());
203 
204  return true;
205 }
206 
207 #ifndef DISABLED_LEGACY_ENGINE
208 
209 /**********************************************************************
210  * Osd Text Renderer interface implementation
211  **********************************************************************/
212 TessOsdRenderer::TessOsdRenderer(const char* outputbase)
213  : TessResultRenderer(outputbase, "osd") {}
214 
216  char* osd = api->GetOsdText(imagenum());
217  if (osd == nullptr) return false;
218 
219  AppendString(osd);
220  delete[] osd;
221 
222  return true;
223 }
224 
225 #endif // ndef DISABLED_LEGACY_ENGINE
226 
227 } // namespace tesseract
tesseract::TessOsdRenderer::AddImageHandler
bool AddImageHandler(TessBaseAPI *api) override
Definition: renderer.cpp:209
tesseract::TessResultRenderer::EndDocument
bool EndDocument()
Definition: renderer.cpp:92
tesseract::TessTsvRenderer::TessTsvRenderer
TessTsvRenderer(const char *outputbase, bool font_info)
Definition: renderer.cpp:148
tesseract::TessResultRenderer::next
TessResultRenderer * next()
Definition: renderer.h:59
tesseract::TessTsvRenderer::AddImageHandler
bool AddImageHandler(TessBaseAPI *api) override
Definition: renderer.cpp:163
STRING
Definition: strngs.h:45
tesseract::TessResultRenderer::AddImageHandler
virtual bool AddImageHandler(TessBaseAPI *api)=0
tesseract::TessBaseAPI::GetStringVariable
const char * GetStringVariable(const char *name) const
Definition: baseapi.cpp:305
tesseract::TessResultRenderer::AppendString
void AppendString(const char *s)
Definition: renderer.cpp:101
tesseract::TessOsdRenderer::TessOsdRenderer
TessOsdRenderer(const char *outputbase)
Definition: renderer.cpp:206
tesseract::TessResultRenderer::AddImage
bool AddImage(TessBaseAPI *api)
Definition: renderer.cpp:82
genericvector.h
baseapi.h
STRING::c_str
const char * c_str() const
Definition: strngs.cpp:192
tesseract::TessBaseAPI
Definition: baseapi.h:98
tesseract::TessBaseAPI::GetUNLVText
char * GetUNLVText()
Definition: baseapi.cpp:1571
tesseract::TessBoxTextRenderer::AddImageHandler
bool AddImageHandler(TessBaseAPI *api) override
Definition: renderer.cpp:193
tesseract::TessResultRenderer::TessResultRenderer
TessResultRenderer(const char *outputbase, const char *extension)
Definition: renderer.cpp:32
tesseract::TessUnlvRenderer::AddImageHandler
bool AddImageHandler(TessBaseAPI *api) override
Definition: renderer.cpp:178
tesseract::TessBaseAPI::GetBoxText
char * GetBoxText(int page_number)
Definition: baseapi.cpp:1517
tesseract
Definition: baseapi.h:65
tesseract::TessUnlvRenderer::TessUnlvRenderer
TessUnlvRenderer(const char *outputbase)
Definition: renderer.cpp:174
tesseract::TessBaseAPI::GetTSVText
char * GetTSVText(int page_number)
Definition: baseapi.cpp:1380
tesseract::TessResultRenderer::title
const char * title() const
Definition: renderer.h:89
tesseract::TessResultRenderer::BeginDocument
bool BeginDocument(const char *title)
Definition: renderer.cpp:71
renderer.h
tesseract::TessResultRenderer::BeginDocumentHandler
virtual bool BeginDocumentHandler()
Definition: renderer.cpp:110
tesseract::TessResultRenderer
Definition: renderer.h:49
tesseract::TessTextRenderer::AddImageHandler
bool AddImageHandler(TessBaseAPI *api) override
Definition: renderer.cpp:125
tesseract::TessResultRenderer::EndDocumentHandler
virtual bool EndDocumentHandler()
Definition: renderer.cpp:114
tesseract::TessTsvRenderer::EndDocumentHandler
bool EndDocumentHandler() override
Definition: renderer.cpp:161
tesseract::TessResultRenderer::imagenum
int imagenum() const
Definition: renderer.h:107
tesseract::TessBaseAPI::GetUTF8Text
char * GetUTF8Text()
Definition: baseapi.cpp:1348
tesseract::TessTextRenderer::TessTextRenderer
TessTextRenderer(const char *outputbase)
Definition: renderer.cpp:121
tesseract::TessBoxTextRenderer::TessBoxTextRenderer
TessBoxTextRenderer(const char *outputbase)
Definition: renderer.cpp:189
tesseract::Serialize
bool Serialize(FILE *fp, const char *data, size_t n=1)
Definition: serialis.cpp:73
tesseract::TessResultRenderer::insert
void insert(TessResultRenderer *next)
Definition: renderer.cpp:58
tesseract::TessTsvRenderer::BeginDocumentHandler
bool BeginDocumentHandler() override
Definition: renderer.cpp:153
tesseract::TessBaseAPI::GetOsdText
char * GetOsdText(int page_number)
Definition: baseapi.cpp:1714
tesseract::TessResultRenderer::AppendData
void AppendData(const char *s, int len)
Definition: renderer.cpp:105
tesseract::TessResultRenderer::~TessResultRenderer
virtual ~TessResultRenderer()
Definition: renderer.cpp:48