tesseract  5.0.0-alpha-619-ge9db
baseapi_test.cc
Go to the documentation of this file.
1 // (C) Copyright 2017, Google Inc.
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 // http://www.apache.org/licenses/LICENSE-2.0
6 // Unless required by applicable law or agreed to in writing, software
7 // distributed under the License is distributed on an "AS IS" BASIS,
8 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 // See the License for the specific language governing permissions and
10 // limitations under the License.
11 
12 #include <memory>
13 #include <string>
14 #include <vector>
15 
16 #include "absl/strings/ascii.h"
17 #include "absl/strings/str_cat.h"
18 #include "allheaders.h"
19 
20 #include "include_gunit.h"
21 #include "gmock/gmock-matchers.h"
22 
23 #include <tesseract/baseapi.h>
24 #include "cycletimer.h" // for CycleTimer
25 #include "log.h" // for LOG
26 #include "ocrblock.h" // for class BLOCK
27 #include "pageres.h"
28 
29 namespace {
30 
31 using ::testing::ContainsRegex;
32 using ::testing::HasSubstr;
33 
34 static const char* langs[] = {"eng", "vie", "hin", "ara", nullptr};
35 static const char* image_files[] = {"HelloGoogle.tif", "viet.tif", "raaj.tif",
36  "arabic.tif", nullptr};
37 static const char* gt_text[] = {"Hello Google", "\x74\x69\xe1\xba\xbf\x6e\x67",
38  "\xe0\xa4\xb0\xe0\xa4\xbe\xe0\xa4\x9c",
39  "\xd8\xa7\xd9\x84\xd8\xb9\xd8\xb1\xd8\xa8\xd9\x8a",
40  nullptr};
41 
42 class FriendlyTessBaseAPI : public tesseract::TessBaseAPI {
43  FRIEND_TEST(TesseractTest, LSTMGeometryTest);
44 };
45 
46 std::string GetCleanedTextResult(tesseract::TessBaseAPI* tess, Pix* pix) {
47  tess->SetImage(pix);
48  char* result = tess->GetUTF8Text();
49  std::string ocr_result = result;
50  delete[] result;
51  absl::StripAsciiWhitespace(&ocr_result);
52  return ocr_result;
53 }
54 
55 // The fixture for testing Tesseract.
56 class TesseractTest : public testing::Test {
57  protected:
58  static std::string TestDataNameToPath(const std::string& name) {
59  return file::JoinPath(TESTING_DIR, name);
60  }
61  static std::string TessdataPath() {
62  return TESSDATA_DIR;
63  }
64 };
65 
66 // Tests that Tesseract gets exactly the right answer on phototest.
67 TEST_F(TesseractTest, BasicTesseractTest) {
69  std::string truth_text;
70  std::string ocr_text;
71  if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) != -1) {
72  Pix* src_pix = pixRead(TestDataNameToPath("phototest.tif").c_str());
73  CHECK(src_pix);
74  ocr_text = GetCleanedTextResult(&api, src_pix);
75  CHECK_OK(file::GetContents(TestDataNameToPath("phototest.gold.txt"),
76  &truth_text, file::Defaults()));
77  absl::StripAsciiWhitespace(&truth_text);
78  EXPECT_STREQ(truth_text.c_str(), ocr_text.c_str());
79  pixDestroy(&src_pix);
80  } else {
81  // eng.traineddata not found.
82  GTEST_SKIP();
83  }
84 }
85 
86 // Test that api.GetComponentImages() will return a set of images for
87 // paragraphs even if text recognition was not run.
88 TEST_F(TesseractTest, IteratesParagraphsEvenIfNotDetected) {
90  if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) != -1) {
92  api.SetVariable("paragraph_debug_level", "3");
93 #if 0 // TODO: b622.png is missing
94  Pix* src_pix = pixRead(TestDataNameToPath("b622.png").c_str());
95  CHECK(src_pix);
96  api.SetImage(src_pix);
97  Boxa* para_boxes =
98  api.GetComponentImages(tesseract::RIL_PARA, true, nullptr, nullptr);
99  EXPECT_TRUE(para_boxes != nullptr);
100  Boxa* block_boxes =
101  api.GetComponentImages(tesseract::RIL_BLOCK, true, nullptr, nullptr);
102  EXPECT_TRUE(block_boxes != nullptr);
103  // TODO(eger): Get paragraphs out of this page pre-text.
104  EXPECT_GE(boxaGetCount(para_boxes), boxaGetCount(block_boxes));
105  boxaDestroy(&block_boxes);
106  boxaDestroy(&para_boxes);
107  pixDestroy(&src_pix);
108 #endif
109  } else {
110  // eng.traineddata not found.
111  GTEST_SKIP();
112  }
113 }
114 
115 // We should get hOCR output and not seg fault, even if the api caller doesn't
116 // call SetInputName().
117 TEST_F(TesseractTest, HOCRWorksWithoutSetInputName) {
119  if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) == -1) {
120  // eng.traineddata not found.
121  GTEST_SKIP();
122  return;
123  }
124  Pix* src_pix = pixRead(TestDataNameToPath("HelloGoogle.tif").c_str());
125  CHECK(src_pix);
126  api.SetImage(src_pix);
127  char* result = api.GetHOCRText(0);
128  EXPECT_TRUE(result != nullptr);
129  EXPECT_THAT(result, HasSubstr("Hello"));
130  EXPECT_THAT(result, HasSubstr("<div class='ocr_page'"));
131  delete[] result;
132  pixDestroy(&src_pix);
133 }
134 
135 // hOCR output should contain baseline info for upright textlines.
136 TEST_F(TesseractTest, HOCRContainsBaseline) {
138  if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) == -1) {
139  // eng.traineddata not found.
140  GTEST_SKIP();
141  return;
142  }
143  Pix* src_pix = pixRead(TestDataNameToPath("HelloGoogle.tif").c_str());
144  CHECK(src_pix);
145  api.SetInputName("HelloGoogle.tif");
146  api.SetImage(src_pix);
147  char* result = api.GetHOCRText(0);
148  EXPECT_TRUE(result != nullptr);
149  EXPECT_THAT(result, HasSubstr("Hello"));
150  EXPECT_THAT(result, ContainsRegex("<span class='ocr_line'[^>]* "
151  "baseline [-.0-9]+ [-.0-9]+"));
152  delete[] result;
153  pixDestroy(&src_pix);
154 }
155 
156 // A provided document we once misread "RICK SNYDER" as "FUCK SNYDER"
157 // causing a bit of an embarrassment. This was due to bad baseline fitting
158 // which has been addressed by both better baseline finding and by
159 // better algorithms to deal with baseline and xheight consistency.
160 TEST_F(TesseractTest, RickSnyderNotFuckSnyder) {
162  if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) == -1) {
163  // eng.traineddata not found.
164  GTEST_SKIP();
165  return;
166  }
167  api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY);
168 #if 0 // TODO: rick_snyder.jpeg is missing
169  Pix* src_pix = pixRead(TestDataNameToPath("rick_snyder.jpeg").c_str());
170  CHECK(src_pix);
171  api.SetImage(src_pix);
172  char* result = api.GetHOCRText(0);
173  EXPECT_TRUE(result != nullptr);
174  EXPECT_THAT(result, Not(HasSubstr("FUCK")));
175  delete[] result;
176  pixDestroy(&src_pix);
177 #else
178  GTEST_SKIP();
179 #endif
180 }
181 
182 // Tests that Tesseract gets exactly the right answer on some page numbers.
183 TEST_F(TesseractTest, AdaptToWordStrTest) {
184 #ifdef DISABLED_LEGACY_ENGINE
185  // Skip test because TessBaseAPI::AdaptToWordStr is missing.
186  GTEST_SKIP();
187 #else
188  static const char* kTrainingPages[] = {
189  "136.tif", "256.tif", "410.tif", "432.tif", "540.tif",
190  "692.tif", "779.tif", "793.tif", "808.tif", "815.tif",
191  "12.tif", "12.tif", nullptr};
192  static const char* kTrainingText[] = {
193  "1 3 6", "2 5 6", "4 1 0", "4 3 2", "5 4 0", "6 9 2", "7 7 9",
194  "7 9 3", "8 0 8", "8 1 5", "1 2", "1 2", nullptr};
195  static const char* kTestPages[] = {"324.tif", "433.tif", "12.tif", nullptr};
196  static const char* kTestText[] = {"324", "433", "12", nullptr};
198  std::string truth_text;
199  std::string ocr_text;
200  if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) == -1) {
201  // eng.traineddata not found.
202  GTEST_SKIP();
203  return;
204  }
205  api.SetVariable("matcher_sufficient_examples_for_prototyping", "1");
206  api.SetVariable("classify_class_pruner_threshold", "220");
207  // Train on the training text.
208  for (int i = 0; kTrainingPages[i] != nullptr; ++i) {
209  std::string image_file = TestDataNameToPath(kTrainingPages[i]);
210  Pix* src_pix = pixRead(image_file.c_str());
211  CHECK(src_pix);
212  api.SetImage(src_pix);
213  EXPECT_TRUE(
214  api.AdaptToWordStr(tesseract::PSM_SINGLE_WORD, kTrainingText[i]))
215  << "Failed to adapt to text \"" << kTrainingText[i] << "\" on image "
216  << image_file;
217  pixDestroy(&src_pix);
218  }
219  // Test the test text.
220  api.SetVariable("tess_bn_matching", "1");
222  for (int i = 0; kTestPages[i] != nullptr; ++i) {
223  Pix* src_pix = pixRead(TestDataNameToPath(kTestPages[i]).c_str());
224  CHECK(src_pix);
225  ocr_text = GetCleanedTextResult(&api, src_pix);
226  absl::StripAsciiWhitespace(&truth_text);
227  EXPECT_STREQ(kTestText[i], ocr_text.c_str());
228  pixDestroy(&src_pix);
229  }
230 #endif
231 }
232 
233 // Tests that LSTM gets exactly the right answer on phototest.
234 TEST_F(TesseractTest, BasicLSTMTest) {
236  std::string truth_text;
237  std::string ocr_text;
238  if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_LSTM_ONLY) == -1) {
239  // eng.traineddata not found.
240  GTEST_SKIP();
241  return;
242  }
243  Pix* src_pix = pixRead(TestDataNameToPath("phototest_2.tif").c_str());
244  CHECK(src_pix);
245  ocr_text = GetCleanedTextResult(&api, src_pix);
246  CHECK_OK(file::GetContents(TestDataNameToPath("phototest.gold.txt"),
247  &truth_text, file::Defaults()));
248  absl::StripAsciiWhitespace(&truth_text);
249  EXPECT_STREQ(truth_text.c_str(), ocr_text.c_str());
250  pixDestroy(&src_pix);
251 }
252 
253 // Test that LSTM's character bounding boxes are properly converted to
254 // Tesseract structures. Note that we can't guarantee that LSTM's
255 // character boxes fall completely within Tesseract's word box because
256 // the baseline denormalization/normalization transforms may introduce
257 // errors due to float/int conversions (e.g., see OUTLINE::move() in
258 // ccstruct/poutline.h) Instead, we do a loose check.
259 TEST_F(TesseractTest, LSTMGeometryTest) {
260 #ifdef DISABLED_LEGACY_ENGINE
261  // Skip test because TessBaseAPI::GetPageRes is missing.
262  GTEST_SKIP();
263 #else
264  Pix* src_pix = pixRead(TestDataNameToPath("deslant.tif").c_str());
265  FriendlyTessBaseAPI api;
266  if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_LSTM_ONLY) == -1) {
267  // eng.traineddata not found.
268  GTEST_SKIP();
269  return;
270  }
271  api.SetImage(src_pix);
272  ASSERT_EQ(api.Recognize(nullptr), 0);
273 
274  const PAGE_RES* page_res = api.GetPageRes();
275  PAGE_RES_IT page_res_it(const_cast<PAGE_RES*>(page_res));
276  page_res_it.restart_page();
277  BLOCK* block = page_res_it.block()->block;
278  CHECK(block);
279 
280  // extract word and character boxes for each word
281  for (page_res_it.restart_page(); page_res_it.word() != nullptr;
282  page_res_it.forward()) {
283  WERD_RES* word = page_res_it.word();
284  CHECK(word);
285  CHECK(word->best_choice);
286  CHECK_GT(word->best_choice->length(), 0);
287  CHECK(word->word);
288  CHECK(word->box_word);
289  // tesseract's word box
290  TBOX tess_blob_box;
291  tess_blob_box = word->word->bounding_box();
292  tess_blob_box.rotate(block->re_rotation());
293  // verify that each of LSTM's character boxes lies close to within
294  // tesseract's word box
295  for (int i = 0; i < word->box_word->length(); ++i) {
296  TBOX lstm_blob_box = word->box_word->BlobBox(i);
297  // LSTM character box should not spill out of tesseract word box
298  // by more than a few pixels in any direction
299  EXPECT_LT(tess_blob_box.left() - lstm_blob_box.left(), 5);
300  EXPECT_LT(lstm_blob_box.right() - tess_blob_box.right(), 5);
301  EXPECT_LT(tess_blob_box.bottom() - lstm_blob_box.bottom(), 5);
302  EXPECT_LT(lstm_blob_box.top() - tess_blob_box.top(), 5);
303  }
304  }
305  pixDestroy(&src_pix);
306 #endif
307 }
308 
309 TEST_F(TesseractTest, InitConfigOnlyTest) {
310  // Languages for testing initialization.
311  const char* langs[] = {"eng", "chi_tra", "jpn", "vie"};
312  std::unique_ptr<tesseract::TessBaseAPI> api;
313  CycleTimer timer;
314  for (size_t i = 0; i < ARRAYSIZE(langs); ++i) {
315  api.reset(new tesseract::TessBaseAPI);
316  timer.Restart();
317  EXPECT_EQ(0, api->Init(TessdataPath().c_str(), langs[i],
319  timer.Stop();
320  LOG(INFO) << "Lang " << langs[i] << " took " << timer.GetInMs()
321  << "ms in regular init";
322  }
323  // Init variables to set for config-only initialization.
324  GenericVector<STRING> vars_vec, vars_values;
325  vars_vec.push_back(STRING("tessedit_init_config_only"));
326  vars_values.push_back(STRING("1"));
327  LOG(INFO) << "Switching to config only initialization:";
328  for (size_t i = 0; i < ARRAYSIZE(langs); ++i) {
329  api.reset(new tesseract::TessBaseAPI);
330  timer.Restart();
331  EXPECT_EQ(0, api->Init(TessdataPath().c_str(), langs[i],
332  tesseract::OEM_TESSERACT_ONLY, nullptr, 0, &vars_vec,
333  &vars_values, false));
334  timer.Stop();
335  LOG(INFO) << "Lang " << langs[i] << " took " << timer.GetInMs()
336  << "ms in config-only init";
337  }
338 }
339 
340 // Tests if two instances of Tesseract/LSTM can co-exist in the same thread.
341 // NOTE: This is not an exhaustive test and current support for multiple
342 // instances in Tesseract is fragile. This test is intended largely as a means
343 // of detecting and guarding against the existing support being possibly broken
344 // by future CLs. TessBaseAPI instances are initialized using the default
345 // OEM_DEFAULT mode.
346 TEST(TesseractInstanceTest, TestMultipleTessInstances) {
347  int num_langs = 0;
348  while (langs[num_langs] != nullptr) ++num_langs;
349 
350  const std::string kTessdataPath = TESSDATA_DIR;
351 
352  // Preload images and verify that OCR is correct on them individually.
353  std::vector<Pix*> pix(num_langs);
354  for (int i = 0; i < num_langs; ++i) {
355  SCOPED_TRACE(absl::StrCat("Single instance test with lang = ", langs[i]));
356  std::string path = file::JoinPath(TESTING_DIR, image_files[i]);
357  pix[i] = pixRead(path.c_str());
358  QCHECK(pix[i] != nullptr) << "Could not read " << path;
359 
361  EXPECT_EQ(0, tess.Init(kTessdataPath.c_str(), langs[i]));
362  std::string ocr_result = GetCleanedTextResult(&tess, pix[i]);
363  EXPECT_STREQ(gt_text[i], ocr_result.c_str());
364  }
365 
366  // Process the images in all pairwise combinations of associated languages.
367  std::string ocr_result[2];
368  for (int i = 0; i < num_langs; ++i) {
369  for (int j = i + 1; j < num_langs; ++j) {
370  tesseract::TessBaseAPI tess1, tess2;
371  tess1.Init(kTessdataPath.c_str(), langs[i]);
372  tess2.Init(kTessdataPath.c_str(), langs[j]);
373 
374  ocr_result[0] = GetCleanedTextResult(&tess1, pix[i]);
375  ocr_result[1] = GetCleanedTextResult(&tess2, pix[j]);
376 
377  EXPECT_FALSE(strcmp(gt_text[i], ocr_result[0].c_str()) ||
378  strcmp(gt_text[j], ocr_result[1].c_str()))
379  << "OCR failed on language pair " << langs[i] << "-" << langs[j];
380  }
381  }
382 
383  for (int i = 0; i < num_langs; ++i) pixDestroy(&pix[i]);
384 }
385 
386 // Tests whether Tesseract parameters are correctly set for the two instances.
387 TEST(TesseractInstanceTest, TestMultipleTessInstanceVariables) {
388  std::string illegal_name = "an_illegal_name";
389  std::string langs[2] = {"eng", "hin"};
390  std::string int_param_name = "tessedit_pageseg_mode";
391  int int_param[2] = {1, 2};
392  std::string int_param_str[2] = {"1", "2"};
393  std::string bool_param_name = "tessedit_ambigs_training";
394  bool bool_param[2] = {false, true};
395  std::string bool_param_str[2] = {"F", "T"};
396  std::string str_param_name = "tessedit_char_blacklist";
397  std::string str_param[2] = {"abc", "def"};
398  std::string double_param_name = "segment_penalty_dict_frequent_word";
399  std::string double_param_str[2] = {"0.01", "2"};
400  double double_param[2] = {0.01, 2};
401 
402  const std::string kTessdataPath = TESSDATA_DIR;
403 
404  tesseract::TessBaseAPI tess1, tess2;
405  for (int i = 0; i < 2; ++i) {
406  tesseract::TessBaseAPI* api = (i == 0) ? &tess1 : &tess2;
407  api->Init(kTessdataPath.c_str(), langs[i].c_str());
408  api->SetVariable(illegal_name.c_str(), "none");
409  api->SetVariable(int_param_name.c_str(), int_param_str[i].c_str());
410  api->SetVariable(bool_param_name.c_str(), bool_param_str[i].c_str());
411  api->SetVariable(str_param_name.c_str(), str_param[i].c_str());
412  api->SetVariable(double_param_name.c_str(), double_param_str[i].c_str());
413  }
414  for (int i = 0; i < 2; ++i) {
415  tesseract::TessBaseAPI* api = (i == 0) ? &tess1 : &tess2;
416  EXPECT_FALSE(api->GetStringVariable(illegal_name.c_str()));
417  int intvar;
418  EXPECT_TRUE(api->GetIntVariable(int_param_name.c_str(), &intvar));
419  EXPECT_EQ(int_param[i], intvar);
420  bool boolvar;
421  EXPECT_TRUE(api->GetBoolVariable(bool_param_name.c_str(), &boolvar));
422  EXPECT_EQ(bool_param[i], boolvar);
423  EXPECT_STREQ(str_param[i].c_str(),
424  api->GetStringVariable(str_param_name.c_str()));
425  double doublevar;
426  EXPECT_TRUE(api->GetDoubleVariable(double_param_name.c_str(), &doublevar));
427  EXPECT_EQ(double_param[i], doublevar);
428  }
429 }
430 
431 } // namespace
file::JoinPath
static std::string JoinPath(const std::string &s1, const std::string &s2)
Definition: include_gunit.h:43
string
std::string string
Definition: equationdetect_test.cc:21
INFO
Definition: log.h:29
WERD_RES::box_word
tesseract::BoxWord * box_word
Definition: pageres.h:266
CHECK_OK
#define CHECK_OK(test)
Definition: include_gunit.h:62
pageres.h
WERD::bounding_box
TBOX bounding_box() const
Definition: werd.cpp:147
tesseract::RIL_BLOCK
Definition: publictypes.h:217
tesseract::TessBaseAPI::GetComponentImages
Boxa * GetComponentImages(PageIteratorLevel level, bool text_only, bool raw_image, int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:697
TBOX::top
int16_t top() const
Definition: rect.h:57
STRING
Definition: strngs.h:45
tesseract::TessBaseAPI::GetIntVariable
bool GetIntVariable(const char *name, int *value) const
Definition: baseapi.cpp:289
file::Defaults
static int Defaults()
Definition: include_gunit.h:39
WERD_RES
Definition: pageres.h:160
ARRAYSIZE
#define ARRAYSIZE(arr)
Definition: include_gunit.h:53
tesseract::OEM_LSTM_ONLY
Definition: publictypes.h:267
tesseract::PSM_SINGLE_BLOCK
Assume a single uniform block of text. (Default.)
Definition: publictypes.h:168
tesseract::TessBaseAPI::GetStringVariable
const char * GetStringVariable(const char *name) const
Definition: baseapi.cpp:305
include_gunit.h
tesseract::TEST_F
TEST_F(EquationFinderTest, IdentifySpecialText)
Definition: equationdetect_test.cc:181
file::GetContents
static bool GetContents(const std::string &filename, std::string *out, int)
Definition: include_gunit.h:31
tesseract::TessBaseAPI::Init
int Init(const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_non_debug_params)
Definition: baseapi.cpp:337
TBOX::rotate
void rotate(const FCOORD &vec)
Definition: rect.h:196
tesseract::PSM_SINGLE_WORD
Treat the image as a single word.
Definition: publictypes.h:170
CycleTimer::GetInMs
int64_t GetInMs() const
Definition: cycletimer.h:48
GenericVector::push_back
int push_back(T object)
Definition: genericvector.h:799
BLOCK
Definition: ocrblock.h:28
CHECK
#define CHECK(test)
Definition: include_gunit.h:57
tesseract::TessBaseAPI::SetVariable
bool SetVariable(const char *name, const char *value)
Definition: baseapi.cpp:277
tesseract::BoxWord::BlobBox
const TBOX & BlobBox(int index) const
Definition: boxword.h:83
baseapi.h
WERD_RES::best_choice
WERD_CHOICE * best_choice
Definition: pageres.h:235
tesseract::TessBaseAPI
Definition: baseapi.h:98
cycletimer.h
tesseract::TessBaseAPI::SetPageSegMode
void SetPageSegMode(PageSegMode mode)
Definition: baseapi.cpp:506
TBOX::bottom
int16_t bottom() const
Definition: rect.h:64
CycleTimer::Stop
void Stop()
Definition: cycletimer.h:42
tesseract::TessBaseAPI::SetInputName
void SetInputName(const char *name)
Definition: baseapi.cpp:262
PAGE_RES
Definition: pageres.h:73
GenericVector< STRING >
PAGE_RES_IT
Definition: pageres.h:668
tesseract::TessBaseAPI::GetBoolVariable
bool GetBoolVariable(const char *name, bool *value) const
Definition: baseapi.cpp:297
WERD_CHOICE::length
int length() const
Definition: ratngs.h:291
tesseract::BoxWord::length
int length() const
Definition: boxword.h:82
TBOX::left
int16_t left() const
Definition: rect.h:71
ocrblock.h
CycleTimer
Definition: cycletimer.h:19
TBOX::right
int16_t right() const
Definition: rect.h:78
CycleTimer::Restart
void Restart()
Definition: cycletimer.h:37
tesseract::TessBaseAPI::GetDoubleVariable
bool GetDoubleVariable(const char *name, double *value) const
Definition: baseapi.cpp:311
WERD_RES::word
WERD * word
Definition: pageres.h:180
log.h
tesseract::TessBaseAPI::AdaptToWordStr
bool AdaptToWordStr(PageSegMode mode, const char *wordstr)
Definition: baseapi.cpp:1796
CHECK_GT
#define CHECK_GT(test, value)
Definition: include_gunit.h:59
LOG
Definition: cleanapi_test.cc:19
tesseract::TessBaseAPI::GetUTF8Text
char * GetUTF8Text()
Definition: baseapi.cpp:1348
BLOCK::re_rotation
FCOORD re_rotation() const
Definition: ocrblock.h:133
tesseract::OEM_TESSERACT_ONLY
Definition: publictypes.h:266
tesseract::RIL_PARA
Definition: publictypes.h:218
tesseract::TessBaseAPI::SetImage
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: baseapi.cpp:571
tesseract::TessBaseAPI::GetHOCRText
char * GetHOCRText(ETEXT_DESC *monitor, int page_number)
Definition: hocrrenderer.cpp:147
TBOX
Definition: rect.h:33