tesseract  5.0.0-alpha-619-ge9db
apiexample_test.cc
Go to the documentation of this file.
1 // File: apiexample_test.cc
3 // Description: Api Test for Tesseract using text fixtures and parameters.
4 // Tests for Devanagari, Latin and Arabic scripts are disabled by default.
5 // Disabled tests can be run when required by using the
6 // --gtest_also_run_disabled_tests argument.
7 // ./unittest/apiexample_test --gtest_also_run_disabled_tests
8 //
9 // Author: ShreeDevi Kumar
10 //
11 // Licensed under the Apache License, Version 2.0 (the "License");
12 // you may not use this file except in compliance with the License.
13 // You may obtain a copy of the License at
14 // http://www.apache.org/licenses/LICENSE-2.0
15 // Unless required by applicable law or agreed to in writing, software
16 // distributed under the License is distributed on an "AS IS" BASIS,
17 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 // See the License for the specific language governing permissions and
19 // limitations under the License.
21 
22 // expects clone of tessdata_fast repo in ../../tessdata_fast
23 
24 //#include "log.h"
25 #include <limits.h>
26 #include <time.h>
27 #include <fstream>
28 #include <iostream>
29 #include <locale>
30 #include <memory> // std::unique_ptr
31 #include <string>
32 #include <tesseract/baseapi.h>
33 #include "include_gunit.h"
34 #include "leptonica/allheaders.h"
35 
36 namespace {
37 
38 class QuickTest : public testing::Test {
39  protected:
40  virtual void SetUp() { start_time_ = time(nullptr); }
41  virtual void TearDown() {
42 #if defined(DEBUG)
43  // Debug builds can be very slow, so allow 4 min for OCR of a test image.
44  // apitest_example including disabled tests takes about 18 min on ARMv7.
45  const time_t MAX_SECONDS_FOR_TEST = 240;
46 #else
47  // Release builds typically need less than 10 s for OCR of a test image,
48  // apitest_example including disabled tests takes about 90 s on ARMv7.
49  const time_t MAX_SECONDS_FOR_TEST = 55;
50 #endif
51  const time_t end_time = time(nullptr);
52  EXPECT_TRUE(end_time - start_time_ <= MAX_SECONDS_FOR_TEST)
53  << "The test took too long - "
54  << ::testing::PrintToString(end_time - start_time_);
55  }
56  time_t start_time_;
57 };
58 
59 void OCRTester(const char* imgname, const char* groundtruth,
60  const char* tessdatadir, const char* lang) {
61  // log.info() << tessdatadir << " for language: " << lang << std::endl;
62  char* outText;
63  std::locale loc("C"); // You can also use "" for the default system locale
64  std::ifstream file(groundtruth);
65  file.imbue(loc); // Use it for file input
66  std::string gtText((std::istreambuf_iterator<char>(file)),
67  std::istreambuf_iterator<char>());
68  std::unique_ptr<tesseract::TessBaseAPI> api(new tesseract::TessBaseAPI());
69  ASSERT_FALSE(api->Init(tessdatadir, lang))
70  << "Could not initialize tesseract.";
71  Pix* image = pixRead(imgname);
72  ASSERT_TRUE(image != nullptr) << "Failed to read test image.";
73  api->SetImage(image);
74  outText = api->GetUTF8Text();
75  EXPECT_EQ(gtText, outText)
76  << "Phototest.tif OCR does not match ground truth for "
77  << ::testing::PrintToString(lang);
78  api->End();
79  delete[] outText;
80  pixDestroy(&image);
81 }
82 
83 class MatchGroundTruth : public QuickTest,
84  public ::testing::WithParamInterface<const char*> {};
85 
86 TEST_P(MatchGroundTruth, FastPhototestOCR) {
87  OCRTester(TESTING_DIR "/phototest.tif", TESTING_DIR "/phototest.txt",
88  TESSDATA_DIR "_fast", GetParam());
89 }
90 
91 TEST_P(MatchGroundTruth, BestPhototestOCR) {
92  OCRTester(TESTING_DIR "/phototest.tif", TESTING_DIR "/phototest.txt",
93  TESSDATA_DIR "_best", GetParam());
94 }
95 
96 TEST_P(MatchGroundTruth, TessPhototestOCR) {
97  OCRTester(TESTING_DIR "/phototest.tif", TESTING_DIR "/phototest.txt",
98  TESSDATA_DIR, GetParam());
99 }
100 
101 INSTANTIATE_TEST_CASE_P(Eng, MatchGroundTruth, ::testing::Values("eng"));
102 INSTANTIATE_TEST_CASE_P(DISABLED_Latin, MatchGroundTruth,
103  ::testing::Values("script/Latin"));
104 INSTANTIATE_TEST_CASE_P(DISABLED_Deva, MatchGroundTruth,
105  ::testing::Values("script/Devanagari"));
106 INSTANTIATE_TEST_CASE_P(DISABLED_Arabic, MatchGroundTruth,
107  ::testing::Values("script/Arabic"));
108 
109 class EuroText : public QuickTest {};
110 
111 TEST_F(EuroText, FastLatinOCR) {
112  OCRTester(TESTING_DIR "/eurotext.tif", TESTING_DIR "/eurotext.txt",
113  TESSDATA_DIR "_fast", "script/Latin");
114 }
115 
// The script/Latin output for eurotext.tif does not match the ground truth
// when using tessdata or tessdata_best, so those combinations are not
// tested here.
119 
120 } // namespace
string
std::string string
Definition: equationdetect_test.cc:21
include_gunit.h
tesseract::TEST_F
TEST_F(EquationFinderTest, IdentifySpecialText)
Definition: equationdetect_test.cc:181
baseapi.h
file
Definition: include_gunit.h:22
tesseract::TessBaseAPI
Definition: baseapi.h:98