tesseract  5.0.0-alpha-619-ge9db
textlineprojection_test.cc
Go to the documentation of this file.
1 // (C) Copyright 2017, Google Inc.
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 // http://www.apache.org/licenses/LICENSE-2.0
6 // Unless required by applicable law or agreed to in writing, software
7 // distributed under the License is distributed on an "AS IS" BASIS,
8 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 // See the License for the specific language governing permissions and
10 // limitations under the License.
11 
12 #include <allheaders.h>
13 #include <string> // for std::string
14 
15 #include "absl/strings/str_format.h" // for absl::StrFormat
16 #include "include_gunit.h"
17 
18 #include <tesseract/baseapi.h>
19 #include "colfind.h"
20 #include "log.h" // for LOG
21 #include "mutableiterator.h"
22 #include <tesseract/osdetect.h>
23 #include "pageres.h"
24 #include "tesseractclass.h"
25 #include "textlineprojection.h"
26 
27 namespace {
28 
33 
// Lowest projection score at which a textline counts as a STRONG_CHAIN.
// NOTE: Keep in sync with textlineprojection.cc.
constexpr int kMinStrongTextValue = 6;
37 
38 // The fixture for testing Tesseract.
39 class TextlineProjectionTest : public testing::Test {
40  protected:
41  std::string OutputNameToPath(const std::string& name) {
42  return file::JoinPath(FLAGS_test_tmpdir, name);
43  }
44 
45  TextlineProjectionTest() {
46  src_pix_ = nullptr;
47  bin_pix_ = nullptr;
48  tesseract_ = nullptr;
49  finder_ = nullptr;
50  denorm_ = nullptr;
51  projection_ = nullptr;
52  }
53  virtual ~TextlineProjectionTest() {
54  pixDestroy(&src_pix_);
55  pixDestroy(&bin_pix_);
56  delete finder_;
57  delete tesseract_;
58  }
59 
60  void SetImage(const char* filename) {
61  pixDestroy(&src_pix_);
62  src_pix_ = pixRead(file::JoinPath(TESTING_DIR, filename).c_str());
63  api_.Init(TESSDATA_DIR, "eng", tesseract::OEM_TESSERACT_ONLY);
64  api_.SetPageSegMode(tesseract::PSM_AUTO_OSD);
65  api_.SetImage(src_pix_);
66  }
67 
68  // Ugly hacked-together function sets up projection_ and denorm_ by setting
69  // up for auto pagelayout, setting up a ColumnFinder, running it, and
70  // using accessors to get at the internal denorm and projection.
71  // If the coordinates have been rotated, the denorm should match
72  // correctly and transform coordinates back to the projection.
73  // We throw away all the blocks, blobs etc, and test the projection with
74  // the resultiterator from a separate BaseAPI run.
    //
    // Side effects: assigns tesseract_, bin_pix_, finder_, denorm_ and
    // projection_. tesseract_, finder_ and bin_pix_ are released by the fixture
    // destructor; osd_tess and photomask_pix are released before returning.
    // NOTE(review): the members are overwritten without freeing earlier values,
    // so this looks intended to be called once per fixture instance - confirm.
75  void SetupProjection() {
    // NOTE(review): `mgr` is passed to both init_tesseract() calls below, but
    // its declaration is not visible in this listing (the embedded numbering
    // skips 76, 80 and 85). Presumably a tesseract::TessdataManager local and
    // further arguments were dropped - confirm against the original file.
77  Tesseract* osd_tess = new Tesseract;
78  OSResults osr;
79  EXPECT_EQ(osd_tess->init_tesseract(TESSDATA_DIR, nullptr, "osd",
81  nullptr, nullptr, false, &mgr),
82  0);
83  tesseract_ = new Tesseract;
84  EXPECT_EQ(tesseract_->init_tesseract(TESSDATA_DIR, nullptr, "eng",
86  nullptr, nullptr, false, &mgr),
87  0);
    // Reuse the thresholded image and source resolution from the api_ run so
    // both Tesseract instances see the same input.
88  bin_pix_ = api_.GetThresholdedImage();
89  *tesseract_->mutable_pix_binary() = pixClone(bin_pix_);
90  osd_tess->set_source_resolution(api_.tesseract()->source_resolution());
91  tesseract_->set_source_resolution(api_.tesseract()->source_resolution());
92  int width = pixGetWidth(bin_pix_);
93  int height = pixGetHeight(bin_pix_);
94  // First make a single block covering the whole image.
95  BLOCK* block = new BLOCK("", true, 0, 0, 0, 0, width, height);
96  block->set_right_to_left(false);
97  BLOCK_LIST src_blocks;
98  BLOCK_IT block_it(&src_blocks);
99  block_it.add_to_end(block);
100  Pix* photomask_pix = nullptr;
101  // The blocks made by the ColumnFinder. Moved to blocks before return.
102  BLOCK_LIST found_blocks;
103  TO_BLOCK_LIST temp_blocks;
    // The returned ColumnFinder is stored in finder_ and deleted by the
    // fixture destructor.
104  finder_ = tesseract_->SetupPageSegAndDetectOrientation(
105  tesseract::PSM_AUTO_OSD, &src_blocks, osd_tess, &osr, &temp_blocks,
106  &photomask_pix, nullptr);
    // Uses the TO_BLOCK the fresh iterator is positioned on.
    // NOTE(review): presumably the single block built above; assumes
    // temp_blocks is not empty - confirm.
107  TO_BLOCK_IT to_block_it(&temp_blocks);
108  TO_BLOCK* to_block = to_block_it.data();
109  denorm_ = finder_->denorm();
110  TO_BLOCK_LIST to_blocks;
111  BLOBNBOX_LIST diacritic_blobs;
    // Note that FindBlocks is run with PSM_AUTO, whereas PSM_AUTO_OSD was used
    // for the setup call above. A negative return value fails the test.
112  EXPECT_GE(finder_->FindBlocks(tesseract::PSM_AUTO, nullptr, 1, to_block,
113  photomask_pix, nullptr, nullptr, nullptr,
114  &found_blocks, &diacritic_blobs, &to_blocks),
115  0);
    // projection_ and denorm_ are obtained from finder_.
    // NOTE(review): presumably they point into it and stay valid only while
    // finder_ is alive - confirm.
116  projection_ = finder_->projection();
117  pixDestroy(&photomask_pix);
118  delete osd_tess;
119  }
120 
121  // Helper evaluates the given box, expects the result to be greater_than
122  // or !greater_than the target_value and provides diagnostics if not.
123  void EvaluateBox(const TBOX& box, bool greater_or_equal, int target_value,
124  const char* text, const char* message) {
125  int value = projection_->EvaluateBox(box, denorm_, false);
126  if (greater_or_equal != (value > target_value)) {
127  LOG(INFO) << absl::StrFormat(
128  "EvaluateBox too %s:%d vs %d for %s word '%s' at:",
129  greater_or_equal ? "low" : "high", value, target_value, message,
130  text);
131  box.print();
132  value = projection_->EvaluateBox(box, denorm_, true);
133  } else {
134  LOG(INFO) << absl::StrFormat("EvaluateBox OK(%d) for %s word '%s'",
135  value, message, text);
136  }
137  if (greater_or_equal) {
138  EXPECT_GE(value, target_value);
139  } else {
140  EXPECT_LT(value, target_value);
141  }
142  }
143 
144  // Helper evaluates the DistanceOfBoxFromBox function by expecting that
145  // box should be nearer to true_box than false_box.
146  void EvaluateDistance(const TBOX& box, const TBOX& true_box,
147  const TBOX& false_box, const char* text,
148  const char* message) {
149  int true_dist =
150  projection_->DistanceOfBoxFromBox(box, true_box, true, denorm_, false);
151  int false_dist =
152  projection_->DistanceOfBoxFromBox(box, false_box, true, denorm_, false);
153  if (false_dist <= true_dist) {
154  LOG(INFO) << absl::StrFormat(
155  "Distance wrong:%d vs %d for %s word '%s' at:",
156  false_dist, true_dist, message, text);
157  true_box.print();
158  projection_->DistanceOfBoxFromBox(box, true_box, true, denorm_, true);
159  projection_->DistanceOfBoxFromBox(box, false_box, true, denorm_, true);
160  } else {
161  LOG(INFO) << absl::StrFormat("Distance OK(%d vs %d) for %s word '%s'",
162  false_dist, true_dist, message, text);
163  }
164  }
165 
166  // Tests the projection on the word boxes of the given image.
167  // line_height is the cap + descender size of the text.
168  void VerifyBoxes(const char* imagefile, int line_height) {
169  SetImage(imagefile);
170  api_.Recognize(nullptr);
171  SetupProjection();
172  MutableIterator* it = api_.GetMutableIterator();
173  do {
174  char* text = it->GetUTF8Text(tesseract::RIL_WORD);
175  const PAGE_RES_IT* pr_it = it->PageResIt();
176  WERD_RES* word = pr_it->word();
177  // The word_box refers to the internal, possibly rotated, coords.
178  TBOX word_box = word->word->bounding_box();
179  bool small_word = word_box.height() * 1.5 < line_height;
180  bool tall_word = word_box.height() * 1.125 > line_height;
181  // We pad small and tall words differently because ascenders and
182  // descenders affect the position and size of the upper/lower boxes.
183  int padding;
184  if (small_word) {
185  padding = word_box.height();
186  } else if (tall_word) {
187  padding = word_box.height() / 3;
188  } else {
189  padding = word_box.height() / 2;
190  }
191  // Test that the word box gets a good score.
192  EvaluateBox(word_box, true, kMinStrongTextValue, text, "Real Word");
193 
194  // Now test a displaced box, both above and below the word.
195  TBOX upper_box(word_box);
196  upper_box.set_bottom(word_box.top());
197  upper_box.set_top(word_box.top() + padding);
198  EvaluateBox(upper_box, false, kMinStrongTextValue, text, "Upper Word");
199  EvaluateBox(upper_box, true, -1, text, "Upper Word not vertical");
200  TBOX lower_box = word_box;
201  lower_box.set_top(word_box.bottom());
202  lower_box.set_bottom(word_box.bottom() - padding);
203  if (tall_word) lower_box.move(ICOORD(0, padding / 2));
204  EvaluateBox(lower_box, false, kMinStrongTextValue, text, "Lower Word");
205  EvaluateBox(lower_box, true, -1, text, "Lower Word not vertical");
206 
207  // Since some words have no text below and some words have no text above
208  // check that at least one of the boxes satisfies BoxOutOfTextline.
209  bool upper_or_lower_out_of_textline =
210  projection_->BoxOutOfHTextline(upper_box, denorm_, false) ||
211  projection_->BoxOutOfHTextline(lower_box, denorm_, false);
212  if (!upper_or_lower_out_of_textline) {
213  projection_->BoxOutOfHTextline(upper_box, denorm_, true);
214  projection_->BoxOutOfHTextline(lower_box, denorm_, true);
215  }
216  EXPECT_TRUE(upper_or_lower_out_of_textline);
217 
218  // Now test DistanceOfBoxFromBox by faking a challenger word, and asking
219  // that each pad box be nearer to its true textline than the
220  // challenger. Due to the tight spacing of latin text, getting
221  // the right position and size of these test boxes is quite fiddly.
222  padding = line_height / 4;
223  upper_box.set_top(upper_box.bottom() + padding);
224  TBOX target_box(word_box);
225  if (!small_word) {
226  upper_box.move(ICOORD(0, -padding * 3 / 2));
227  }
228  target_box.set_top(upper_box.bottom());
229  TBOX upper_challenger(upper_box);
230  upper_challenger.set_bottom(upper_box.top());
231  upper_challenger.set_top(upper_box.top() + word_box.height());
232  EvaluateDistance(upper_box, target_box, upper_challenger, text,
233  "Upper Word");
234  if (tall_word) lower_box.move(ICOORD(0, padding / 2));
235  lower_box.set_bottom(lower_box.top() - padding);
236  target_box = word_box;
237  target_box.set_bottom(lower_box.top());
238  TBOX lower_challenger(lower_box);
239  lower_challenger.set_top(lower_box.bottom());
240  lower_challenger.set_bottom(lower_box.bottom() - word_box.height());
241  EvaluateDistance(lower_box, target_box, lower_challenger, text,
242  "Lower Word");
243 
244  delete[] text;
245  } while (it->Next(tesseract::RIL_WORD));
246  delete it;
247  }
248 
// Fixture state. The declaration order is left untouched because it fixes the
// construction and destruction order of the members.
// NOTE(review): every method uses `api_`, but its declaration is not visible
// in this listing (the embedded numbering skips 253). Presumably a
// tesseract::TessBaseAPI member - confirm against the original file.
// Source image read by SetImage(); destroyed in the destructor.
249  Pix* src_pix_;
// Thresholded image fetched from api_ in SetupProjection(); destroyed in the
// destructor.
250  Pix* bin_pix_;
// Not referenced by any method visible in this listing.
251  BLOCK_LIST blocks_;
// Not referenced by any method visible in this listing.
252  std::string ocr_text_;
// "eng" Tesseract instance created in SetupProjection(); deleted in the
// destructor.
254  Tesseract* tesseract_;
// ColumnFinder returned by SetupPageSegAndDetectOrientation(); deleted in the
// destructor.
255  ColumnFinder* finder_;
// Obtained from finder_->denorm(); never freed by the fixture itself.
256  const DENORM* denorm_;
// Obtained from finder_->projection(); never freed by the fixture itself.
257  const TextlineProjection* projection_;
258 };
259 
260 // Tests all word boxes on an unrotated image.
261 TEST_F(TextlineProjectionTest, Unrotated) { VerifyBoxes("phototest.tif", 31); }
262 
263 // Tests character-level applyboxes on italic Times New Roman.
264 TEST_F(TextlineProjectionTest, Rotated) { VerifyBoxes("phototestrot.tif", 31); }
265 
266 } // namespace
file::JoinPath
static std::string JoinPath(const std::string &s1, const std::string &s2)
Definition: include_gunit.h:43
string
std::string string
Definition: equationdetect_test.cc:21
INFO
Definition: log.h:29
TBOX::move
void move(const ICOORD vec)
Definition: rect.h:156
tesseract::RIL_WORD
Definition: publictypes.h:220
pageres.h
tesseract::TessdataManager
Definition: tessdatamanager.h:126
tesseractclass.h
WERD::bounding_box
TBOX bounding_box() const
Definition: werd.cpp:147
ICOORD
integer coordinate
Definition: points.h:30
tesseract::Tesseract
Definition: tesseractclass.h:172
TBOX::print
void print() const
Definition: rect.h:277
BLOCK::set_right_to_left
void set_right_to_left(bool value)
Definition: ocrblock.h:81
TBOX::top
int16_t top() const
Definition: rect.h:57
OSResults
Definition: osdetect.h:50
TO_BLOCK
Definition: blobbox.h:691
TBOX::set_top
void set_top(int y)
Definition: rect.h:60
WERD_RES
Definition: pageres.h:160
include_gunit.h
tesseract::TEST_F
TEST_F(EquationFinderTest, IdentifySpecialText)
Definition: equationdetect_test.cc:181
tesseract::kMinStrongTextValue
const int kMinStrongTextValue
Definition: colpartition.cpp:59
TBOX::height
int16_t height() const
Definition: rect.h:107
mutableiterator.h
BLOCK
Definition: ocrblock.h:28
textlineprojection.h
colfind.h
baseapi.h
FLAGS_test_tmpdir
const char * FLAGS_test_tmpdir
Definition: include_gunit.h:20
tesseract::TessBaseAPI
Definition: baseapi.h:98
TBOX::bottom
int16_t bottom() const
Definition: rect.h:64
tesseract::PSM_AUTO_OSD
Definition: publictypes.h:161
PAGE_RES_IT::word
WERD_RES * word() const
Definition: pageres.h:748
PAGE_RES_IT
Definition: pageres.h:668
tesseract::PSM_AUTO
Fully automatic page segmentation, but no OSD.
Definition: publictypes.h:164
tesseract::MutableIterator
Definition: mutableiterator.h:44
WERD_RES::word
WERD * word
Definition: pageres.h:180
log.h
LOG
Definition: cleanapi_test.cc:19
TBOX::set_bottom
void set_bottom(int y)
Definition: rect.h:67
tesseract::OEM_TESSERACT_ONLY
Definition: publictypes.h:266
osdetect.h
tesseract::TextlineProjection
Definition: textlineprojection.h:33
tesseract::ColumnFinder
Definition: colfind.h:50
TBOX
Definition: rect.h:33
DENORM
Definition: normalis.h:49