tesseract  5.0.0-alpha-619-ge9db
layout_test.cc
Go to the documentation of this file.
1 // (C) Copyright 2017, Google Inc.
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 // http://www.apache.org/licenses/LICENSE-2.0
6 // Unless required by applicable law or agreed to in writing, software
7 // distributed under the License is distributed on an "AS IS" BASIS,
8 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 // See the License for the specific language governing permissions and
10 // limitations under the License.
11 
12 #include <string>
13 #include <utility>
14 
15 #include "include_gunit.h"
16 
17 #include "allheaders.h"
18 #include <tesseract/baseapi.h>
19 #include "coutln.h"
20 #include "log.h" // for LOG
21 #include "mutableiterator.h"
22 #include "ocrblock.h" // for class BLOCK
23 #include "pageres.h"
24 #include "polyblk.h"
26 #include "stepblob.h"
27 
28 namespace {
29 
33 
// Human-readable names for block types, indexed by the numeric value of a
// PolyBlockType. The final empty string is a sentinel, not a real name:
// ArraySizeTest counts the entries before it and compares that count with
// PT_COUNT, so the table must be kept in step with the enum.
// Both the characters and the pointers are const: the table is read-only.
static const char* const kPolyBlockNames[] = {
    "Unknown",
    "Flowing Text",
    "Heading Text",
    "Pullout Text",
    "Equation",
    "Inline Equation",
    "Table",
    "Vertical Text",
    "Caption Text",
    "Flowing Image",
    "Heading Image",
    "Pullout Image",
    "Horizontal Line",
    "Vertical Line",
    "Noise",
    ""  // End marker for testing that sizes match.
};
53 
// Substrings that VerifyBlockTextOrder looks for, in order, in the blocks of
// page 8087_054. An empty string stands for a block without text, and the
// trailing nullptr marks the end of the list.
const char* kStrings8087_054[] = {
    "dat",
    "Dalmatian",
    "",
    "DAMAGED DURING",
    "margarine,",
    nullptr,
};
56 const PolyBlockType kBlocks8087_054[] = {PT_HEADING_TEXT, PT_FLOWING_TEXT,
59 
60 // The fixture for testing Tesseract.
61 class LayoutTest : public testing::Test {
62  protected:
63  std::string TestDataNameToPath(const std::string& name) {
64  return file::JoinPath(TESTING_DIR, "/" + name);
65  }
66  std::string TessdataPath() {
67  return file::JoinPath(TESSDATA_DIR, "");
68  }
69 
70  LayoutTest() { src_pix_ = nullptr; }
71  ~LayoutTest() { pixDestroy(&src_pix_); }
72 
73  void SetImage(const char* filename, const char* lang) {
74  pixDestroy(&src_pix_);
75  src_pix_ = pixRead(TestDataNameToPath(filename).c_str());
76  api_.Init(TessdataPath().c_str(), lang, tesseract::OEM_TESSERACT_ONLY);
77  api_.SetPageSegMode(tesseract::PSM_AUTO);
78  api_.SetImage(src_pix_);
79  }
80 
81  // Tests reading order and block finding (very roughly) by iterating
82  // over the blocks, expecting that they contain the strings in order,
83  // allowing for other blocks in between.
84  // An empty string should match an image block, and a nullptr string
85  // indicates the end of the array.
86  void VerifyBlockTextOrder(const char* strings[], const PolyBlockType* blocks,
87  ResultIterator* it) {
88  it->Begin();
89  int string_index = 0;
90  int block_index = 0;
91  do {
92  char* block_text = it->GetUTF8Text(tesseract::RIL_BLOCK);
93  if (block_text != nullptr && it->BlockType() == blocks[string_index] &&
94  strstr(block_text, strings[string_index]) != nullptr) {
95  LOG(INFO) << "Found string " << strings[string_index]
96  << " in block " << block_index
97  << " of type " << kPolyBlockNames[blocks[string_index]] << "\n";
98  // Found this one.
99  ++string_index;
100  } else if (it->BlockType() == blocks[string_index] &&
101  block_text == nullptr && strings[string_index][0] == '\0') {
102  LOG(INFO) << "Found block of type " << kPolyBlockNames[blocks[string_index]]
103  << " at block " << block_index << "\n";
104  // Found this one.
105  ++string_index;
106  } else {
107  LOG(INFO) << "No match found in block with text:\n" << block_text;
108  }
109  delete[] block_text;
110  ++block_index;
111  if (strings[string_index] == nullptr) break;
112  } while (it->Next(tesseract::RIL_BLOCK));
113  EXPECT_TRUE(strings[string_index] == nullptr);
114  }
115 
116  // Tests that approximate order of the biggest text blocks is correct.
117  // Correctness is tested by the following simple rules:
118  // If a block overlaps its predecessor in x, then it must be below it.
119  // otherwise, if the block is not below its predecessor, then it must
120  // be to the left of it if right_to_left is true, or to the right otherwise.
121  void VerifyRoughBlockOrder(bool right_to_left, ResultIterator* it) {
122  int prev_left = 0;
123  int prev_right = 0;
124  int prev_bottom = 0;
125  it->Begin();
126  do {
127  int left, top, right, bottom;
128  if (it->BoundingBox(tesseract::RIL_BLOCK, &left, &top, &right, &bottom) &&
129  PTIsTextType(it->BlockType()) && right - left > 800 &&
130  bottom - top > 200) {
131  if (prev_right > prev_left) {
132  if (std::min(right, prev_right) > std::max(left, prev_left)) {
133  EXPECT_GE(top, prev_bottom) << "Overlapping block should be below";
134  } else if (top < prev_bottom) {
135  if (right_to_left) {
136  EXPECT_GE(prev_left, right) << "Block should be to the left";
137  } else {
138  EXPECT_GE(left, prev_right) << "Block should be to the right";
139  }
140  }
141  }
142  prev_left = left;
143  prev_right = right;
144  prev_bottom = bottom;
145  }
146  } while (it->Next(tesseract::RIL_BLOCK));
147  }
148 
149  // Tests that every blob assigned to the biggest text blocks is contained
150  // fully within its block by testing that the block polygon winds around
151  // the center of the bounding boxes of the outlines in the blob.
152  void VerifyTotalContainment(int winding_target, MutableIterator* it) {
153  it->Begin();
154  do {
155  int left, top, right, bottom;
156  if (it->BoundingBox(tesseract::RIL_BLOCK, &left, &top, &right, &bottom) &&
157  PTIsTextType(it->BlockType()) && right - left > 800 &&
158  bottom - top > 200) {
159  const PAGE_RES_IT* pr_it = it->PageResIt();
160  POLY_BLOCK* pb = pr_it->block()->block->pdblk.poly_block();
161  CHECK(pb != nullptr);
162  FCOORD skew = pr_it->block()->block->skew();
163  EXPECT_GT(skew.x(), 0.0f);
164  EXPECT_GT(skew.y(), 0.0f);
165  // Iterate the words in the block.
166  MutableIterator word_it = *it;
167  do {
168  const PAGE_RES_IT* w_it = word_it.PageResIt();
169  // Iterate the blobs in the word.
170  C_BLOB_IT b_it(w_it->word()->word->cblob_list());
171  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
172  C_BLOB* blob = b_it.data();
173  // Iterate the outlines in the blob.
174  C_OUTLINE_IT ol_it(blob->out_list());
175  for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) {
176  C_OUTLINE* ol = ol_it.data();
177  TBOX box = ol->bounding_box();
178  ICOORD middle((box.left() + box.right()) / 2,
179  (box.top() + box.bottom()) / 2);
180  EXPECT_EQ(winding_target, pb->winding_number(middle));
181  }
182  }
183  } while (word_it.Next(tesseract::RIL_WORD) &&
184  !word_it.IsAtBeginningOf(tesseract::RIL_BLOCK));
185  }
186  } while (it->Next(tesseract::RIL_BLOCK));
187  }
188 
189  Pix* src_pix_;
190  std::string ocr_text_;
192 };
193 
194 // Tests that array sizes match their intended size.
195 TEST_F(LayoutTest, ArraySizeTest) {
196  int size = 0;
197  for (size = 0; kPolyBlockNames[size][0] != '\0'; ++size)
198  ;
199  EXPECT_EQ(size, PT_COUNT);
200 }
201 
202 // Tests that Tesseract gets the important blocks and in the right order
203 // on a UNLV page numbered 8087_054.3B.tif. (Dubrovnik)
204 TEST_F(LayoutTest, UNLV8087_054) {
205  SetImage("8087_054.3B.tif", "eng");
206  // Just run recognition.
207  EXPECT_EQ(api_.Recognize(nullptr), 0);
208  // Check iterator position.
209  tesseract::ResultIterator* it = api_.GetIterator();
210  VerifyBlockTextOrder(kStrings8087_054, kBlocks8087_054, it);
211  delete it;
212 }
213 
214 // Tests that Tesseract gets the important blocks and in the right order
215 // on GOOGLE:13510798882202548:74:84.sj-79.tif (Hebrew image)
216 // TODO: replace hebrew.png by Google image referred above
217 TEST_F(LayoutTest, HebrewOrderingAndSkew) {
218  SetImage("hebrew.png", "eng");
219  // Just run recognition.
220  EXPECT_EQ(api_.Recognize(nullptr), 0);
221  tesseract::MutableIterator* it = api_.GetMutableIterator();
222  // In eng mode, block order should not be RTL.
223  VerifyRoughBlockOrder(false, it);
224  VerifyTotalContainment(1, it);
225  delete it;
226  // Now try again using Hebrew.
227  SetImage("hebrew.png", "heb");
228  // Just run recognition.
229  EXPECT_EQ(api_.Recognize(nullptr), 0);
230  it = api_.GetMutableIterator();
231  // In heb mode, block order should be RTL.
232  VerifyRoughBlockOrder(true, it);
233  // And blobs should still be fully contained.
234  VerifyTotalContainment(-1, it);
235  delete it;
236 }
237 
238 } // namespace
file::JoinPath
static std::string JoinPath(const std::string &s1, const std::string &s2)
Definition: include_gunit.h:43
string
std::string string
Definition: equationdetect_test.cc:21
INFO
Definition: log.h:29
tesseract::RIL_WORD
Definition: publictypes.h:220
pageres.h
BLOCK::skew
FCOORD skew() const
Definition: ocrblock.h:145
C_BLOB::out_list
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:69
tesseract::RIL_BLOCK
Definition: publictypes.h:217
PAGE_RES_IT::block
BLOCK_RES * block() const
Definition: pageres.h:754
FCOORD::y
float y() const
Definition: points.h:209
ICOORD
integer coordinate
Definition: points.h:30
FCOORD::x
float x() const
Definition: points.h:206
TBOX::top
int16_t top() const
Definition: rect.h:57
polyblk.h
PT_CAPTION_TEXT
Definition: capi.h:116
include_gunit.h
tesseract::TEST_F
TEST_F(EquationFinderTest, IdentifySpecialText)
Definition: equationdetect_test.cc:181
FCOORD
Definition: points.h:187
C_BLOB
Definition: stepblob.h:36
PT_COUNT
Definition: capi.h:123
C_OUTLINE
Definition: coutln.h:71
resultiterator.h
WERD::cblob_list
C_BLOB_LIST * cblob_list()
Definition: werd.h:94
mutableiterator.h
BLOCK::pdblk
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:189
CHECK
#define CHECK(test)
Definition: include_gunit.h:57
baseapi.h
PDBLK::poly_block
POLY_BLOCK * poly_block() const
Definition: pdblock.h:54
PT_HEADING_TEXT
Definition: capi.h:110
POLY_BLOCK::winding_number
int16_t winding_number(const ICOORD &test_pt)
Definition: polyblk.cpp:99
tesseract::TessBaseAPI
Definition: baseapi.h:98
stepblob.h
TBOX::bottom
int16_t bottom() const
Definition: rect.h:64
tesseract::PageIteratorLevel
PageIteratorLevel
Definition: publictypes.h:216
coutln.h
PAGE_RES_IT::word
WERD_RES * word() const
Definition: pageres.h:748
PAGE_RES_IT
Definition: pageres.h:668
tesseract::ResultIterator
Definition: resultiterator.h:44
PT_PULLOUT_IMAGE
Definition: capi.h:119
PTIsTextType
bool PTIsTextType(PolyBlockType type)
Definition: publictypes.h:81
BLOCK_RES::block
BLOCK * block
Definition: pageres.h:113
TBOX::left
int16_t left() const
Definition: rect.h:71
ocrblock.h
PT_FLOWING_TEXT
Definition: capi.h:109
C_OUTLINE::bounding_box
const TBOX & bounding_box() const
Definition: coutln.h:112
TBOX::right
int16_t right() const
Definition: rect.h:78
tesseract::PSM_AUTO
Fully automatic page segmentation, but no OSD.
Definition: publictypes.h:164
tesseract::MutableIterator
Definition: mutableiterator.h:44
POLY_BLOCK
Definition: polyblk.h:26
WERD_RES::word
WERD * word
Definition: pageres.h:180
log.h
LOG
Definition: cleanapi_test.cc:19
PolyBlockType
PolyBlockType
Definition: publictypes.h:52
tesseract::OEM_TESSERACT_ONLY
Definition: publictypes.h:266
TBOX
Definition: rect.h:33