tesseract  5.0.0-alpha-619-ge9db
tablefind_test.cc
Go to the documentation of this file.
1 // (C) Copyright 2017, Google Inc.
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 // http://www.apache.org/licenses/LICENSE-2.0
6 // Unless required by applicable law or agreed to in writing, software
7 // distributed under the License is distributed on an "AS IS" BASIS,
8 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 // See the License for the specific language governing permissions and
10 // limitations under the License.
11 
12 #include <memory>
13 
14 #include "colpartition.h"
15 #include "colpartitiongrid.h"
16 #include "tablefind.h"
17 
18 #include "include_gunit.h"
19 
21 using tesseract::ColPartition_LIST;
23 
24 namespace {
25 
26 class TestableTableFinder : public tesseract::TableFinder {
27  public:
36 
37  void ExpectPartition(const TBOX& box) {
38  tesseract::ColPartitionGridSearch gsearch(&fragmented_text_grid_);
39  gsearch.SetUniqueMode(true);
40  gsearch.StartFullSearch();
41  ColPartition* part = nullptr;
42  bool found = false;
43  while ((part = gsearch.NextFullSearch()) != nullptr) {
44  if (part->bounding_box().left() == box.left() &&
45  part->bounding_box().bottom() == box.bottom() &&
46  part->bounding_box().right() == box.right() &&
47  part->bounding_box().top() == box.top()) {
48  found = true;
49  }
50  }
51  EXPECT_TRUE(found);
52  }
53  void ExpectPartitionCount(int expected_count) {
54  tesseract::ColPartitionGridSearch gsearch(&fragmented_text_grid_);
55  gsearch.SetUniqueMode(true);
56  gsearch.StartFullSearch();
57  ColPartition* part = nullptr;
58  int count = 0;
59  while ((part = gsearch.NextFullSearch()) != nullptr) {
60  ++count;
61  }
62  EXPECT_EQ(expected_count, count);
63  }
64 };
65 
66 class TableFinderTest : public testing::Test {
67  protected:
68  void SetUp() {
69  std::locale::global(std::locale(""));
70  free_boxes_it_.set_to_list(&free_boxes_);
71  finder_.reset(new TestableTableFinder());
72  finder_->Init(1, ICOORD(0, 0), ICOORD(500, 500));
73  // gap finding
74  finder_->set_global_median_xheight(5);
75  finder_->set_global_median_blob_width(5);
76  }
77 
78  void TearDown() {
79  if (partition_.get() != nullptr) partition_->DeleteBoxes();
80  DeletePartitionListBoxes();
81  finder_.reset(nullptr);
82  }
83 
84  void MakePartition(int x_min, int y_min, int x_max, int y_max) {
85  MakePartition(x_min, y_min, x_max, y_max, 0, 0);
86  }
87 
88  void MakePartition(int x_min, int y_min, int x_max, int y_max,
89  int first_column, int last_column) {
90  if (partition_.get() != nullptr) partition_->DeleteBoxes();
91  TBOX box;
92  box.set_to_given_coords(x_min, y_min, x_max, y_max);
93  partition_.reset(
94  ColPartition::FakePartition(box, PT_UNKNOWN, BRT_UNKNOWN, BTFT_NONE));
95  partition_->set_first_column(first_column);
96  partition_->set_last_column(last_column);
97  }
98 
99  void InsertTextPartition(ColPartition* part) {
100  finder_->InsertTextPartition(part);
101  free_boxes_it_.add_after_then_move(part);
102  }
103 
104  void InsertLeaderPartition(int x_min, int y_min, int x_max, int y_max) {
105  InsertLeaderPartition(x_min, y_min, x_max, y_max, 0, 0);
106  }
107 
108  void InsertLeaderPartition(int x_min, int y_min, int x_max, int y_max,
109  int first_column, int last_column) {
110  TBOX box;
111  box.set_to_given_coords(x_min, y_min, x_max, y_max);
112  ColPartition* part = ColPartition::FakePartition(box, PT_FLOWING_TEXT,
114  part->set_first_column(first_column);
115  part->set_last_column(last_column);
116  finder_->InsertLeaderPartition(part);
117  free_boxes_it_.add_after_then_move(part);
118  }
119 
120  void DeletePartitionListBoxes() {
121  for (free_boxes_it_.mark_cycle_pt(); !free_boxes_it_.cycled_list();
122  free_boxes_it_.forward()) {
123  ColPartition* part = free_boxes_it_.data();
124  part->DeleteBoxes();
125  }
126  }
127 
128  std::unique_ptr<TestableTableFinder> finder_;
129  std::unique_ptr<ColPartition> partition_;
130 
131  private:
132  tesseract::ColPartition_CLIST free_boxes_;
133  tesseract::ColPartition_C_IT free_boxes_it_;
134 };
135 
136 TEST_F(TableFinderTest, GapInXProjectionNoGap) {
137  int data[100];
138  for (int i = 0; i < 100; ++i) data[i] = 10;
139  EXPECT_FALSE(finder_->GapInXProjection(data, 100));
140 }
141 
142 TEST_F(TableFinderTest, GapInXProjectionEdgeGap) {
143  int data[100];
144  for (int i = 0; i < 10; ++i) data[i] = 2;
145  for (int i = 10; i < 90; ++i) data[i] = 10;
146  for (int i = 90; i < 100; ++i) data[i] = 2;
147  EXPECT_FALSE(finder_->GapInXProjection(data, 100));
148 }
149 
150 TEST_F(TableFinderTest, GapInXProjectionExists) {
151  int data[100];
152  for (int i = 0; i < 10; ++i) data[i] = 10;
153  for (int i = 10; i < 90; ++i) data[i] = 2;
154  for (int i = 90; i < 100; ++i) data[i] = 10;
155  EXPECT_TRUE(finder_->GapInXProjection(data, 100));
156 }
157 
158 TEST_F(TableFinderTest, HasLeaderAdjacentOverlapping) {
159  InsertLeaderPartition(90, 0, 150, 5);
160  MakePartition(0, 0, 100, 10);
161  EXPECT_TRUE(finder_->HasLeaderAdjacent(*partition_));
162  MakePartition(0, 25, 100, 40);
163  EXPECT_FALSE(finder_->HasLeaderAdjacent(*partition_));
164  MakePartition(145, 0, 200, 20);
165  EXPECT_TRUE(finder_->HasLeaderAdjacent(*partition_));
166  MakePartition(40, 0, 50, 4);
167  EXPECT_TRUE(finder_->HasLeaderAdjacent(*partition_));
168 }
169 
170 TEST_F(TableFinderTest, HasLeaderAdjacentNoOverlap) {
171  InsertLeaderPartition(90, 10, 150, 15);
172  MakePartition(0, 10, 85, 20);
173  EXPECT_TRUE(finder_->HasLeaderAdjacent(*partition_));
174  MakePartition(0, 25, 100, 40);
175  EXPECT_FALSE(finder_->HasLeaderAdjacent(*partition_));
176  MakePartition(0, 0, 100, 10);
177  EXPECT_FALSE(finder_->HasLeaderAdjacent(*partition_));
178  // TODO(nbeato): is this a useful metric? case fails
179  // MakePartition(160, 0, 200, 15); // leader is primarily above it
180  // EXPECT_FALSE(finder_->HasLeaderAdjacent(*partition_));
181 }
182 
183 TEST_F(TableFinderTest, HasLeaderAdjacentPreservesColumns) {
184  InsertLeaderPartition(90, 0, 150, 5, 1, 2);
185  MakePartition(0, 0, 85, 10, 0, 0);
186  EXPECT_FALSE(finder_->HasLeaderAdjacent(*partition_));
187  MakePartition(0, 0, 100, 10, 0, 1);
188  EXPECT_TRUE(finder_->HasLeaderAdjacent(*partition_));
189  MakePartition(0, 0, 200, 10, 0, 5);
190  EXPECT_TRUE(finder_->HasLeaderAdjacent(*partition_));
191  MakePartition(155, 0, 200, 10, 5, 5);
192  EXPECT_FALSE(finder_->HasLeaderAdjacent(*partition_));
193 }
194 
195 // TODO(nbeato): Only testing a splitting case. Add more...
196 // Also test non-split cases.
197 TEST_F(TableFinderTest, SplitAndInsertFragmentedPartitionsBasicPass) {
198  finder_->set_global_median_blob_width(3);
199  finder_->set_global_median_xheight(10);
200 
201  TBOX part_box(10, 5, 100, 15);
202  ColPartition* all = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1));
204  all->set_blob_type(BRT_TEXT);
205  all->set_flow(BTFT_CHAIN);
206  all->set_left_margin(10);
207  all->set_right_margin(100);
208  TBOX blob_box = part_box;
209  for (int i = 10; i <= 20; i += 5) {
210  blob_box.set_left(i + 1);
211  blob_box.set_right(i + 4);
212  all->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(blob_box)));
213  }
214  for (int i = 35; i <= 55; i += 5) {
215  blob_box.set_left(i + 1);
216  blob_box.set_right(i + 4);
217  all->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(blob_box)));
218  }
219  for (int i = 80; i <= 95; i += 5) {
220  blob_box.set_left(i + 1);
221  blob_box.set_right(i + 4);
222  all->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(blob_box)));
223  }
224  // TODO(nbeato): Ray's newer code...
225  // all->ClaimBoxes();
226  all->ComputeLimits(); // This is to make sure median iinfo is set.
227  InsertTextPartition(all); // This is to delete blobs
228  ColPartition* fragment_me = all->CopyButDontOwnBlobs();
229 
230  finder_->SplitAndInsertFragmentedTextPartition(fragment_me);
231  finder_->ExpectPartition(TBOX(11, 5, 24, 15));
232  finder_->ExpectPartition(TBOX(36, 5, 59, 15));
233  finder_->ExpectPartition(TBOX(81, 5, 99, 15));
234  finder_->ExpectPartitionCount(3);
235 }
236 
237 TEST_F(TableFinderTest, SplitAndInsertFragmentedPartitionsBasicFail) {
238  finder_->set_global_median_blob_width(3);
239  finder_->set_global_median_xheight(10);
240 
241  TBOX part_box(10, 5, 100, 15);
242  ColPartition* all = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1));
244  all->set_blob_type(BRT_TEXT);
245  all->set_flow(BTFT_CHAIN);
246  all->set_left_margin(10);
247  all->set_right_margin(100);
248  TBOX blob_box = part_box;
249  for (int i = 10; i <= 95; i += 5) {
250  blob_box.set_left(i + 1);
251  blob_box.set_right(i + 4);
252  all->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(blob_box)));
253  }
254  // TODO(nbeato): Ray's newer code...
255  // all->ClaimBoxes();
256  all->ComputeLimits(); // This is to make sure median iinfo is set.
257  InsertTextPartition(all); // This is to delete blobs
258  ColPartition* fragment_me = all->CopyButDontOwnBlobs();
259 
260  finder_->SplitAndInsertFragmentedTextPartition(fragment_me);
261  finder_->ExpectPartition(TBOX(11, 5, 99, 15));
262  finder_->ExpectPartitionCount(1);
263 }
264 
265 } // namespace
TBOX
Definition: cleanapi_test.cc:19
tesseract::TableFinder
Definition: tablefind.h:130
tesseract::ColPartition::set_right_margin
void set_right_margin(int margin)
Definition: colpartition.h:121
C_BLOB::FakeBlob
static C_BLOB * FakeBlob(const TBOX &box)
Definition: stepblob.cpp:236
tesseract::TableFinder::set_global_median_xheight
void set_global_median_xheight(int xheight)
Definition: tablefind.cpp:756
BTFT_NONE
Definition: blobbox.h:114
tesseract::TableFinder::set_global_median_ledding
void set_global_median_ledding(int ledding)
Definition: tablefind.cpp:762
tesseract::ColPartition::set_last_column
void set_last_column(int column)
Definition: colpartition.h:734
tesseract::ColPartition::set_first_column
void set_first_column(int column)
Definition: colpartition.h:731
BRT_UNKNOWN
Definition: blobbox.h:77
ICOORD
integer coordinate
Definition: points.h:30
TBOX::top
int16_t top() const
Definition: rect.h:57
tesseract::TableFinder::set_global_median_blob_width
void set_global_median_blob_width(int width)
Definition: tablefind.cpp:759
tesseract::TableFinder::HasLeaderAdjacent
bool HasLeaderAdjacent(const ColPartition &part)
Definition: tablefind.cpp:946
colpartition.h
tesseract::TableFinder::InsertLeaderPartition
void InsertLeaderPartition(ColPartition *part)
Definition: tablefind.cpp:410
include_gunit.h
tesseract::TEST_F
TEST_F(EquationFinderTest, IdentifySpecialText)
Definition: equationdetect_test.cc:181
tesseract::TableFinder::SplitAndInsertFragmentedTextPartition
void SplitAndInsertFragmentedTextPartition(ColPartition *part)
Definition: tablefind.cpp:436
BLOBNBOX
Definition: blobbox.h:142
BTFT_CHAIN
Definition: blobbox.h:117
TBOX::set_to_given_coords
void set_to_given_coords(int x_min, int y_min, int x_max, int y_max)
Definition: rect.h:270
BTFT_LEADER
Definition: blobbox.h:120
tesseract::ColPartition
Definition: colpartition.h:67
tesseract::ColPartition::set_blob_type
void set_blob_type(BlobRegionType t)
Definition: colpartition.h:151
TBOX::set_right
void set_right(int x)
Definition: rect.h:81
BRT_TEXT
Definition: blobbox.h:79
TBOX::bottom
int16_t bottom() const
Definition: rect.h:64
tesseract::GridSearch
Definition: bbgrid.h:48
PT_UNKNOWN
Definition: capi.h:108
tesseract::ColPartition::set_flow
void set_flow(BlobTextFlowType f)
Definition: colpartition.h:157
tesseract::ColPartition::set_type
void set_type(PolyBlockType t)
Definition: colpartition.h:184
tesseract::ColPartition::bounding_box
const TBOX & bounding_box() const
Definition: colpartition.h:109
count
int count(LIST var_list)
Definition: oldlist.cpp:79
tesseract::ColPartition::AddBox
void AddBox(BLOBNBOX *box)
Definition: colpartition.cpp:169
tesseract::ColPartition::ComputeLimits
void ComputeLimits()
Definition: colpartition.cpp:861
TBOX::left
int16_t left() const
Definition: rect.h:71
tesseract::ColPartitionGrid
Definition: colpartitiongrid.h:32
PT_FLOWING_TEXT
Definition: capi.h:109
TBOX::right
int16_t right() const
Definition: rect.h:78
tesseract::ColPartition::DeleteBoxes
void DeleteBoxes()
Definition: colpartition.cpp:305
tesseract::TableFinder::InsertTextPartition
void InsertTextPartition(ColPartition *part)
Definition: tablefind.cpp:394
tesseract::ColPartition::set_left_margin
void set_left_margin(int margin)
Definition: colpartition.h:115
tesseract::TableFinder::GapInXProjection
bool GapInXProjection(int *xprojection, int length)
Definition: tablefind.cpp:1768
tesseract::ColPartition::CopyButDontOwnBlobs
ColPartition * CopyButDontOwnBlobs()
Definition: colpartition.cpp:1758
tablefind.h
colpartitiongrid.h
TBOX::set_left
void set_left(int x)
Definition: rect.h:74
TBOX
Definition: rect.h:33