tesseract  5.0.0-alpha-619-ge9db
shapetable_test.cc
Go to the documentation of this file.
1 // (C) Copyright 2017, Google Inc.
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 // http://www.apache.org/licenses/LICENSE-2.0
6 // Unless required by applicable law or agreed to in writing, software
7 // distributed under the License is distributed on an "AS IS" BASIS,
8 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 // See the License for the specific language governing permissions and
10 // limitations under the License.
11 
12 #include <string>
13 #include <utility>
14 
15 #include "absl/strings/str_format.h" // for absl::StrFormat
16 
17 #include "include_gunit.h"
18 
19 #include <tesseract/serialis.h>
20 #include "shapetable.h"
21 #include "unicharset.h"
22 
23 namespace {
24 
25 #ifndef DISABLED_LEGACY_ENGINE
26 
27  using tesseract::Shape;
29 using tesseract::TFile;
31 
32 static std::string TmpNameToPath(const std::string& name) {
33  return file::JoinPath(FLAGS_test_tmpdir, name);
34 }
35 
36 // Sets up a simple shape with some unichars.
37 static void Setup352(int font_id, Shape* shape) {
38  shape->AddToShape(3, font_id);
39  shape->AddToShape(5, font_id);
40  shape->AddToShape(2, font_id);
41 }
42 
43 // Verifies some properties of the 352 shape.
44 static void Expect352(int font_id, const Shape& shape) {
45  EXPECT_EQ(3, shape.size());
46  EXPECT_TRUE(shape.ContainsUnichar(2));
47  EXPECT_TRUE(shape.ContainsUnichar(3));
48  EXPECT_TRUE(shape.ContainsUnichar(5));
49  EXPECT_FALSE(shape.ContainsUnichar(1));
50  EXPECT_TRUE(shape.ContainsUnicharAndFont(2, font_id));
51  EXPECT_FALSE(shape.ContainsUnicharAndFont(2, font_id - 1));
52  EXPECT_FALSE(shape.ContainsUnicharAndFont(font_id, 2));
53  // It should be a subset of itself.
54  EXPECT_TRUE(shape.IsSubsetOf(shape));
55 }
56 
57 #endif
58 
59 // The fixture for testing Shape.
60 class ShapeTest : public testing::Test {
61  protected:
62  void SetUp() {
63  std::locale::global(std::locale(""));
64  }
65 };
66 
67 // Tests that a Shape works as expected for all the basic functions.
68 TEST_F(ShapeTest, BasicTest) {
69 #ifdef DISABLED_LEGACY_ENGINE
70  // Skip test because Shape is missing.
71  GTEST_SKIP();
72 #else
73  Shape shape1;
74  EXPECT_EQ(0, shape1.size());
75  Setup352(101, &shape1);
76  Expect352(101, shape1);
77  // It should still work after file I/O.
78  std::string filename = TmpNameToPath("shapefile");
79  FILE* fp = fopen(filename.c_str(), "wb");
80  EXPECT_TRUE(fp != nullptr);
81  EXPECT_TRUE(shape1.Serialize(fp));
82  fclose(fp);
83  TFile tfp;
84  EXPECT_TRUE(tfp.Open(filename.c_str(), nullptr));
85  Shape shape2;
86  EXPECT_TRUE(shape2.DeSerialize(&tfp));
87  Expect352(101, shape2);
88  // They should be subsets of each other.
89  EXPECT_TRUE(shape1.IsSubsetOf(shape2));
90  EXPECT_TRUE(shape2.IsSubsetOf(shape1));
91  // They should be equal unichars.
92  EXPECT_TRUE(shape1.IsEqualUnichars(&shape2));
93  // and still pass afterwards.
94  Expect352(101, shape1);
95  Expect352(101, shape2);
96 #endif
97 }
98 
99 // Tests AddShape separately, as it takes quite a bit of work.
100 TEST_F(ShapeTest, AddShapeTest) {
101 #ifdef DISABLED_LEGACY_ENGINE
102  // Skip test because Shape is missing.
103  GTEST_SKIP();
104 #else
105  Shape shape1;
106  Setup352(101, &shape1);
107  Expect352(101, shape1);
108  // Now setup a different shape with different content.
109  Shape shape2;
110  shape2.AddToShape(3, 101); // Duplicates shape1.
111  shape2.AddToShape(5, 110); // Different font to shape1.
112  shape2.AddToShape(7, 101); // Different unichar to shape1.
113  // They should NOT be subsets of each other.
114  EXPECT_FALSE(shape1.IsSubsetOf(shape2));
115  EXPECT_FALSE(shape2.IsSubsetOf(shape1));
116  // Now add shape2 to shape1.
117  shape1.AddShape(shape2);
118  // Test subsets again.
119  EXPECT_FALSE(shape1.IsSubsetOf(shape2));
120  EXPECT_TRUE(shape2.IsSubsetOf(shape1));
121  EXPECT_EQ(4, shape1.size());
122  EXPECT_FALSE(shape1.ContainsUnichar(1));
123  EXPECT_TRUE(shape1.ContainsUnicharAndFont(5, 101));
124  EXPECT_TRUE(shape1.ContainsUnicharAndFont(5, 110));
125  EXPECT_FALSE(shape1.ContainsUnicharAndFont(3, 110));
126  EXPECT_FALSE(shape1.ContainsUnicharAndFont(7, 110));
127  EXPECT_FALSE(shape1.IsEqualUnichars(&shape2));
128 #endif
129 }
130 
131 // The fixture for testing Shape.
132 class ShapeTableTest : public testing::Test {};
133 
134 // Tests that a Shape works as expected for all the basic functions.
135 TEST_F(ShapeTableTest, FullTest) {
136 #ifdef DISABLED_LEGACY_ENGINE
137  // Skip test because Shape is missing.
138  GTEST_SKIP();
139 #else
140  Shape shape1;
141  Setup352(101, &shape1);
142  // Build a shape table with the same data, but in separate shapes.
143  UNICHARSET unicharset;
144  unicharset.unichar_insert(" ");
145  for (int i = 1; i <= 10; ++i) {
146  std::string class_str = absl::StrFormat("class%d", i);
147  unicharset.unichar_insert(class_str.c_str());
148  }
149  ShapeTable st(unicharset);
150  EXPECT_EQ(0, st.AddShape(3, 101));
151  EXPECT_EQ(1, st.AddShape(5, 101));
152  EXPECT_EQ(2, st.AddShape(2, 101));
153  EXPECT_EQ(3, st.NumShapes());
154  Expect352(101, shape1);
155  EXPECT_EQ(3, st.AddShape(shape1));
156  for (int i = 0; i < 3; ++i) {
157  EXPECT_FALSE(st.MutableShape(i)->IsEqualUnichars(&shape1));
158  }
159  EXPECT_TRUE(st.MutableShape(3)->IsEqualUnichars(&shape1));
160  EXPECT_TRUE(st.AnyMultipleUnichars());
161  st.DeleteShape(3);
162  EXPECT_FALSE(st.AnyMultipleUnichars());
163 
164  // Now merge to make a single shape like shape1.
165  EXPECT_EQ(1, st.MasterUnicharCount(0));
166  st.MergeShapes(0, 1);
167  EXPECT_EQ(3, st.MergedUnicharCount(1, 2));
168  st.MergeShapes(1, 2);
169  for (int i = 0; i < 3; ++i) {
170  EXPECT_EQ(3, st.MasterUnicharCount(i));
171  // Master font count is the sum of all the font counts in the shape, not
172  // the actual number of different fonts in the shape.
173  EXPECT_EQ(3, st.MasterFontCount(i));
174  }
175  EXPECT_EQ(0, st.MasterDestinationIndex(1));
176  EXPECT_EQ(0, st.MasterDestinationIndex(2));
177  ShapeTable st2;
178  st2.AppendMasterShapes(st, nullptr);
179  EXPECT_EQ(1, st.NumMasterShapes());
180  EXPECT_EQ(1, st2.NumShapes());
181  EXPECT_TRUE(st2.MutableShape(0)->IsEqualUnichars(&shape1));
182  EXPECT_TRUE(st2.AnyMultipleUnichars());
183 #endif
184 }
185 
186 } // namespace
file::JoinPath
static std::string JoinPath(const std::string &s1, const std::string &s2)
Definition: include_gunit.h:43
string
std::string string
Definition: equationdetect_test.cc:21
tesseract::Shape
Definition: shapetable.h:184
tesseract::UnicharAndFonts
Definition: shapetable.h:159
include_gunit.h
tesseract::TEST_F
TEST_F(EquationFinderTest, IdentifySpecialText)
Definition: equationdetect_test.cc:181
FLAGS_test_tmpdir
const char * FLAGS_test_tmpdir
Definition: include_gunit.h:20
unicharset.h
shapetable.h
tesseract::TFile
Definition: serialis.h:75
UNICHARSET
Definition: unicharset.h:145
tesseract::ShapeTable
Definition: shapetable.h:261
serialis.h
UNICHARSET::unichar_insert
void unichar_insert(const char *const unichar_repr, OldUncleanUnichars old_style)
Definition: unicharset.cpp:625