15 #include "gmock/gmock.h"
18 using testing::ElementsAreArray;
// Test fixture derived from ::testing::Test.
// The visible SetUp() override replaces the global C++ locale with
// std::locale(""), i.e. the user-preferred locale taken from the environment.
// NOTE(review): the rest of the fixture (access specifiers, closing braces) is
// not visible in this excerpt.
22 class UnicharsetTest :
public ::testing::Test {
24 void SetUp()
override {
25 std::locale::global(std::locale(
""));
// Basics: at the checkpoints visible here, u.size() is expected to read
// 4, 5, 6, 6, 7 and 8 in turn (the statements between the checks are not
// visible in this excerpt). encode_string("affine", ...) must then succeed and
// produce the labels {3, 4, 4, 5, 7, 6}. Two further encode_string calls on
// lig_str follow, and after the second one the labels are again expected to be
// {3, 4, 4, 5, 7, 6}.
// NOTE(review): this is declared with TEST rather than TEST_F, so the
// UnicharsetTest fixture's SetUp() is presumably not run for it -- confirm
// whether TEST_F was intended.
29 TEST(UnicharsetTest, Basics) {
33 EXPECT_EQ(u.
size(), 4);
35 EXPECT_EQ(u.
size(), 5);
37 EXPECT_EQ(u.
size(), 6);
41 EXPECT_EQ(u.
size(), 6);
43 EXPECT_EQ(u.
size(), 7);
45 EXPECT_EQ(u.
size(), 8);
53 EXPECT_TRUE(u.
encode_string(
"affine",
true, &labels,
nullptr,
nullptr));
// Copy the labels into a std::vector<int> so they can be matched with
// ElementsAreArray.
54 std::vector<int> v(&labels[0], &labels[0] + labels.size());
55 EXPECT_THAT(v, ElementsAreArray({3, 4, 4, 5, 7, 6}));
// The assertion macros wrapping the next two encode_string calls are not
// visible in this excerpt.
59 u.
encode_string(lig_str.c_str(),
true, &labels,
nullptr,
nullptr));
62 u.
encode_string(lig_str.c_str(),
true, &labels,
nullptr,
nullptr));
63 v = std::vector<int>(&labels[0], &labels[0] + labels.size());
64 EXPECT_THAT(v, ElementsAreArray({3, 4, 4, 5, 7, 6}));
// Multibyte: at the checkpoints visible here, u.size() is expected to read
// 4, 5, 6, 7, 8 and 9 in turn (the statements between the checks are not
// visible in this excerpt). A string of six code points from the Unicode
// Arabic block must encode successfully to the labels {3, 4, 4, 5, 8, 7}.
// A second string, src_str, contains U+FB01 (the "fi" ligature); the `cleaned`
// string is encoded with a lengths output as well, and the test expects
// encoded_length == cleaned.size(), a length string of five bytes each equal
// to 2, and the labels {3, 4, 6, 8, 7}.
// NOTE(review): how `cleaned` is derived from src_str is not visible here.
// NOTE(review): declared with TEST rather than TEST_F, so the UnicharsetTest
// fixture's SetUp() is presumably not run -- confirm whether TEST_F was
// intended.
67 TEST(UnicharsetTest, Multibyte) {
74 EXPECT_EQ(u.
size(), 4);
76 EXPECT_EQ(u.
size(), 5);
78 EXPECT_EQ(u.
size(), 6);
80 EXPECT_EQ(u.
size(), 7);
82 EXPECT_EQ(u.
size(), 8);
84 EXPECT_EQ(u.
size(), 9);
97 EXPECT_TRUE(u.
encode_string(
"\u0627\u062c\u062c\u062f\u0635\u062b",
true,
98 &labels,
nullptr,
nullptr));
// Copy the labels into a std::vector<int> so they can be matched with
// ElementsAreArray.
99 std::vector<int> v(&labels[0], &labels[0] + labels.size());
100 EXPECT_THAT(v, ElementsAreArray({3, 4, 4, 5, 8, 7}));
104 std::string src_str =
"\u0627\u062c\ufb01\u0635\u062b";
// The remaining arguments of this encode_string call are not visible in this
// excerpt.
107 EXPECT_TRUE(u.
encode_string(cleaned.c_str(),
true, &labels, &lengths,
109 EXPECT_EQ(encoded_length, cleaned.size());
111 EXPECT_STREQ(len_str.c_str(),
"\002\002\002\002\002");
112 v = std::vector<int>(&labels[0], &labels[0] + labels.size());
113 EXPECT_THAT(v, ElementsAreArray({3, 4, 6, 8, 7}));
// MultibyteBigrams: at the checkpoints visible here, u.size() is expected to
// read 4, 5, 6, 7, 7 and 8 in turn (the statements between the checks are not
// visible in this excerpt). Afterwards fp is opened over the in-memory buffer
// `data` (pointer to its first element plus its size).
// NOTE(review): what `data` holds and what is done with fp after Open() is
// not visible here.
// NOTE(review): declared with TEST rather than TEST_F, so the UnicharsetTest
// fixture's SetUp() is presumably not run -- confirm whether TEST_F was
// intended.
116 TEST(UnicharsetTest, MultibyteBigrams) {
123 EXPECT_EQ(u.
size(), 4);
125 EXPECT_EQ(u.
size(), 5);
127 EXPECT_EQ(u.
size(), 6);
129 EXPECT_EQ(u.
size(), 7);
132 EXPECT_EQ(u.
size(), 7);
135 EXPECT_EQ(u.
size(), 8);
140 fp.
Open(&data[0], data.size());
// OldStyle: logs the filename in use at INFO level and expects u.size() to be
// 111.
// NOTE(review): how `filename` is built and how `u` is populated from it is
// not visible in this excerpt.
// NOTE(review): declared with TEST rather than TEST_F, so the UnicharsetTest
// fixture's SetUp() is presumably not run -- confirm whether TEST_F was
// intended.
150 TEST(UnicharsetTest, OldStyle) {
156 LOG(
INFO) <<
"Filename=" << filename;
158 EXPECT_EQ(u.
size(), 111);