tesseract
5.0.0-alpha-619-ge9db
validate_khmer_test.cc
Go to the documentation of this file.
1
// (C) Copyright 2017, Google Inc.
2
// Licensed under the Apache License, Version 2.0 (the "License");
3
// you may not use this file except in compliance with the License.
4
// You may obtain a copy of the License at
5
// http://www.apache.org/licenses/LICENSE-2.0
6
// Unless required by applicable law or agreed to in writing, software
7
// distributed under the License is distributed on an "AS IS" BASIS,
8
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9
// See the License for the specific language governing permissions and
10
// limitations under the License.
11
12
#include "
include_gunit.h
"
13
#include "
normstrngs.h
"
14
#include "
normstrngs_test.h
"
15
16
namespace
tesseract
{
17
namespace
{
18
19
// Test some random Khmer words.
20
// Runs a handful of Khmer words through ExpectGraphemeModeResults in NFC mode.
// Each word is supplied both as the input and as the target string.
TEST(ValidateKhmerTest, GoodKhmerWords) {
  // One row per word. The three integers are forwarded, in order, as the
  // unicode_count, glyph_count and grapheme_count arguments.
  struct WordCase {
    const char* text;
    int unicodes;
    int glyphs;
    int graphemes;
  };
  const WordCase kWords[] = {
      {"ព័ត៏មានប្លែកៗ", 13, 12, 7},
      {"ទំនុកច្រៀង", 10, 9, 5},
      {"កាលីហ្វូញ៉ា", 11, 10, 4},
      {"ចាប់ពីផ្លូវ", 11, 10, 5},
  };
  for (const WordCase& word : kWords) {
    const std::string text(word.text);
    // Same string for input and target_str, as in every case of this test.
    ExpectGraphemeModeResults(text, UnicodeNormMode::kNFC, word.unicodes,
                              word.glyphs, word.graphemes, text);
  }
}
30
31
// Test some random Khmer words with dotted circles.
32
// Feeds invalid Khmer sequences to NormalizeUTF8String and expects each call
// to return false.
TEST(ValidateKhmerTest, BadKhmerWords) {
  // Output buffer shared by all calls; its contents are never inspected.
  std::string normalized;
  // Every case uses the same settings: NFC, no OCR normalization, grapheme
  // normalization enabled. Only the input text varies.
  const auto normalizes = [&normalized](const char* utf8) {
    return NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone,
                               GraphemeNorm::kNormalize, utf8, &normalized);
  };

  // Multiple dependent vowels not allowed
  EXPECT_FALSE(normalizes("\u1796\u17b6\u17b7"));
  // Multiple shifters not allowed
  EXPECT_FALSE(normalizes("\u1798\u17c9\u17ca"));
  // Multiple signs not allowed
  EXPECT_FALSE(normalizes("\u1780\u17b6\u17cb\u17cd"));
}
50
51
}
// namespace
52
}
// namespace tesseract
tesseract::OCRNorm::kNone
string
std::string string
Definition:
equationdetect_test.cc:21
tesseract::NormalizeUTF8String
bool NormalizeUTF8String(UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNorm grapheme_normalize, const char *str8, std::string *normalized)
Definition:
normstrngs.cpp:163
normstrngs_test.h
include_gunit.h
tesseract
Definition:
baseapi.h:65
tesseract::ExpectGraphemeModeResults
void ExpectGraphemeModeResults(const std::string &str, UnicodeNormMode u_mode, int unicode_count, int glyph_count, int grapheme_count, const std::string &target_str)
Definition:
normstrngs_test.h:48
normstrngs.h
tesseract::UnicodeNormMode::kNFC
tesseract::OCRNorm::kNormalize
unittest
validate_khmer_test.cc
Generated on Thu Jan 30 2020 14:22:22 for tesseract by
1.8.16