tesseract  5.0.0-alpha-619-ge9db
validate_khmer_test.cc
Go to the documentation of this file.
1 // (C) Copyright 2017, Google Inc.
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 // http://www.apache.org/licenses/LICENSE-2.0
6 // Unless required by applicable law or agreed to in writing, software
7 // distributed under the License is distributed on an "AS IS" BASIS,
8 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 // See the License for the specific language governing permissions and
10 // limitations under the License.
11 
12 #include "include_gunit.h"
13 #include "normstrngs.h"
14 #include "normstrngs_test.h"
15 
16 namespace tesseract {
17 namespace {
18 
19 // Test some random Khmer words.
// Four Khmer sample words are assigned to `str` one after another.
// NOTE(review): the line numbers embedded in this listing skip 22, 24, 26
// and 28, so the statement that followed each assignment is not visible here;
// as shown, `str` is only overwritten and never used. The cross-reference
// index at the end of the listing names
//   tesseract::ExpectGraphemeModeResults(str, u_mode, unicode_count,
//                                        glyph_count, grapheme_count,
//                                        target_str)
// so presumably each word was passed to that helper -- the expected counts
// cannot be recovered from this listing; confirm against the original file.
20 TEST(ValidateKhmerTest, GoodKhmerWords) {
21  std::string str = "ព័ត៏មានប្លែកៗ";
  // (listing line 22 is not shown)
23  str = "ទំនុកច្រៀង";
  // (listing line 24 is not shown)
25  str = "កាលីហ្វូញ៉ា";
  // (listing line 26 is not shown)
27  str = "ចាប់ពីផ្លូវ";
  // (listing line 28 is not shown)
29 }
30 
31 // Test some random Khmer words with dotted circles.
// Three sequences that the inline comments describe as not allowed are fed,
// one at a time, to a call that receives GraphemeNorm::kNormalize,
// str.c_str() and &result as its trailing arguments.
// NOTE(review): the line numbers embedded in this listing skip 36, 41 and 46,
// so the opening of each call (assertion macro, callee and leading arguments)
// is not visible. The "))" that closes each case shows two nested calls, and
// the cross-reference index at the end of the listing names
//   bool NormalizeUTF8String(UnicodeNormMode u_mode, OCRNorm ocr_normalize,
//                            GraphemeNorm grapheme_normalize,
//                            const char *str8, std::string *normalized)
// together with UnicodeNormMode::kNFC and OCRNorm::kNone. Presumably each
// case asserts on NormalizeUTF8String(UnicodeNormMode::kNFC, OCRNorm::kNone,
// ...) -- which assertion macro is used cannot be told from here; confirm
// against the original file.
// NOTE(review): none of the literals below contains U+25CC (dotted circle);
// the wording of the header comment above is kept as found.
32 TEST(ValidateKhmerTest, BadKhmerWords) {
  // Output string handed to every call below via &result.
33  std::string result;
34  // Multiple dependent vowels not allowed
  // U+1796 followed by U+17B6 and U+17B7.
35  std::string str = "\u1796\u17b6\u17b7";
  // (listing line 36, the start of this call, is not shown)
37  GraphemeNorm::kNormalize, str.c_str(),
38  &result));
39  // Multiple shifters not allowed
  // U+1798 followed by U+17C9 and U+17CA.
40  str = "\u1798\u17c9\u17ca";
  // (listing line 41, the start of this call, is not shown)
42  GraphemeNorm::kNormalize, str.c_str(),
43  &result));
44  // Multiple signs not allowed
  // U+1780, U+17B6, then U+17CB and U+17CD.
45  str = "\u1780\u17b6\u17cb\u17cd";
  // (listing line 46, the start of this call, is not shown)
47  GraphemeNorm::kNormalize, str.c_str(),
48  &result));
49 }
50 
51 } // namespace
52 } // namespace tesseract
tesseract::OCRNorm::kNone
string
std::string string
Definition: equationdetect_test.cc:21
tesseract::NormalizeUTF8String
bool NormalizeUTF8String(UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNorm grapheme_normalize, const char *str8, std::string *normalized)
Definition: normstrngs.cpp:163
normstrngs_test.h
include_gunit.h
tesseract
Definition: baseapi.h:65
tesseract::ExpectGraphemeModeResults
void ExpectGraphemeModeResults(const std::string &str, UnicodeNormMode u_mode, int unicode_count, int glyph_count, int grapheme_count, const std::string &target_str)
Definition: normstrngs_test.h:48
normstrngs.h
tesseract::UnicodeNormMode::kNFC
tesseract::OCRNorm::kNormalize