tesseract  5.0.0-alpha-619-ge9db
validate_myanmar_test.cc
Go to the documentation of this file.
1 // (C) Copyright 2017, Google Inc.
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 // http://www.apache.org/licenses/LICENSE-2.0
6 // Unless required by applicable law or agreed to in writing, software
7 // distributed under the License is distributed on an "AS IS" BASIS,
8 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 // See the License for the specific language governing permissions and
10 // limitations under the License.
11 
12 #include "include_gunit.h"
13 #include "normstrngs.h"
14 #include "normstrngs_test.h"
15 
16 namespace tesseract {
17 namespace {
18 
19 // Test some random Myanmar words.
20 TEST(ValidateMyanmarTest, GoodMyanmarWords) {
21  std::string str = "လျှာကသိသည် "; // No viramas in this one.
23  str = "တုန္လႈပ္မႈ ";
25 }
26 
27 // Test some random Myanmar words with dotted circles.
28 TEST(ValidateMyanmarTest, BadMyanmarWords) {
29  std::string str = "က်န္းမာေရး";
30  std::vector<std::string> glyphs;
31  EXPECT_FALSE(NormalizeCleanAndSegmentUTF8(
33  str.c_str(), &glyphs));
34  std::string result;
36  GraphemeNorm::kNormalize, str.c_str(),
37  &result));
38  // It works if the grapheme normalization is turned off.
40  GraphemeNorm::kNone, str.c_str(), &result));
41  EXPECT_EQ(str, result);
42  str = "ခုႏွစ္";
43  EXPECT_FALSE(NormalizeCleanAndSegmentUTF8(
45  true, str.c_str(), &glyphs));
47  GraphemeNorm::kNormalize, str.c_str(),
48  &result));
49  // It works if the grapheme normalization is turned off.
51  GraphemeNorm::kNone, str.c_str(), &result));
52  EXPECT_EQ(str, result);
53 }
54 
55 } // namespace
56 } // namespace tesseract
tesseract::OCRNorm::kNone
string
std::string string
Definition: equationdetect_test.cc:21
tesseract::NormalizeUTF8String
bool NormalizeUTF8String(UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNorm grapheme_normalize, const char *str8, std::string *normalized)
Definition: normstrngs.cpp:163
normstrngs_test.h
tesseract::NormalizeCleanAndSegmentUTF8
bool NormalizeCleanAndSegmentUTF8(UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNormMode g_mode, bool report_errors, const char *str8, std::vector< std::string > *graphemes)
Definition: normstrngs.cpp:188
include_gunit.h
tesseract::GraphemeNormMode::kGlyphSplit
tesseract
Definition: baseapi.h:65
tesseract::ExpectGraphemeModeResults
void ExpectGraphemeModeResults(const std::string &str, UnicodeNormMode u_mode, int unicode_count, int glyph_count, int grapheme_count, const std::string &target_str)
Definition: normstrngs_test.h:48
normstrngs.h
tesseract::UnicodeNormMode::kNFC
tesseract::GraphemeNormMode::kCombined
tesseract::OCRNorm::kNormalize