tesseract  5.0.0-alpha-619-ge9db
stringrenderer_test.cc
Go to the documentation of this file.
1 // (C) Copyright 2017, Google Inc.
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 // http://www.apache.org/licenses/LICENSE-2.0
6 // Unless required by applicable law or agreed to in writing, software
7 // distributed under the License is distributed on an "AS IS" BASIS,
8 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 // See the License for the specific language governing permissions and
10 // limitations under the License.
11 
12 #include <memory>
13 #include <string>
14 
15 #include "absl/strings/str_split.h" // for absl::StrSplit
16 
17 #include "allheaders.h"
18 #include "boxchar.h"
19 #include "boxread.h"
20 #include "commandlineflags.h"
22 #include "include_gunit.h"
23 #include "stringrenderer.h"
24 #include <tesseract/strngs.h>
25 
26 BOOL_PARAM_FLAG(display, false, "Display image for inspection");
27 
28 // Flags defined in pango_font_info.cpp
29 DECLARE_BOOL_PARAM_FLAG(use_only_legacy_fonts);
30 DECLARE_STRING_PARAM_FLAG(fonts_dir);
31 DECLARE_STRING_PARAM_FLAG(fontconfig_tmpdir);
32 
33 namespace {
34 
// Sample strings, one per script exercised by the tests below.

// Latin (left-to-right).
constexpr char kEngText[] = "the quick brown fox jumps over the lazy dog";

// Devanagari.
constexpr char kHinText[] = "पिताने विवाह की | हो गई उद्विग्न वह सोचा";

// Hangul with embedded ASCII digits.
constexpr char kKorText[] = "이는 것으로 다시 넣을 1234 수는 있지만 선택의 의미는";

// Arabic (right-to-left), split over two adjacent literals.
constexpr char kArabicText[] =
    "والفكر والصراع ، بالتأمل والفهم والتحليل ، "
    "بالعلم والفن ، وأخيرا بالضحك أوبالبكاء ، ";

// Arabic words mixed with ASCII digits and letters (bidirectional).
constexpr char kMixedText[] = "والفكر 123 والصراع abc";
43 
// Plain ASCII spelling: 'f' and 'i' are two separate code points.
const char kEngNonLigatureText[] = "fidelity";
// Same as kEngNonLigatureText, but with "fi" replaced with its ligature,
// U+FB01 (LATIN SMALL LIGATURE FI). Written as an escape in its own literal so
// that editors and text normalisation cannot silently fold it back to "fi".
const char kEngLigatureText[] =
    "\uFB01"
    "delity";
47 
48 using tesseract::BoxChar;
50 
51 class StringRendererTest : public ::testing::Test {
52  protected:
53  void SetUp() override {
54  static std::locale system_locale("");
55  std::locale::global(system_locale);
56  }
57 
58  static void SetUpTestCase() {
59  l_chooseDisplayProg(L_DISPLAY_WITH_XZGV);
60  FLAGS_fonts_dir = TESTING_DIR;
61  FLAGS_fontconfig_tmpdir = FLAGS_test_tmpdir;
62 #ifdef GOOGLE_TESSERACT
63  FLAGS_use_only_legacy_fonts = false;
64  // Needed for reliable heapchecking of pango layout structures.
65  FLAGS_heap_check_max_pointer_offset = -1;
66 #endif
67  }
68 
69  void DisplayClusterBoxes(Pix* pix) {
70  if (!FLAGS_display) return;
71  const std::vector<BoxChar*>& boxchars = renderer_->GetBoxes();
72  Boxa* boxes = boxaCreate(0);
73  for (const auto& boxchar : boxchars) {
74  if (boxchar->box())
75  boxaAddBox(boxes, const_cast<Box*>(boxchar->box()), L_CLONE);
76  }
77  Pix* box_pix = pixDrawBoxaRandom(pix, boxes, 1);
78  boxaDestroy(&boxes);
79  pixDisplay(box_pix, 0, 0);
80  pixDestroy(&box_pix);
81  }
82  std::unique_ptr<StringRenderer> renderer_;
83 };
84 
85 TEST_F(StringRendererTest, DoesRenderToImage) {
86  renderer_.reset(new StringRenderer("Verdana 10", 600, 600));
87  Pix* pix = nullptr;
88  EXPECT_EQ(strlen(kEngText),
89  renderer_->RenderToImage(kEngText, strlen(kEngText), &pix));
90  EXPECT_TRUE(pix != nullptr);
91  EXPECT_GT(renderer_->GetBoxes().size(), 0);
92  DisplayClusterBoxes(pix);
93  pixDestroy(&pix);
94 
95  renderer_.reset(new StringRenderer("UnBatang 10", 600, 600));
96  EXPECT_EQ(strlen(kKorText),
97  renderer_->RenderToImage(kKorText, strlen(kKorText), &pix));
98  EXPECT_GT(renderer_->GetBoxes().size(), 0);
99  DisplayClusterBoxes(pix);
100  pixDestroy(&pix);
101 
102  renderer_.reset(new StringRenderer("Lohit Hindi 10", 600, 600));
103  EXPECT_EQ(strlen(kHinText),
104  renderer_->RenderToImage(kHinText, strlen(kHinText), &pix));
105  EXPECT_GT(renderer_->GetBoxes().size(), 0);
106  DisplayClusterBoxes(pix);
107  pixDestroy(&pix);
108 
109  // RTL text
110  renderer_.reset(new StringRenderer("Arab 10", 600, 600));
111  EXPECT_EQ(strlen(kArabicText),
112  renderer_->RenderToImage(kArabicText, strlen(kArabicText), &pix));
113  EXPECT_TRUE(pix != nullptr);
114  EXPECT_GT(renderer_->GetBoxes().size(), 0);
115  DisplayClusterBoxes(pix);
116  pixDestroy(&pix);
117 
118  // Mixed direction Arabic + english text
119  renderer_.reset(new StringRenderer("Arab 10", 600, 600));
120  EXPECT_EQ(strlen(kMixedText),
121  renderer_->RenderToImage(kMixedText, strlen(kMixedText), &pix));
122  EXPECT_TRUE(pix != nullptr);
123  EXPECT_GT(renderer_->GetBoxes().size(), 0);
124  DisplayClusterBoxes(pix);
125  pixDestroy(&pix);
126 }
127 
128 TEST_F(StringRendererTest, DoesRenderToImageWithUnderline) {
129  renderer_.reset(new StringRenderer("Verdana 10", 600, 600));
130  // Underline all words but NOT intervening spaces.
131  renderer_->set_underline_start_prob(1.0);
132  renderer_->set_underline_continuation_prob(0);
133  Pix* pix = nullptr;
134  EXPECT_EQ(strlen(kEngText),
135  renderer_->RenderToImage(kEngText, strlen(kEngText), &pix));
136  EXPECT_TRUE(pix != nullptr);
137  EXPECT_GT(renderer_->GetBoxes().size(), 0);
138  DisplayClusterBoxes(pix);
139  pixDestroy(&pix);
140  renderer_->ClearBoxes();
141 
142  // Underline all words AND intervening spaces.
143  renderer_->set_underline_start_prob(1.0);
144  renderer_->set_underline_continuation_prob(1.0);
145  EXPECT_EQ(strlen(kEngText),
146  renderer_->RenderToImage(kEngText, strlen(kEngText), &pix));
147  EXPECT_TRUE(pix != nullptr);
148  EXPECT_GT(renderer_->GetBoxes().size(), 0);
149  DisplayClusterBoxes(pix);
150  pixDestroy(&pix);
151  renderer_->ClearBoxes();
152 
153  // Underline words and intervening spaces with 0.5 prob.
154  renderer_->set_underline_start_prob(0.5);
155  renderer_->set_underline_continuation_prob(0.5);
156  EXPECT_EQ(strlen(kEngText),
157  renderer_->RenderToImage(kEngText, strlen(kEngText), &pix));
158  EXPECT_TRUE(pix != nullptr);
159  EXPECT_GT(renderer_->GetBoxes().size(), 0);
160  DisplayClusterBoxes(pix);
161  pixDestroy(&pix);
162 }
163 
164 TEST_F(StringRendererTest, DoesHandleNewlineCharacters) {
165  const char kRawText[] = "\n\n\n A \nB \nC \n\n\n";
166  const char kStrippedText[] = " A B C "; // text with newline chars removed
167  renderer_.reset(new StringRenderer("Verdana 10", 600, 600));
168  Pix* pix = nullptr;
169  EXPECT_EQ(strlen(kRawText),
170  renderer_->RenderToImage(kRawText, strlen(kRawText), &pix));
171  EXPECT_TRUE(pix != nullptr);
172  const std::vector<BoxChar*>& boxchars = renderer_->GetBoxes();
173  // 3 characters + 4 spaces => 7 boxes
174  EXPECT_EQ(7, boxchars.size());
175  if (boxchars.size() == 7) {
176  // Verify the text content of the boxchars
177  for (size_t i = 0; i < boxchars.size(); ++i) {
178  EXPECT_EQ(std::string(1, kStrippedText[i]), boxchars[i]->ch());
179  }
180  }
181  DisplayClusterBoxes(pix);
182  pixDestroy(&pix);
183 }
184 
185 TEST_F(StringRendererTest, DoesRenderLigatures) {
186  renderer_.reset(new StringRenderer("Arab 12", 600, 250));
187  const char kArabicLigature[] = "لا";
188 
189  Pix* pix = nullptr;
190  EXPECT_EQ(
191  strlen(kArabicLigature),
192  renderer_->RenderToImage(kArabicLigature, strlen(kArabicLigature), &pix));
193  EXPECT_TRUE(pix != nullptr);
194  EXPECT_GT(renderer_->GetBoxes().size(), 0);
195  const std::vector<BoxChar*>& boxes = renderer_->GetBoxes();
196  EXPECT_EQ(1, boxes.size());
197  EXPECT_TRUE(boxes[0]->box() != nullptr);
198  EXPECT_STREQ(kArabicLigature, boxes[0]->ch().c_str());
199  DisplayClusterBoxes(pix);
200  pixDestroy(&pix);
201 
202  renderer_.reset(new StringRenderer("Arab 12", 600, 250));
203  const char kArabicMixedText[] = "والفكر والصراع 1234,\nوالفكر لا والصراع";
204  renderer_->RenderToImage(kArabicMixedText, strlen(kArabicMixedText), &pix);
205  DisplayClusterBoxes(pix);
206  pixDestroy(&pix);
207 }
208 
209 static int FindBoxCharXCoord(const std::vector<BoxChar*>& boxchars,
210  const std::string& ch) {
211  for (const auto& boxchar : boxchars) {
212  if (boxchar->ch() == ch) return boxchar->box()->x;
213  }
214  return INT_MAX;
215 }
216 
217 TEST_F(StringRendererTest, ArabicBoxcharsInLTROrder) {
218  renderer_.reset(new StringRenderer("Arab 10", 600, 600));
219  Pix* pix = nullptr;
220  // Arabic letters should be in decreasing x-coordinates
221  const char kArabicWord[] = "\u0644\u0627\u0641\u0643\u0631";
222  const std::string kRevWord = "\u0631\u0643\u0641\u0627\u0644";
223  renderer_->RenderToImage(kArabicWord, strlen(kArabicWord), &pix);
224  std::string boxes_str = renderer_->GetBoxesStr();
225  // Decode to get the box text strings.
226  EXPECT_FALSE(boxes_str.empty());
227  GenericVector<STRING> texts;
228  EXPECT_TRUE(ReadMemBoxes(0, false, boxes_str.c_str(), false, nullptr, &texts,
229  nullptr, nullptr));
230  std::string ltr_str;
231  for (int i = 0; i < texts.size(); ++i) {
232  ltr_str += texts[i].c_str();
233  }
234  // The string should come out perfectly reversed, despite there being a
235  // ligature.
236  EXPECT_EQ(ltr_str, kRevWord);
237  // Just to prove there was a ligature, the number of texts is less than the
238  // number of unicodes.
239  EXPECT_LT(texts.size(), 5);
240  pixDestroy(&pix);
241 }
242 
243 TEST_F(StringRendererTest, DoesOutputBoxcharsInReadingOrder) {
244  renderer_.reset(new StringRenderer("Arab 10", 600, 600));
245  Pix* pix = nullptr;
246  // Arabic letters should be in decreasing x-coordinates
247  const char kArabicWord[] = "والفكر";
248  renderer_->RenderToImage(kArabicWord, strlen(kArabicWord), &pix);
249  EXPECT_GT(renderer_->GetBoxes().size(), 0);
250  const std::vector<BoxChar*>& boxchars = renderer_->GetBoxes();
251  for (size_t i = 1; i < boxchars.size(); ++i) {
252  EXPECT_GT(boxchars[i - 1]->box()->x, boxchars[i]->box()->x)
253  << boxchars[i - 1]->ch();
254  }
255  pixDestroy(&pix);
256 
257  // English letters should be in increasing x-coordinates
258  const char kEnglishWord[] = "Google";
259  renderer_->ClearBoxes();
260  renderer_->RenderToImage(kEnglishWord, strlen(kEnglishWord), &pix);
261  EXPECT_EQ(boxchars.size(), strlen(kEnglishWord));
262  for (size_t i = 1; i < boxchars.size(); ++i) {
263  EXPECT_LT(boxchars[i - 1]->box()->x, boxchars[i]->box()->x)
264  << boxchars[i - 1]->ch();
265  }
266  pixDestroy(&pix);
267 
268  // Mixed text should satisfy both.
269  renderer_->ClearBoxes();
270  renderer_->RenderToImage(kMixedText, strlen(kMixedText), &pix);
271  EXPECT_LT(FindBoxCharXCoord(boxchars, "a"), FindBoxCharXCoord(boxchars, "b"));
272  EXPECT_LT(FindBoxCharXCoord(boxchars, "1"), FindBoxCharXCoord(boxchars, "2"));
273  EXPECT_GT(FindBoxCharXCoord(boxchars, "و"), FindBoxCharXCoord(boxchars, "ر"));
274  pixDestroy(&pix);
275 }
276 
277 TEST_F(StringRendererTest, DoesRenderVerticalText) {
278  Pix* pix = nullptr;
279  renderer_.reset(new StringRenderer("UnBatang 10", 600, 600));
280  renderer_->set_vertical_text(true);
281  EXPECT_EQ(strlen(kKorText),
282  renderer_->RenderToImage(kKorText, strlen(kKorText), &pix));
283  EXPECT_GT(renderer_->GetBoxes().size(), 0);
284  DisplayClusterBoxes(pix);
285  pixDestroy(&pix);
286 }
287 
288 // Checks that we preserve charboxes across RenderToImage calls, with
289 // appropriate page numbers.
290 TEST_F(StringRendererTest, DoesKeepAllImageBoxes) {
291  renderer_.reset(new StringRenderer("Verdana 10", 600, 600));
292  Pix* pix = nullptr;
293  int num_boxes_per_page = 0;
294  const int kNumTrials = 2;
295  for (int i = 0; i < kNumTrials; ++i) {
296  EXPECT_EQ(strlen(kEngText),
297  renderer_->RenderToImage(kEngText, strlen(kEngText), &pix));
298  EXPECT_TRUE(pix != nullptr);
299  pixDestroy(&pix);
300  EXPECT_GT(renderer_->GetBoxes().size(), 0);
301  if (!num_boxes_per_page) {
302  num_boxes_per_page = renderer_->GetBoxes().size();
303  } else {
304  EXPECT_EQ((i + 1) * num_boxes_per_page, renderer_->GetBoxes().size());
305  }
306  for (int j = i * num_boxes_per_page; j < (i + 1) * num_boxes_per_page;
307  ++j) {
308  EXPECT_EQ(i, renderer_->GetBoxes()[j]->page());
309  }
310  }
311 }
312 
313 TEST_F(StringRendererTest, DoesClearBoxes) {
314  renderer_.reset(new StringRenderer("Verdana 10", 600, 600));
315  Pix* pix = nullptr;
316  EXPECT_EQ(strlen(kEngText),
317  renderer_->RenderToImage(kEngText, strlen(kEngText), &pix));
318  pixDestroy(&pix);
319  EXPECT_GT(renderer_->GetBoxes().size(), 0);
320  const int num_boxes_per_page = renderer_->GetBoxes().size();
321 
322  renderer_->ClearBoxes();
323  EXPECT_EQ(strlen(kEngText),
324  renderer_->RenderToImage(kEngText, strlen(kEngText), &pix));
325  pixDestroy(&pix);
326  EXPECT_EQ(num_boxes_per_page, renderer_->GetBoxes().size());
327 }
328 
329 TEST_F(StringRendererTest, DoesLigatureTextForRendering) {
330  renderer_.reset(new StringRenderer("Verdana 10", 600, 600));
331  renderer_->set_add_ligatures(true);
332  Pix* pix = nullptr;
333  EXPECT_EQ(strlen(kEngNonLigatureText),
334  renderer_->RenderToImage(kEngNonLigatureText,
335  strlen(kEngNonLigatureText), &pix));
336  pixDestroy(&pix);
337  // There should be one less box than letters due to the 'fi' ligature.
338  EXPECT_EQ(strlen(kEngNonLigatureText) - 1, renderer_->GetBoxes().size());
339  // The output box text should be ligatured.
340  EXPECT_STREQ("fi", renderer_->GetBoxes()[0]->ch().c_str());
341 }
342 
343 TEST_F(StringRendererTest, DoesRetainInputLigatureForRendering) {
344  renderer_.reset(new StringRenderer("Verdana 10", 600, 600));
345  Pix* pix = nullptr;
346  EXPECT_EQ(strlen(kEngLigatureText),
347  renderer_->RenderToImage(kEngLigatureText, strlen(kEngLigatureText),
348  &pix));
349  pixDestroy(&pix);
350  // There should be one less box than letters due to the 'fi' ligature.
351  EXPECT_EQ(strlen(kEngNonLigatureText) - 1, renderer_->GetBoxes().size());
352  // The output box text should be ligatured.
353  EXPECT_STREQ("\uFB01", renderer_->GetBoxes()[0]->ch().c_str());
354 }
355 
356 TEST_F(StringRendererTest, DoesStripUnrenderableWords) {
357  // Verdana should only be able to render the english letters and numbers in
358  // the mixed text.
359  renderer_.reset(new StringRenderer("Verdana 10", 600, 600));
360  std::string text(kMixedText);
361  EXPECT_GT(renderer_->StripUnrenderableWords(&text), 0);
362  EXPECT_EQ(" 123 abc", text);
363 }
364 
365 TEST_F(StringRendererTest, DoesRenderWordBoxes) {
366  renderer_.reset(new StringRenderer("Verdana 10", 600, 600));
367  renderer_->set_output_word_boxes(true);
368  Pix* pix = nullptr;
369  EXPECT_EQ(strlen(kEngText),
370  renderer_->RenderToImage(kEngText, strlen(kEngText), &pix));
371  pixDestroy(&pix);
372  // Verify #boxchars = #words + #spaces
373  std::vector<std::string> words =
374  absl::StrSplit(kEngText, ' ', absl::SkipEmpty());
375  const int kNumSpaces = words.size() - 1;
376  const int kExpectedNumBoxes = words.size() + kNumSpaces;
377  const std::vector<BoxChar*>& boxchars = renderer_->GetBoxes();
378  EXPECT_EQ(kExpectedNumBoxes, boxchars.size());
379  // Verify content of words and spaces
380  for (size_t i = 0; i < boxchars.size(); i += 2) {
381  EXPECT_EQ(words[i / 2], boxchars[i]->ch());
382  if (i < boxchars.size() - 1) {
383  EXPECT_EQ(" ", boxchars[i + 1]->ch());
384  EXPECT_TRUE(boxchars[i + 1]->box() == nullptr);
385  }
386  }
387 }
388 
389 TEST_F(StringRendererTest, DoesRenderWordBoxesFromMultiLineText) {
390  renderer_.reset(new StringRenderer("Verdana 10", 600, 600));
391  renderer_->set_output_word_boxes(true);
392  Pix* pix = nullptr;
393  const char kMultlineText[] = "the quick brown fox\njumps over the lazy dog";
394  EXPECT_EQ(strlen(kMultlineText),
395  renderer_->RenderToImage(kMultlineText, strlen(kEngText), &pix));
396  pixDestroy(&pix);
397  // Verify #boxchars = #words + #spaces + #newlines
398  std::vector<std::string> words =
399  absl::StrSplit(kMultlineText, absl::ByAnyChar(" \n"), absl::SkipEmpty());
400  const int kNumSeparators = words.size() - 1;
401  const int kExpectedNumBoxes = words.size() + kNumSeparators;
402  const std::vector<BoxChar*>& boxchars = renderer_->GetBoxes();
403  EXPECT_EQ(kExpectedNumBoxes, boxchars.size());
404  // Verify content of words and spaces
405  for (size_t i = 0; i < boxchars.size(); i += 2) {
406  EXPECT_EQ(words[i / 2], boxchars[i]->ch());
407  if (i + 1 < boxchars.size()) {
408  EXPECT_EQ(" ", boxchars[i + 1]->ch());
409  EXPECT_TRUE(boxchars[i + 1]->box() == nullptr);
410  }
411  }
412 }
413 
414 TEST_F(StringRendererTest, DoesRenderAllFontsToImage) {
415  renderer_.reset(new StringRenderer("Verdana 10", 1200, 1200));
416  size_t offset = 0;
417  std::string font_used;
418  do {
419  Pix* pix = nullptr;
420  font_used.clear();
421  offset += renderer_->RenderAllFontsToImage(
422  1.0, kEngText + offset, strlen(kEngText + offset), &font_used, &pix);
423  if (offset < strlen(kEngText)) {
424  EXPECT_TRUE(pix != nullptr);
425  EXPECT_STRNE("", font_used.c_str());
426  }
427  if (FLAGS_display) pixDisplay(pix, 0, 0);
428  pixDestroy(&pix);
429  } while (offset < strlen(kEngText));
430 }
431 
432 TEST_F(StringRendererTest, DoesNotRenderWordJoiner) {
433  renderer_.reset(new StringRenderer("Verdana 10", 500, 200));
434  const std::string word = "A- -B C-D A BC";
435  const std::string joined_word = StringRenderer::InsertWordJoiners(word);
436  Pix* pix = nullptr;
437  renderer_->RenderToImage(joined_word.c_str(), joined_word.length(), &pix);
438  pixDestroy(&pix);
439  const std::vector<BoxChar*>& boxchars = renderer_->GetBoxes();
440  const std::string kWordJoinerUTF8 = "\u2060";
441  ASSERT_EQ(word.length(), boxchars.size());
442  for (size_t i = 0; i < boxchars.size(); ++i) {
443  EXPECT_NE(kWordJoinerUTF8, boxchars[i]->ch());
444  EXPECT_EQ(word.substr(i, 1), boxchars[i]->ch());
445  }
446 }
447 
448 TEST_F(StringRendererTest, DISABLED_DoesDropUncoveredChars) {
449  renderer_.reset(new StringRenderer("Verdana 10", 500, 200));
450  renderer_->set_drop_uncovered_chars(true);
451  const std::string kWord = "office";
452  const std::string kCleanWord = "oice";
453  Pix* pix = nullptr;
454  EXPECT_FALSE(
455  renderer_->font().CanRenderString(kWord.c_str(), kWord.length()));
456  EXPECT_FALSE(renderer_->font().CoversUTF8Text(kWord.c_str(), kWord.length()));
457  int offset = renderer_->RenderToImage(kWord.c_str(), kWord.length(), &pix);
458  pixDestroy(&pix);
459  const std::vector<BoxChar*>& boxchars = renderer_->GetBoxes();
460  EXPECT_EQ(kWord.length(), offset);
461  ASSERT_EQ(kCleanWord.length(), boxchars.size());
462  for (size_t i = 0; i < boxchars.size(); ++i) {
463  EXPECT_EQ(kCleanWord.substr(i, 1), boxchars[i]->ch());
464  }
465 }
466 
467 // ------------ StringRenderer::ConvertBasicLatinToFullwidthLatin() ------------
468 
469 TEST(ConvertBasicLatinToFullwidthLatinTest, DoesConvertBasicLatin) {
470  const std::string kHalfAlpha = "ABCD";
471  const std::string kFullAlpha = "ABCD";
472  EXPECT_EQ(kFullAlpha,
473  StringRenderer::ConvertBasicLatinToFullwidthLatin(kHalfAlpha));
474 
475  const std::string kHalfDigit = "0123";
476  const std::string kFullDigit = "0123";
477  EXPECT_EQ(kFullDigit,
478  StringRenderer::ConvertBasicLatinToFullwidthLatin(kHalfDigit));
479 
480  const std::string kHalfSym = "()[]:;!?";
481  const std::string kFullSym = "()[]:;!?";
482  EXPECT_EQ(kFullSym,
483  StringRenderer::ConvertBasicLatinToFullwidthLatin(kHalfSym));
484 }
485 
486 TEST(ConvertBasicLatinToFullwidthLatinTest, DoesNotConvertFullwidthLatin) {
487  const std::string kFullAlpha = "ABCD";
488  EXPECT_EQ(kFullAlpha,
489  StringRenderer::ConvertBasicLatinToFullwidthLatin(kFullAlpha));
490 
491  const std::string kFullDigit = "0123";
492  EXPECT_EQ(kFullDigit,
493  StringRenderer::ConvertBasicLatinToFullwidthLatin(kFullDigit));
494 
495  const std::string kFullSym = "()[]:;!?";
496  EXPECT_EQ(kFullSym,
497  StringRenderer::ConvertBasicLatinToFullwidthLatin(kFullSym));
498 }
499 
500 TEST(ConvertBasicLatinToFullwidthLatinTest, DoesNotConvertNonLatin) {
501  const std::string kHalfKana = "アイウエオ";
502  const std::string kFullKana = "アイウエオ";
503  EXPECT_EQ(kHalfKana,
504  StringRenderer::ConvertBasicLatinToFullwidthLatin(kHalfKana));
505  EXPECT_EQ(kFullKana,
506  StringRenderer::ConvertBasicLatinToFullwidthLatin(kFullKana));
507 }
508 
509 TEST(ConvertBasicLatinToFullwidthLatinTest, DoesNotConvertSpace) {
510  const std::string kHalfSpace = " ";
511  const std::string kFullSpace = " ";
512  EXPECT_EQ(kHalfSpace,
513  StringRenderer::ConvertBasicLatinToFullwidthLatin(kHalfSpace));
514  EXPECT_EQ(kFullSpace,
515  StringRenderer::ConvertBasicLatinToFullwidthLatin(kFullSpace));
516 }
517 
518 // ------------ StringRenderer::ConvertFullwidthLatinToBasicLatin() ------------
519 
520 TEST(ConvertFullwidthLatinToBasicLatinTest, DoesConvertFullwidthLatin) {
521  const std::string kHalfAlpha = "ABCD";
522  const std::string kFullAlpha = "ABCD";
523  EXPECT_EQ(kHalfAlpha,
524  StringRenderer::ConvertFullwidthLatinToBasicLatin(kFullAlpha));
525 
526  const std::string kHalfDigit = "0123";
527  const std::string kFullDigit = "0123";
528  EXPECT_EQ(kHalfDigit,
529  StringRenderer::ConvertFullwidthLatinToBasicLatin(kFullDigit));
530 
531  const std::string kHalfSym = "()[]:;!?";
532  const std::string kFullSym = "()[]:;!?";
533  EXPECT_EQ(kHalfSym,
534  StringRenderer::ConvertFullwidthLatinToBasicLatin(kFullSym));
535 }
536 
537 TEST(ConvertFullwidthLatinToBasicLatinTest, DoesNotConvertBasicLatin) {
538  const std::string kHalfAlpha = "ABCD";
539  EXPECT_EQ(kHalfAlpha,
540  StringRenderer::ConvertFullwidthLatinToBasicLatin(kHalfAlpha));
541 
542  const std::string kHalfDigit = "0123";
543  EXPECT_EQ(kHalfDigit,
544  StringRenderer::ConvertFullwidthLatinToBasicLatin(kHalfDigit));
545 
546  const std::string kHalfSym = "()[]:;!?";
547  EXPECT_EQ(kHalfSym,
548  StringRenderer::ConvertFullwidthLatinToBasicLatin(kHalfSym));
549 }
550 
551 TEST(ConvertFullwidthLatinToBasicLatinTest, DoesNotConvertNonLatin) {
552  const std::string kHalfKana = "アイウエオ";
553  const std::string kFullKana = "アイウエオ";
554  EXPECT_EQ(kHalfKana,
555  StringRenderer::ConvertFullwidthLatinToBasicLatin(kHalfKana));
556  EXPECT_EQ(kFullKana,
557  StringRenderer::ConvertFullwidthLatinToBasicLatin(kFullKana));
558 }
559 
560 TEST(ConvertFullwidthLatinToBasicLatinTest, DoesNotConvertSpace) {
561  const std::string kHalfSpace = " ";
562  const std::string kFullSpace = " ";
563  EXPECT_EQ(kHalfSpace,
564  StringRenderer::ConvertFullwidthLatinToBasicLatin(kHalfSpace));
565  EXPECT_EQ(kFullSpace,
566  StringRenderer::ConvertFullwidthLatinToBasicLatin(kFullSpace));
567 }
568 } // namespace
string
std::string string
Definition: equationdetect_test.cc:21
strngs.h
boxread.h
BOOL_PARAM_FLAG
BOOL_PARAM_FLAG(display, false, "Display image for inspection")
include_gunit.h
tesseract::TEST_F
TEST_F(EquationFinderTest, IdentifySpecialText)
Definition: equationdetect_test.cc:181
DECLARE_STRING_PARAM_FLAG
DECLARE_STRING_PARAM_FLAG(fonts_dir)
boxchar.h
genericvector.h
FLAGS_test_tmpdir
const char * FLAGS_test_tmpdir
Definition: include_gunit.h:20
tesseract::BoxChar
Definition: boxchar.h:35
ReadMemBoxes
bool ReadMemBoxes(int target_page, bool skip_blanks, const char *box_data, bool continue_on_failure, GenericVector< TBOX > *boxes, GenericVector< STRING > *texts, GenericVector< STRING > *box_texts, GenericVector< int > *pages)
Definition: boxread.cpp:87
GenericVector< STRING >
tesseract::StringRenderer
Definition: stringrenderer.h:49
DECLARE_BOOL_PARAM_FLAG
DECLARE_BOOL_PARAM_FLAG(use_only_legacy_fonts)
commandlineflags.h
stringrenderer.h