4 #include "allheaders.h"
11 #include "absl/strings/str_format.h"
// Appends every element of `from`, in index order, to the container that
// `to` points at.
// NOTE(review): `i` is a signed int compared against from.size(); presumably
// size() is unsigned, making this a signed/unsigned comparison -- confirm.
26 for (
int i = 0; i < from.
size(); i++) to->push_back(from[i]);
// Appends every element of `from`, in index order, to the container that
// `to` points at.
// NOTE(review): `i` is a signed int compared against from.size(); presumably
// size() is unsigned, making this a signed/unsigned comparison -- confirm.
32 for (
int i = 0; i < from.
size(); i++) to->push_back(from[i]);
// Fixture class for the TEST_F cases in this file; derives from testing::Test.
36 class ResultIteratorTest :
public testing::Test {
// Starts with no source image attached: src_pix_ is set to nullptr.
48 ResultIteratorTest() { src_pix_ =
nullptr; }
// Empty destructor body: nothing is released here.
49 ~ResultIteratorTest() {}
// Reads the named test image and hands it to api_.
// `filename` is resolved through TestDataNameToPath() before being read.
51 void SetImage(
const char* filename) {
52 src_pix_ = pixRead(TestDataNameToPath(filename).c_str());
57 api_.SetImage(src_pix_);
// Drop the handle obtained from pixRead, then point src_pix_ at the image
// that api_ reports as its input.
// NOTE(review): presumably api_ keeps its own reference to the image, so the
// pixDestroy below does not invalidate it -- confirm.
58 pixDestroy(&src_pix_);
59 src_pix_ = api_.GetInputImage();
// Rebuilds a page image from the pieces `it` yields at `level`, XORs it with
// src_pix_, and expects the resulting pixel count to be at most max_diff.
// Scratch image with the same width, height and depth as src_pix_.
67 int width = pixGetWidth(src_pix_);
68 int height = pixGetHeight(src_pix_);
69 int depth = pixGetDepth(src_pix_);
70 Pix* pix = pixCreate(width, height, depth);
// Only depths of 1 and 8 are expected; for depth 8 the scratch image gets
// all of its bits set before anything is drawn into it.
71 EXPECT_TRUE(depth == 1 || depth == 8);
72 if (depth == 8) pixSetAll(pix);
74 int left, top, right, bottom;
// If there is no bounding box at `level`, one is required at im_level.
// NOTE(review): presumably im_level is switched to a coarser level just
// before the EXPECT_TRUE -- confirm.
77 if (!it->BoundingBox(level, &left, &top, &right, &bottom)) {
79 EXPECT_TRUE(it->BoundingBox(im_level, &left, &top, &right, &bottom));
81 LOG(
INFO) <<
"BBox: [L:" << left <<
", T:" << top <<
", R:" << right
82 <<
", B:" << bottom <<
"]" <<
"\n";
// Binary piece: XOR it into the scratch image at its bounding box.
85 block_pix = it->GetBinaryImage(im_level);
86 pixRasterop(pix, left, top, right - left, bottom - top,
87 PIX_SRC ^ PIX_DST, block_pix, 0, 0);
// Piece obtained via GetImage from src_pix_ (left and top are passed by
// address): AND it into the scratch image using the piece's own dimensions.
// NOTE(review): presumably this path is used for 8-bit input and the binary
// path above for 1-bit input -- confirm.
89 block_pix = it->GetImage(im_level, 2, src_pix_, &left, &top);
90 pixRasterop(pix, left, top, pixGetWidth(block_pix),
91 pixGetHeight(block_pix), PIX_SRC & PIX_DST, block_pix, 0,
// NOTE(review): this null check appears to run after block_pix has already
// been handed to pixRasterop above -- confirm the intended order.
94 CHECK(block_pix !=
nullptr);
95 pixDestroy(&block_pix);
96 }
while (it->Next(level));
// XOR the rebuilt image with the source; identical pixels cancel out.
99 pixRasterop(pix, 0, 0, width, height, PIX_SRC ^ PIX_DST, src_pix_, 0, 0);
// Threshold pix at 128 into a binary image and invert that image in place.
// NOTE(review): presumably this applies only to 8-bit input and pix is
// replaced by binary_pix before the count below -- confirm.
101 Pix* binary_pix = pixThresholdToBinary(pix, 128);
103 pixInvert(binary_pix, binary_pix);
109 pixCountPixels(pix, &pixcount,
nullptr);
// Over the limit: write the image to failedxor.png (PNG) and log the path.
110 if (pixcount > max_diff) {
111 std::string outfile = OutputNameToPath(
"failedxor.png");
112 LOG(
INFO) <<
"outfile = " << outfile <<
"\n";
113 pixWrite(outfile.c_str(), pix, IFF_PNG);
116 LOG(
INFO) << absl::StrFormat(
"At level %d: pix diff = %d\n", level, pixcount);
117 EXPECT_LE(pixcount, max_diff);
124 ResultIterator* it) {
// Walks `it` at `level` and expects `result` to equal `truth`.
// NOTE(review): presumably `result` is built up from the text of each
// element visited -- confirm.
125 LOG(
INFO) <<
"Text Test Level " << level <<
"\n";
// UTF-8 text of the current element at `level`.
// NOTE(review): presumably this buffer is owned by the caller and has to be
// released after use -- confirm.
129 char* text = it->GetUTF8Text(level);
143 }
while (it->Next(level));
// The two C strings must match exactly; the message names the level.
144 EXPECT_STREQ(truth.c_str(), result.c_str())
145 <<
"Rebuild failed at Text Level " << level;
// Takes one limit per level (block, paragraph, line, word, symbol) plus the
// page iterator to check.
// NOTE(review): presumably each limit is the allowed pixel difference for an
// image rebuild at that level -- confirm.
148 void VerifyRebuilds(
int block_limit,
int para_limit,
int line_limit,
149 int word_limit,
int symbol_limit, PageIterator* it) {
// NOTE(review): presumably verifies that the text read through `it` matches
// `truth` -- confirm.
157 void VerifyAllText(
const std::string& truth, ResultIterator* it) {
// Calls ResultIterator::CalculateTextlineOrder for the given context and
// expects `output` to equal the first num_reading_order_entries values of
// expected_reading_order.
// NOTE(review): presumably `output` is filled by that call -- confirm.
170 void ExpectTextlineReadingOrder(
bool in_ltr_context,
172 int num_words,
int* expected_reading_order,
173 int num_reading_order_entries)
const {
175 for (
int i = 0; i < num_words; i++) {
180 ResultIterator::CalculateTextlineOrder(in_ltr_context, gv_word_dirs,
// Copy the expected values out of the raw array so that both sides of the
// comparison are std::vector<int>.
183 std::vector<int> correct_order(
184 expected_reading_order,
185 expected_reading_order + num_reading_order_entries);
// Convert `output` with ToVector before comparing.
186 std::vector<int> calculated_order;
187 ToVector(output, &calculated_order);
188 EXPECT_EQ(correct_order, calculated_order);
// Checks that the order computed for the given context contains the values
// 0..num_words-1; the failure message names the context ("ltr" or "rtl").
195 void VerifySaneTextlineOrder(
bool in_ltr_context,
197 int num_words)
const {
199 for (
int i = 0; i < num_words; i++) {
204 ResultIterator::CalculateTextlineOrder(in_ltr_context, gv_word_dirs,
// The output needs at least one entry per word.
206 ASSERT_GE(output.
size(), num_words);
// Skip leading negative entries.
// NOTE(review): presumably output_copy is a sorted copy of `output` and the
// negative entries are run markers -- confirm.
211 while (j < output_copy.size() && output_copy[j] < 0) j++;
// The entries that follow must read 0, 1, ..., num_words - 1.
// NOTE(review): j is not compared against output_copy.size() in this loop
// header -- confirm the index cannot run past the end.
212 for (
int i = 0; i < num_words; i++, j++) {
213 if (output_copy[j] != i) {
// True when j did not end exactly at the end of output_copy.
218 if (j != output_copy.size()) {
// Comparing against an empty vector makes a failing EXPECT_EQ print the
// contents of the output.
// NOTE(review): presumably reached only when the checks above failed -- confirm.
222 std::vector<int> output_copy2, empty;
223 ToVector(output, &output_copy2);
224 EXPECT_EQ(output_copy2, empty)
225 <<
" permutation of 0.." << num_words - 1 <<
" not found in "
226 << (in_ltr_context ?
"ltr" :
"rtl") <<
" context.";
// Runs the image-rebuild, text-rebuild, baseline and font-attribute checks
// on phototest.tif.
279 TEST_F(ResultIteratorTest, EasyTest) {
280 SetImage(
"phototest.tif");
// Layout analysis must hand back an iterator.
282 PageIterator* p_it = api_.AnalyseLayout();
283 EXPECT_FALSE(p_it ==
nullptr);
291 LOG(
INFO) <<
"Verifying image rebuilds 1 (pageiterator)" <<
"\n";
292 VerifyRebuilds(10, 10, 0, 0, 0, p_it);
// NOTE(review): presumably GetUTF8Text() is what triggers recognition before
// the result iterator is requested -- confirm.
295 char* result = api_.GetUTF8Text();
298 ResultIterator* r_it = api_.GetIterator();
300 LOG(
INFO) <<
"Verifying image rebuilds 2a (resultiterator)" <<
"\n";
301 VerifyRebuilds(8, 8, 0, 0, 40, r_it);
303 LOG(
INFO) <<
"Verifying text rebuilds 1 (resultiterator)" <<
"\n";
304 VerifyAllText(ocr_text_, r_it);
// Same image-rebuild check as 2a, repeated after the text check.
307 LOG(
INFO) <<
"Verifying image rebuilds 2b (resultiterator)" <<
"\n";
308 VerifyRebuilds(8, 8, 0, 0, 40, r_it);
314 LOG(
INFO) << absl::StrFormat(
"Baseline (%d,%d)->(%d,%d)", x1, y1, x2, y2) <<
"\n";
// x2 must be at least x1 + 400.
316 EXPECT_GE(x2, x1 + 400);
// |x2*y3 - x3*y2| must not exceed x2.
// NOTE(review): presumably a cross product testing that a second vector is
// nearly parallel to the baseline -- confirm what x3/y3 hold.
324 int product = x2 * y3 - x3 * y2;
325 EXPECT_LE(abs(product), x2);
329 bool bold, italic, underlined, monospace, serif, smallcaps;
330 int pointsize, font_id;
332 r_it->WordFontAttributes(&bold, &italic, &underlined, &monospace,
333 &serif, &smallcaps, &pointsize, &font_id);
// Confidence must be at least 80.
335 EXPECT_GE(confidence, 80.0f);
337 LOG(
INFO) << absl::StrFormat(
"Word %s in font %s, id %d, size %d, conf %g",
338 word_str, font, font_id, pointsize, confidence) <<
"\n";
// The word must not be flagged italic, underlined or monospace.
341 EXPECT_FALSE(italic);
342 EXPECT_FALSE(underlined);
343 EXPECT_FALSE(monospace);
// Point size must lie within 1.50 of 11.16.
348 EXPECT_GE(pointsize, 11.16 - 1.50);
349 EXPECT_LE(pointsize, 11.16 + 1.50);
// Layout analysis on 8087_054.3B.tif must return an iterator; VerifyRebuilds
// is then called with a limit of 400 per level, except 650 for symbols.
355 TEST_F(ResultIteratorTest, ComplexTest) {
356 SetImage(
"8087_054.3B.tif");
358 PageIterator* it = api_.AnalyseLayout();
359 EXPECT_FALSE(it ==
nullptr);
361 VerifyRebuilds(400, 400, 400, 400, 650, it);
// Layout analysis on 8087_054.3G.tif must return an iterator; VerifyRebuilds
// is then called with a limit of 600 at every level.
366 TEST_F(ResultIteratorTest, GreyTest) {
367 SetImage(
"8087_054.3G.tif");
369 PageIterator* it = api_.AnalyseLayout();
370 EXPECT_FALSE(it ==
nullptr);
372 VerifyRebuilds(600, 600, 600, 600, 600, it);
// Checks the smallcaps word attribute and the dropcap symbol flag on
// 8071_093.3B.tif.
377 TEST_F(ResultIteratorTest, SmallCapDropCapTest) {
378 SetImage(
"8071_093.3B.tif");
379 char* result = api_.GetUTF8Text();
381 ResultIterator* r_it = api_.GetIterator();
// Tallies that are checked at the bottom of the test.
383 int found_dropcaps = 0;
384 int found_smallcaps = 0;
385 int false_positives = 0;
387 bool bold, italic, underlined, monospace, serif, smallcaps;
388 int pointsize, font_id;
389 r_it->WordFontAttributes(&bold, &italic, &underlined, &monospace, &serif,
390 &smallcaps, &pointsize, &font_id);
392 if (word_str !=
nullptr) {
393 LOG(
INFO) << absl::StrFormat(
"Word %s is %s", word_str,
394 smallcaps ?
"SMALLCAPS" :
"Normal") <<
"\n";
395 if (r_it->SymbolIsDropcap()) {
// These five words are required to be reported as smallcaps.
398 if (strcmp(word_str,
"SHE") == 0 || strcmp(word_str,
"MOPED") == 0 ||
399 strcmp(word_str,
"RALPH") == 0 ||
400 strcmp(word_str,
"KINNEY") == 0 ||
401 strcmp(word_str,
"BENNETT") == 0) {
402 EXPECT_TRUE(smallcaps) << word_str;
// NOTE(review): presumably the else branch, so smallcaps on any other word
// counts as a false positive -- confirm.
405 if (smallcaps) ++false_positives;
// Copy of r_it, used for the symbol-level dropcap checks below.
408 ResultIterator s_it(*r_it);
411 if (s_it.SymbolIsDropcap()) {
413 LOG(
ERROR) << absl::StrFormat(
"Symbol %s of word %s is dropcap", sym_str,
417 EXPECT_FALSE(s_it.SymbolIsDropcap());
// Exactly one dropcap, at most four smallcaps, at most three false positives.
// NOTE(review): EXPECT_GE(4, found_smallcaps) is an upper bound on
// found_smallcaps; five words are listed above as smallcaps, so confirm that
// a lower bound was not intended.
423 EXPECT_EQ(1, found_dropcaps);
424 EXPECT_GE(4, found_smallcaps);
425 EXPECT_LE(false_positives, 3);
// Classifies symbols on 0146_281.3B.tif via SymbolIsSubscript() and
// SymbolIsSuperscript() and checks the sub/super/normal counts against
// minimums.
436 TEST_F(ResultIteratorTest, SubSuperTest) {
437 SetImage(
"0146_281.3B.tif");
438 char* result = api_.GetUTF8Text();
440 ResultIterator* r_it = api_.GetIterator();
// Characters accepted as the first byte of a superscript.
444 const char kAllowedSupers[] =
"O0123456789-";
446 int found_supers = 0;
447 int found_normal = 0;
449 if (r_it->SymbolIsSubscript()) {
451 }
else if (r_it->SymbolIsSuperscript()) {
// A first byte outside kAllowedSupers is logged as an error; the EXPECT_TRUE
// below checks the same condition.
// NOTE(review): strchr also matches the terminating '\0', so an empty
// string passes this check -- confirm that is acceptable.
453 if (strchr(kAllowedSupers, result[0]) ==
nullptr) {
455 LOG(
ERROR) << absl::StrFormat(
"Char %s in word %s is unexpected super!",
458 EXPECT_TRUE(strchr(kAllowedSupers, result[0]) !=
nullptr);
467 LOG(
INFO) << absl::StrFormat(
"Subs = %d, supers= %d, normal = %d",
468 found_subs, found_supers, found_normal) <<
"\n";
// Minimum counts: 25 subscripts, 25 superscripts, 1350 normal symbols.
469 EXPECT_GE(found_subs, 25);
470 EXPECT_GE(found_supers, 25);
471 EXPECT_GE(found_normal, 1350);
// Checks the reading order computed for word_dirs in both contexts.
// The expected arrays mix word indices with the ResultIterator::kMinorRunStart
// and ResultIterator::kMinorRunEnd constants.
483 TEST_F(ResultIteratorTest, DualStartTextlineOrderTest) {
485 int reading_order_rtl_context[] = {7, 6, 5, 4, ResultIterator::kMinorRunStart,
486 0, 1, 2, 3, ResultIterator::kMinorRunEnd};
487 int reading_order_ltr_context[] = {0, 1,
489 4, ResultIterator::kMinorRunStart,
491 5, ResultIterator::kMinorRunEnd};
// The first argument is in_ltr_context: true for ltr, false for rtl.
493 ExpectTextlineReadingOrder(
true, word_dirs, ABSL_ARRAYSIZE(word_dirs),
494 reading_order_ltr_context,
495 ABSL_ARRAYSIZE(reading_order_ltr_context));
496 ExpectTextlineReadingOrder(
false, word_dirs, ABSL_ARRAYSIZE(word_dirs),
497 reading_order_rtl_context,
498 ABSL_ARRAYSIZE(reading_order_rtl_context));
// Expected orders for word_dirs: plain 0..7 in an ltr context, and the same
// sequence bracketed by kMinorRunStart / kMinorRunEnd in an rtl context.
503 TEST_F(ResultIteratorTest, LeftwardTextlineOrderTest) {
506 int reading_order_ltr_context[] = {0, 1, 2, 3, 4, 5, 6, 7};
509 int reading_order_rtl_context[] = {
510 ResultIterator::kMinorRunStart, 0, 1, 2, 3, 4, 5, 6, 7,
511 ResultIterator::kMinorRunEnd};
// The first argument is in_ltr_context: true for ltr, false for rtl.
513 ExpectTextlineReadingOrder(
true, word_dirs, ABSL_ARRAYSIZE(word_dirs),
514 reading_order_ltr_context,
515 ABSL_ARRAYSIZE(reading_order_ltr_context));
516 ExpectTextlineReadingOrder(
false, word_dirs, ABSL_ARRAYSIZE(word_dirs),
517 reading_order_rtl_context,
518 ABSL_ARRAYSIZE(reading_order_rtl_context));
// In an rtl context (in_ltr_context == false) the expected order for
// word_dirs is 7 down to 0.
523 TEST_F(ResultIteratorTest, RightwardTextlineOrderTest) {
526 int reading_order_rtl_context[] = {7, 6, 5, 4, 3, 2, 1, 0};
527 ExpectTextlineReadingOrder(
false, word_dirs, ABSL_ARRAYSIZE(word_dirs),
528 reading_order_rtl_context,
529 ABSL_ARRAYSIZE(reading_order_rtl_context));
// Runs VerifySaneTextlineOrder in both contexts for kNumCombos different
// direction assignments of kNumWords words.
532 TEST_F(ResultIteratorTest, TextlineOrderSanityCheck) {
535 const int kNumWords(7);
// 1 << (2 * kNumWords) == 4^kNumWords: each word takes one of four values
// (see the `% 4` below).
536 const int kNumCombos = 1 << (2 * kNumWords);
538 for (
int i = 0; i < kNumCombos; i++) {
541 for (
int j = 0; j < kNumWords; j++) {
// NOTE(review): presumably tmp starts as i and is divided by 4 after each
// word, so that every i encodes one combination -- confirm.
542 word_dirs[j] = static_cast<StrongScriptDirection>(tmp % 4);
545 VerifySaneTextlineOrder(
true, word_dirs, kNumWords);
546 VerifySaneTextlineOrder(
false, word_dirs, kNumWords);
// Requires every character choice string read from c_it on 5318c4b679264.jpg
// to be non-null. The DISABLED_ name prefix keeps GoogleTest from running
// this test by default.
551 TEST_F(ResultIteratorTest, DISABLED_NonNullChoicesTest) {
552 SetImage(
"5318c4b679264.jpg");
553 char* result = api_.GetUTF8Text();
555 ResultIterator* r_it = api_.GetIterator();
559 if (word_str !=
nullptr) {
560 LOG(
INFO) << absl::StrFormat(
"Word %s:", word_str) <<
"\n";
// Work on a copy of the iterator.
561 ResultIterator s_it = *r_it;
565 const char* char_str = c_it.GetUTF8Text();
// Log the choice (or its absence), then require it to be non-null.
566 if (char_str ==
nullptr)
567 LOG(
INFO) <<
"Null char choice" <<
"\n";
569 LOG(
INFO) <<
"Char choice " << char_str <<
"\n";
570 CHECK(char_str !=
nullptr);
571 }
while (c_it.Next());
582 TEST_F(ResultIteratorTest, NonNullConfidencesTest) {
584 SetImage(
"trainingitalline.tif");
588 char* result = api_.GetUTF8Text();
590 ResultIterator* r_it = api_.GetIterator();
594 if (word_str !=
nullptr) {
597 ResultIterator s_it = *r_it;
600 CHECK(char_str !=
nullptr);
602 LOG(
INFO) << absl::StrFormat(
"Char %s has confidence %g\n", char_str,
610 LOG(
INFO) <<
"Empty word found" <<
"\n";