tesseract  5.0.0-alpha-619-ge9db
equationdetect_test.cc
Go to the documentation of this file.
1 // (C) Copyright 2017, Google Inc.
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 // http://www.apache.org/licenses/LICENSE-2.0
6 // Unless required by applicable law or agreed to in writing, software
7 // distributed under the License is distributed on an "AS IS" BASIS,
8 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 // See the License for the specific language governing permissions and
10 // limitations under the License.
11 
12 #include <memory>
13 #include <string>
14 #include <utility>
15 #include "allheaders.h"
16 #include "colpartitiongrid.h"
17 #include "equationdetect.h"
18 #include "tesseractclass.h"
19 #include "include_gunit.h"
20 
21 using string = std::string;
22 #include <tensorflow/core/lib/gtl/map_util.h>
23 #include <tensorflow/core/lib/gtl/stl_util.h>
24 
25 namespace tesseract {
26 
28  public:
29  TestableEquationDetect(const char* tessdata, Tesseract* lang_tesseract)
30  : EquationDetect(tessdata, "equ") {
31  SetLangTesseract(lang_tesseract);
32  }
33 
34  // Insert a certain math and digit blobs into part.
35  void AddMathDigitBlobs(const int math_blobs, const int digit_blobs,
36  const int total_blobs, ColPartition* part) {
37  CHECK(part != nullptr);
38  CHECK_LE(math_blobs + digit_blobs, total_blobs);
39  int count = 0;
40  for (int i = 0; i < math_blobs; i++, count++) {
41  BLOBNBOX* blob = new BLOBNBOX();
43  part->AddBox(blob);
44  }
45  for (int i = 0; i < digit_blobs; i++, count++) {
46  BLOBNBOX* blob = new BLOBNBOX();
48  part->AddBox(blob);
49  }
50  for (int i = count; i < total_blobs; i++) {
51  BLOBNBOX* blob = new BLOBNBOX();
53  part->AddBox(blob);
54  }
55  }
56 
57  // Set up pix_binary for lang_tesseract_.
58  void SetPixBinary(Pix* pix) {
59  CHECK_EQ(1, pixGetDepth(pix));
61  }
62 
63  void RunIdentifySpecialText(BLOBNBOX* blob, const int height_th) {
64  IdentifySpecialText(blob, height_th);
65  }
66 
68  const UNICHARSET& unicharset = lang_tesseract_->unicharset;
69  return EstimateTypeForUnichar(unicharset, unicharset.unichar_to_id(val));
70  }
71 
73  ColPartition* part) {
74  this->part_grid_ = part_grid;
75  return IsIndented(part);
76  }
77 
78  bool RunIsNearSmallNeighbor(const TBOX& seed_box, const TBOX& part_box) {
79  return IsNearSmallNeighbor(seed_box, part_box);
80  }
81 
83  return CheckSeedBlobsCount(part);
84  }
85 
86  float RunComputeForegroundDensity(const TBOX& tbox) {
87  return ComputeForegroundDensity(tbox);
88  }
89 
90  int RunCountAlignment(const GenericVector<int>& sorted_vec, const int val) {
91  return CountAlignment(sorted_vec, val);
92  }
93 
95  GenericVector<TBOX>* splitted_boxes) {
96  SplitCPHorLite(part, splitted_boxes);
97  }
98 
100  GenericVector<ColPartition*>* parts_splitted) {
101  SplitCPHor(part, parts_splitted);
102  }
103 
104  void TestComputeCPsSuperBBox(const TBOX& box, ColPartitionGrid* part_grid) {
105  CHECK(part_grid != nullptr);
106  part_grid_ = part_grid;
108  EXPECT_TRUE(*cps_super_bbox_ == box);
109  }
110 };
111 
112 class EquationFinderTest : public testing::Test {
113  protected:
114  std::unique_ptr<TestableEquationDetect> equation_det_;
115  std::unique_ptr<Tesseract> tesseract_;
116 
117  // The directory for testdata;
119 
120  void SetUp() {
121  std::locale::global(std::locale(""));
122  tesseract_.reset(new Tesseract());
123  tesseract_->init_tesseract(TESSDATA_DIR, "eng", OEM_TESSERACT_ONLY);
124  tesseract_->set_source_resolution(300);
125  equation_det_.reset(
126  new TestableEquationDetect(TESSDATA_DIR, tesseract_.get()));
127  equation_det_->SetResolution(300);
128 
129  testdata_dir_ = TESTDATA_DIR;
130  }
131 
132  void TearDown() {
133  tesseract_.reset(nullptr);
134  equation_det_.reset(nullptr);
135  }
136 
137  // Add a BLOCK covering the whole page.
138  void AddPageBlock(Pix* pix, BLOCK_LIST* blocks) {
139  CHECK(pix != nullptr);
140  CHECK(blocks != nullptr);
141  BLOCK_IT block_it(blocks);
142  BLOCK* block =
143  new BLOCK("", true, 0, 0, 0, 0, pixGetWidth(pix), pixGetHeight(pix));
144  block_it.add_to_end(block);
145  }
146 
147  // Create col partitions, add into part_grid, and put them into all_parts.
148  void CreateColParts(const int rows, const int cols,
149  ColPartitionGrid* part_grid,
150  std::vector<ColPartition*>* all_parts) {
151  const int kWidth = 10, kHeight = 10;
152  ClearParts(all_parts);
153  for (int y = 0; y < rows; ++y) {
154  for (int x = 0; x < cols; ++x) {
155  int left = x * kWidth * 2, bottom = y * kHeight * 2;
156  TBOX box(left, bottom, left + kWidth, bottom + kHeight);
159  part_grid->InsertBBox(true, true, part);
160  all_parts->push_back(part);
161  }
162  }
163  }
164 
165  void ClearParts(std::vector<ColPartition*>* all_parts) {
166  for (size_t i = 0; i < all_parts->size(); ++i) {
167  (*all_parts)[i]->DeleteBoxes();
168  delete ((*all_parts)[i]);
169  }
170  }
171 
172  // Create a BLOBNBOX object with bounding box tbox, and add it into part.
173  void AddBlobIntoPart(const TBOX& tbox, ColPartition* part) {
174  CHECK(part != nullptr);
175  BLOBNBOX* blob = new BLOBNBOX();
176  blob->set_bounding_box(tbox);
177  part->AddBox(blob);
178  }
179 };
180 
181 TEST_F(EquationFinderTest, IdentifySpecialText) {
182 #if 1
183  GTEST_SKIP();
184 #else // TODO: missing equ_gt1.tif
185  // Load Image.
186  std::string imagefile = file::JoinPath(testdata_dir_, "equ_gt1.tif");
187  Pix* pix_binary = pixRead(imagefile.c_str());
188  CHECK(pix_binary != nullptr && pixGetDepth(pix_binary) == 1);
189 
190  // Get components.
191  BLOCK_LIST blocks;
192  TO_BLOCK_LIST to_blocks;
193  AddPageBlock(pix_binary, &blocks);
194  Textord* textord = tesseract_->mutable_textord();
195  textord->find_components(pix_binary, &blocks, &to_blocks);
196 
197  // Identify special texts from to_blocks.
198  TO_BLOCK_IT to_block_it(&to_blocks);
199  std::map<int, int> stt_count;
200  for (to_block_it.mark_cycle_pt(); !to_block_it.cycled_list();
201  to_block_it.forward()) {
202  TO_BLOCK* to_block = to_block_it.data();
203  BLOBNBOX_IT blob_it(&(to_block->blobs));
204  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
205  BLOBNBOX* blob = blob_it.data();
206  // blob->set_special_text_type(BSTT_NONE);
207  equation_det_->RunIdentifySpecialText(blob, 0);
208  tensorflow::gtl::InsertIfNotPresent(&stt_count, blob->special_text_type(), 0);
209  stt_count[blob->special_text_type()]++;
210  }
211  }
212 
213  // Verify the number, but allow a range of +/- kCountRange before squealing.
214  const int kCountRange = 3;
215  EXPECT_GE(39 + kCountRange, stt_count[BSTT_NONE]);
216  EXPECT_LE(39 - kCountRange, stt_count[BSTT_NONE]);
217 
218  // if you count all the subscripts etc, there are ~45 italic chars.
219  EXPECT_GE(45 + kCountRange, stt_count[BSTT_ITALIC]);
220  EXPECT_LE(45 - kCountRange, stt_count[BSTT_ITALIC]);
221  EXPECT_GE(41 + kCountRange, stt_count[BSTT_DIGIT]);
222  EXPECT_LE(41 - kCountRange, stt_count[BSTT_DIGIT]);
223  EXPECT_GE(50 + kCountRange, stt_count[BSTT_MATH]);
224  EXPECT_LE(50 - kCountRange, stt_count[BSTT_MATH]);
225  EXPECT_GE(10 + kCountRange, stt_count[BSTT_UNCLEAR]);
226  EXPECT_LE(10 - kCountRange, stt_count[BSTT_UNCLEAR]);
227 
228  // Release memory.
229  pixDestroy(&pix_binary);
230 #endif
231 }
232 
233 TEST_F(EquationFinderTest, EstimateTypeForUnichar) {
234  // Test abc characters.
235  EXPECT_EQ(BSTT_NONE, equation_det_->RunEstimateTypeForUnichar("a"));
236  EXPECT_EQ(BSTT_NONE, equation_det_->RunEstimateTypeForUnichar("c"));
237 
238  // Test punctuation characters.
239  EXPECT_EQ(BSTT_NONE, equation_det_->RunEstimateTypeForUnichar("'"));
240  EXPECT_EQ(BSTT_NONE, equation_det_->RunEstimateTypeForUnichar(","));
241 
242  // Test digits.
243  EXPECT_EQ(BSTT_DIGIT, equation_det_->RunEstimateTypeForUnichar("1"));
244  EXPECT_EQ(BSTT_DIGIT, equation_det_->RunEstimateTypeForUnichar("4"));
245  EXPECT_EQ(BSTT_DIGIT, equation_det_->RunEstimateTypeForUnichar("|"));
246 
247  // Test math symbols.
248  EXPECT_EQ(BSTT_MATH, equation_det_->RunEstimateTypeForUnichar("("));
249  EXPECT_EQ(BSTT_MATH, equation_det_->RunEstimateTypeForUnichar("+"));
250 }
251 
253  ColPartitionGrid part_grid(10, ICOORD(0, 0), ICOORD(1000, 1000));
254 
255  // Create five ColPartitions:
256  // part 1: ************
257  // part 2: *********
258  // part 3: *******
259  // part 4: *****
260  //
261  // part 5: ********
262  TBOX box1(0, 950, 999, 999);
263  ColPartition* part1 =
265  part_grid.InsertBBox(true, true, part1);
266  TBOX box2(300, 920, 900, 940);
267  ColPartition* part2 =
269  part_grid.InsertBBox(true, true, part2);
270  TBOX box3(0, 900, 600, 910);
271  ColPartition* part3 =
273  part_grid.InsertBBox(true, true, part3);
274  TBOX box4(300, 890, 600, 899);
275  ColPartition* part4 =
277  part_grid.InsertBBox(true, true, part4);
278  TBOX box5(300, 500, 900, 510);
279  ColPartition* part5 =
281  part_grid.InsertBBox(true, true, part5);
282 
283  // Test
284  // part1 should be no indent.
285  EXPECT_EQ(EquationDetect::NO_INDENT,
286  equation_det_->RunIsIndented(&part_grid, part1));
287  // part2 should be left indent in terms of part1.
288  EXPECT_EQ(EquationDetect::LEFT_INDENT,
289  equation_det_->RunIsIndented(&part_grid, part2));
290  // part3 should be right indent.
292  equation_det_->RunIsIndented(&part_grid, part3));
293  // part4 should be both indented.
294  EXPECT_EQ(EquationDetect::BOTH_INDENT,
295  equation_det_->RunIsIndented(&part_grid, part4));
296  // part5 should be no indent because it is too far from part1.
297  EXPECT_EQ(EquationDetect::NO_INDENT,
298  equation_det_->RunIsIndented(&part_grid, part5));
299 
300  // Release memory.
301  part1->DeleteBoxes();
302  delete (part1);
303  part2->DeleteBoxes();
304  delete (part2);
305  part3->DeleteBoxes();
306  delete (part3);
307  part4->DeleteBoxes();
308  delete (part4);
309  part5->DeleteBoxes();
310  delete (part5);
311 }
312 
313 TEST_F(EquationFinderTest, IsNearSmallNeighbor) {
314  // Create four tboxes:
315  // part 1, part 2
316  // ***** *****
317  // part 3: *****
318  //
319  // part 4: *****************
320  TBOX box1(0, 950, 499, 999);
321  TBOX box2(500, 950, 999, 998);
322  TBOX box3(0, 900, 499, 949);
323  TBOX box4(0, 550, 499, 590);
324 
325  // Test
326  // box2 should be box1's near neighbor but not vice versa.
327  EXPECT_TRUE(equation_det_->RunIsNearSmallNeighbor(box1, box2));
328  EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box2, box1));
329  // box1 and box3 should be near neighbors of each other.
330  EXPECT_TRUE(equation_det_->RunIsNearSmallNeighbor(box1, box3));
331  EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box2, box3));
332  // box2 and box3 should not be near neighbors of each other.
333  EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box2, box3));
334  EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box3, box2));
335 
336  // box4 should not be the near neighbor of any one.
337  EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box1, box4));
338  EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box2, box4));
339  EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box3, box4));
340 }
341 
342 TEST_F(EquationFinderTest, CheckSeedBlobsCount) {
343  TBOX box(0, 950, 999, 999);
344  ColPartition* part1 =
346  ColPartition* part2 =
348  ColPartition* part3 =
350  ColPartition* part4 =
352 
353  // Part 1: 8 math, 0 digit, 20 total.
354  equation_det_->AddMathDigitBlobs(8, 0, 20, part1);
355  EXPECT_TRUE(equation_det_->RunCheckSeedBlobsCount(part1));
356 
357  // Part 2: 1 math, 8 digit, 20 total.
358  equation_det_->AddMathDigitBlobs(1, 8, 20, part2);
359  EXPECT_FALSE(equation_det_->RunCheckSeedBlobsCount(part2));
360 
361  // Part 3: 3 math, 8 digit, 8 total.
362  equation_det_->AddMathDigitBlobs(3, 8, 20, part3);
363  EXPECT_TRUE(equation_det_->RunCheckSeedBlobsCount(part3));
364 
365  // Part 4: 8 math, 0 digit, 8 total.
366  equation_det_->AddMathDigitBlobs(0, 0, 8, part4);
367  EXPECT_FALSE(equation_det_->RunCheckSeedBlobsCount(part4));
368 
369  // Release memory.
370  part1->DeleteBoxes();
371  delete (part1);
372  part2->DeleteBoxes();
373  delete (part2);
374  part3->DeleteBoxes();
375  delete (part3);
376  part4->DeleteBoxes();
377  delete (part4);
378 }
379 
380 TEST_F(EquationFinderTest, ComputeForegroundDensity) {
381  // Create the pix with top half foreground, bottom half background.
382  int width = 1024, height = 768;
383  Pix* pix = pixCreate(width, height, 1);
384  pixRasterop(pix, 0, 0, width, height / 2, PIX_SET, nullptr, 0, 0);
385  TBOX box1(100, 0, 140, 140), box2(100, height / 2 - 20, 140, height / 2 + 20),
386  box3(100, height - 40, 140, height);
387  equation_det_->SetPixBinary(pix);
388 
389  // Verify
390  EXPECT_NEAR(0.0, equation_det_->RunComputeForegroundDensity(box1), 0.0001f);
391  EXPECT_NEAR(0.5, equation_det_->RunComputeForegroundDensity(box2), 0.0001f);
392  EXPECT_NEAR(1.0, equation_det_->RunComputeForegroundDensity(box3), 0.0001f);
393 }
394 
395 TEST_F(EquationFinderTest, CountAlignment) {
396  GenericVector<int> vec;
397  vec.push_back(1);
398  vec.push_back(1);
399  vec.push_back(1);
400  vec.push_back(100);
401  vec.push_back(200);
402  vec.push_back(200);
403 
404  // Test the right point.
405  EXPECT_EQ(3, equation_det_->RunCountAlignment(vec, 1));
406  EXPECT_EQ(1, equation_det_->RunCountAlignment(vec, 100));
407  EXPECT_EQ(2, equation_det_->RunCountAlignment(vec, 200));
408 
409  // Test the near neighbors.
410  EXPECT_EQ(3, equation_det_->RunCountAlignment(vec, 3));
411  EXPECT_EQ(1, equation_det_->RunCountAlignment(vec, 99));
412  EXPECT_EQ(2, equation_det_->RunCountAlignment(vec, 202));
413 
414  // Test the far neighbors.
415  EXPECT_EQ(0, equation_det_->RunCountAlignment(vec, 150));
416  EXPECT_EQ(0, equation_det_->RunCountAlignment(vec, 50));
417  EXPECT_EQ(0, equation_det_->RunCountAlignment(vec, 250));
418 }
419 
420 TEST_F(EquationFinderTest, ComputeCPsSuperBBox) {
421  Pix* pix = pixCreate(1001, 1001, 1);
422  equation_det_->SetPixBinary(pix);
423  ColPartitionGrid part_grid(10, ICOORD(0, 0), ICOORD(1000, 1000));
424 
425  TBOX box1(0, 0, 999, 99);
426  ColPartition* part1 =
428  TBOX box2(0, 100, 499, 199);
429  ColPartition* part2 =
431  TBOX box3(500, 100, 999, 199);
432  ColPartition* part3 =
434  TBOX box4(0, 200, 999, 299);
435  ColPartition* part4 =
437  TBOX box5(0, 900, 999, 999);
438  ColPartition* part5 =
440 
441  // Add part1->part3 into part_grid and test.
442  part_grid.InsertBBox(true, true, part1);
443  part_grid.InsertBBox(true, true, part2);
444  part_grid.InsertBBox(true, true, part3);
445  TBOX super_box(0, 0, 999, 199);
446  equation_det_->TestComputeCPsSuperBBox(super_box, &part_grid);
447 
448  // Add part4 and test.
449  part_grid.InsertBBox(true, true, part4);
450  TBOX super_box2(0, 0, 999, 299);
451  equation_det_->TestComputeCPsSuperBBox(super_box2, &part_grid);
452 
453  // Add part5 and test.
454  part_grid.InsertBBox(true, true, part5);
455  TBOX super_box3(0, 0, 999, 999);
456  equation_det_->TestComputeCPsSuperBBox(super_box3, &part_grid);
457 
458  // Release memory.
459  part1->DeleteBoxes();
460  delete (part1);
461  part2->DeleteBoxes();
462  delete (part2);
463  part3->DeleteBoxes();
464  delete (part3);
465  part4->DeleteBoxes();
466  delete (part4);
467  part5->DeleteBoxes();
468  delete (part5);
469 }
470 
471 TEST_F(EquationFinderTest, SplitCPHorLite) {
472  TBOX box(0, 0, 999, 99);
473  ColPartition* part =
475  part->DeleteBoxes();
476  part->set_median_width(10);
477  GenericVector<TBOX> splitted_boxes;
478 
479  // Test an empty part.
480  equation_det_->RunSplitCPHorLite(part, &splitted_boxes);
481  EXPECT_TRUE(splitted_boxes.empty());
482 
483  // Test with one blob.
484  AddBlobIntoPart(TBOX(0, 0, 10, 50), part);
485  equation_det_->RunSplitCPHorLite(part, &splitted_boxes);
486  EXPECT_EQ(1, splitted_boxes.size());
487  EXPECT_TRUE(TBOX(0, 0, 10, 50) == splitted_boxes[0]);
488 
489  // Add more blob and test.
490  AddBlobIntoPart(TBOX(11, 0, 20, 60), part);
491  AddBlobIntoPart(TBOX(25, 0, 30, 55), part); // break point.
492  AddBlobIntoPart(TBOX(100, 0, 110, 15), part);
493  AddBlobIntoPart(TBOX(125, 0, 140, 45), part); // break point.
494  AddBlobIntoPart(TBOX(500, 0, 540, 35), part); // break point.
495  equation_det_->RunSplitCPHorLite(part, &splitted_boxes);
496  // Verify.
497  EXPECT_EQ(3, splitted_boxes.size());
498  EXPECT_TRUE(TBOX(0, 0, 30, 60) == splitted_boxes[0]);
499  EXPECT_TRUE(TBOX(100, 0, 140, 45) == splitted_boxes[1]);
500  EXPECT_TRUE(TBOX(500, 0, 540, 35) == splitted_boxes[2]);
501 
502  part->DeleteBoxes();
503  delete (part);
504 }
505 
507  TBOX box(0, 0, 999, 99);
508  ColPartition* part =
510  part->DeleteBoxes();
511  part->set_median_width(10);
512  GenericVector<ColPartition*> parts_splitted;
513 
514  // Test an empty part.
515  equation_det_->RunSplitCPHor(part, &parts_splitted);
516  EXPECT_TRUE(parts_splitted.empty());
517  // Test with one blob.
518  AddBlobIntoPart(TBOX(0, 0, 10, 50), part);
519 
520  equation_det_->RunSplitCPHor(part, &parts_splitted);
521  EXPECT_EQ(1, parts_splitted.size());
522  EXPECT_TRUE(TBOX(0, 0, 10, 50) == parts_splitted[0]->bounding_box());
523 
524  // Add more blob and test.
525  AddBlobIntoPart(TBOX(11, 0, 20, 60), part);
526  AddBlobIntoPart(TBOX(25, 0, 30, 55), part); // break point.
527  AddBlobIntoPart(TBOX(100, 0, 110, 15), part);
528  AddBlobIntoPart(TBOX(125, 0, 140, 45), part); // break point.
529  AddBlobIntoPart(TBOX(500, 0, 540, 35), part); // break point.
530  equation_det_->RunSplitCPHor(part, &parts_splitted);
531 
532  // Verify.
533  EXPECT_EQ(3, parts_splitted.size());
534  EXPECT_TRUE(TBOX(0, 0, 30, 60) == parts_splitted[0]->bounding_box());
535  EXPECT_TRUE(TBOX(100, 0, 140, 45) == parts_splitted[1]->bounding_box());
536  EXPECT_TRUE(TBOX(500, 0, 540, 35) == parts_splitted[2]->bounding_box());
537 
538  parts_splitted.delete_data_pointers();
539  part->DeleteBoxes();
540  delete (part);
541 }
542 
543 } // namespace tesseract
tesseract::EquationFinderTest::equation_det_
std::unique_ptr< TestableEquationDetect > equation_det_
Definition: equationdetect_test.cc:114
tesseract::TestableEquationDetect::RunCheckSeedBlobsCount
bool RunCheckSeedBlobsCount(ColPartition *part)
Definition: equationdetect_test.cc:82
TBOX
Definition: cleanapi_test.cc:19
file::JoinPath
static std::string JoinPath(const std::string &s1, const std::string &s2)
Definition: include_gunit.h:43
tesseract::EquationFinderTest::AddBlobIntoPart
void AddBlobIntoPart(const TBOX &tbox, ColPartition *part)
Definition: equationdetect_test.cc:173
string
std::string string
Definition: equationdetect_test.cc:21
GenericVector::delete_data_pointers
void delete_data_pointers()
Definition: genericvector.h:872
BTFT_NONE
Definition: blobbox.h:114
tesseract::EquationFinderTest::CreateColParts
void CreateColParts(const int rows, const int cols, ColPartitionGrid *part_grid, std::vector< ColPartition * > *all_parts)
Definition: equationdetect_test.cc:148
tesseract::EquationFinderTest::SetUp
void SetUp()
Definition: equationdetect_test.cc:120
tesseract::BBGrid::InsertBBox
void InsertBBox(bool h_spread, bool v_spread, BBC *bbox)
Definition: bbgrid.h:486
tesseract::EquationDetect
Definition: equationdetect.h:38
tesseract::TestableEquationDetect::RunIsNearSmallNeighbor
bool RunIsNearSmallNeighbor(const TBOX &seed_box, const TBOX &part_box)
Definition: equationdetect_test.cc:78
BLOBNBOX::set_bounding_box
void set_bounding_box(const TBOX &new_box)
Definition: blobbox.h:234
tesseractclass.h
tesseract::EquationFinderTest::AddPageBlock
void AddPageBlock(Pix *pix, BLOCK_LIST *blocks)
Definition: equationdetect_test.cc:138
tesseract::TestableEquationDetect::SetPixBinary
void SetPixBinary(Pix *pix)
Definition: equationdetect_test.cc:58
ICOORD
integer coordinate
Definition: points.h:30
tesseract::Tesseract
Definition: tesseractclass.h:172
BlobSpecialTextType
BlobSpecialTextType
Definition: blobbox.h:95
tesseract::EquationDetect::LEFT_INDENT
Definition: equationdetect.h:46
tesseract::EquationDetect::SetLangTesseract
void SetLangTesseract(Tesseract *lang_tesseract)
Definition: equationdetect.cpp:123
TO_BLOCK::blobs
BLOBNBOX_LIST blobs
Definition: blobbox.h:771
BLOBNBOX::set_special_text_type
void set_special_text_type(BlobSpecialTextType new_type)
Definition: blobbox.h:291
TO_BLOCK
Definition: blobbox.h:691
tesseract::EquationDetect::SplitCPHor
void SplitCPHor(ColPartition *part, GenericVector< ColPartition * > *parts_splitted)
Definition: equationdetect.cpp:647
tesseract::EquationFinderTest::testdata_dir_
std::string testdata_dir_
Definition: equationdetect_test.cc:118
tesseract::TestableEquationDetect::RunSplitCPHorLite
void RunSplitCPHorLite(ColPartition *part, GenericVector< TBOX > *splitted_boxes)
Definition: equationdetect_test.cc:94
tesseract::TestableEquationDetect::RunEstimateTypeForUnichar
BlobSpecialTextType RunEstimateTypeForUnichar(const char *val)
Definition: equationdetect_test.cc:67
tesseract::EquationFinderTest
Definition: equationdetect_test.cc:112
include_gunit.h
tesseract::TEST_F
TEST_F(EquationFinderTest, IdentifySpecialText)
Definition: equationdetect_test.cc:181
tesseract::EquationFinderTest::tesseract_
std::unique_ptr< Tesseract > tesseract_
Definition: equationdetect_test.cc:115
BSTT_MATH
Definition: blobbox.h:99
BLOBNBOX
Definition: blobbox.h:142
tesseract::CCUtil::unicharset
UNICHARSET unicharset
Definition: ccutil.h:57
tesseract::ColPartition
Definition: colpartition.h:67
tesseract::TestableEquationDetect::RunCountAlignment
int RunCountAlignment(const GenericVector< int > &sorted_vec, const int val)
Definition: equationdetect_test.cc:90
BSTT_DIGIT
Definition: blobbox.h:98
BSTT_UNCLEAR
Definition: blobbox.h:100
BLOBNBOX::special_text_type
BlobSpecialTextType special_text_type() const
Definition: blobbox.h:288
tesseract::Textord::find_components
void find_components(Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
Definition: tordmain.cpp:215
GenericVector::push_back
int push_back(T object)
Definition: genericvector.h:799
BLOCK
Definition: ocrblock.h:28
CHECK
#define CHECK(test)
Definition: include_gunit.h:57
tesseract::EquationDetect::NO_INDENT
Definition: equationdetect.h:45
tesseract::EquationDetect::EstimateTypeForUnichar
BlobSpecialTextType EstimateTypeForUnichar(const UNICHARSET &unicharset, const UNICHAR_ID id) const
Definition: equationdetect.cpp:224
tesseract::EquationFinderTest::ClearParts
void ClearParts(std::vector< ColPartition * > *all_parts)
Definition: equationdetect_test.cc:165
tesseract::TestableEquationDetect::TestComputeCPsSuperBBox
void TestComputeCPsSuperBBox(const TBOX &box, ColPartitionGrid *part_grid)
Definition: equationdetect_test.cc:104
tesseract::EquationDetect::IsNearSmallNeighbor
bool IsNearSmallNeighbor(const TBOX &seed_box, const TBOX &part_box) const
Definition: equationdetect.cpp:1270
BRT_TEXT
Definition: blobbox.h:79
equationdetect.h
UNICHARSET::unichar_to_id
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:209
tesseract::EquationDetect::SplitCPHorLite
void SplitCPHorLite(ColPartition *part, GenericVector< TBOX > *splitted_boxes)
Definition: equationdetect.cpp:697
tesseract::EquationDetect::IdentifySpecialText
void IdentifySpecialText()
Definition: equationdetect.cpp:258
tesseract::EquationDetect::CountAlignment
int CountAlignment(const GenericVector< int > &sorted_vec, const int val) const
Definition: equationdetect.cpp:759
GenericVector::empty
bool empty() const
Definition: genericvector.h:86
UNICHARSET
Definition: unicharset.h:145
tesseract::TestableEquationDetect
Definition: equationdetect_test.cc:27
tesseract::TestableEquationDetect::RunIsIndented
EquationDetect::IndentType RunIsIndented(ColPartitionGrid *part_grid, ColPartition *part)
Definition: equationdetect_test.cc:72
tesseract::EquationDetect::IsIndented
IndentType IsIndented(ColPartition *part)
Definition: equationdetect.cpp:1020
tesseract::EquationDetect::cps_super_bbox_
TBOX * cps_super_bbox_
Definition: equationdetect.h:259
tesseract::EquationDetect::RIGHT_INDENT
Definition: equationdetect.h:47
tesseract
Definition: baseapi.h:65
tesseract::EquationDetect::ComputeForegroundDensity
float ComputeForegroundDensity(const TBOX &tbox)
Definition: equationdetect.cpp:611
tesseract::EquationDetect::lang_tesseract_
Tesseract * lang_tesseract_
Definition: equationdetect.h:247
tesseract::EquationDetect::CheckSeedBlobsCount
bool CheckSeedBlobsCount(ColPartition *part)
Definition: equationdetect.cpp:983
tesseract::EquationDetect::IndentType
IndentType
Definition: equationdetect.h:44
tesseract::Tesseract::mutable_pix_binary
Pix ** mutable_pix_binary()
Definition: tesseractclass.h:196
tesseract::EquationFinderTest::TearDown
void TearDown()
Definition: equationdetect_test.cc:132
tesseract::TestableEquationDetect::RunSplitCPHor
void RunSplitCPHor(ColPartition *part, GenericVector< ColPartition * > *parts_splitted)
Definition: equationdetect_test.cc:99
tesseract::ColPartition::FakePartition
static ColPartition * FakePartition(const TBOX &box, PolyBlockType block_type, BlobRegionType blob_type, BlobTextFlowType flow)
Definition: colpartition.cpp:95
GenericVector< int >
tesseract::EquationDetect::ComputeCPsSuperBBox
void ComputeCPsSuperBBox()
Definition: equationdetect.cpp:791
tesseract::EquationDetect::part_grid_
ColPartitionGrid * part_grid_
Definition: equationdetect.h:251
CHECK_LE
#define CHECK_LE(test, value)
Definition: include_gunit.h:61
count
int count(LIST var_list)
Definition: oldlist.cpp:79
tesseract::ColPartition::AddBox
void AddBox(BLOBNBOX *box)
Definition: colpartition.cpp:169
tesseract::EquationDetect::BOTH_INDENT
Definition: equationdetect.h:48
tesseract::Textord
Definition: textord.h:68
tesseract::ColPartitionGrid
Definition: colpartitiongrid.h:32
PT_FLOWING_TEXT
Definition: capi.h:109
tesseract::ColPartition::DeleteBoxes
void DeleteBoxes()
Definition: colpartition.cpp:305
BSTT_ITALIC
Definition: blobbox.h:97
tesseract::TestableEquationDetect::RunIdentifySpecialText
void RunIdentifySpecialText(BLOBNBOX *blob, const int height_th)
Definition: equationdetect_test.cc:63
tesseract::TestableEquationDetect::RunComputeForegroundDensity
float RunComputeForegroundDensity(const TBOX &tbox)
Definition: equationdetect_test.cc:86
tesseract::TestableEquationDetect::AddMathDigitBlobs
void AddMathDigitBlobs(const int math_blobs, const int digit_blobs, const int total_blobs, ColPartition *part)
Definition: equationdetect_test.cc:35
GenericVector::size
int size() const
Definition: genericvector.h:71
tesseract::OEM_TESSERACT_ONLY
Definition: publictypes.h:266
BSTT_NONE
Definition: blobbox.h:96
colpartitiongrid.h
CHECK_EQ
#define CHECK_EQ(test, value)
Definition: include_gunit.h:58
tesseract::TestableEquationDetect::TestableEquationDetect
TestableEquationDetect(const char *tessdata, Tesseract *lang_tesseract)
Definition: equationdetect_test.cc:29
tesseract::ColPartition::set_median_width
void set_median_width(int width)
Definition: colpartition.h:145
TBOX
Definition: rect.h:33