tesseract  4.0.0-1-g2a2b
pageiterator.cpp
Go to the documentation of this file.
1 // File: pageiterator.cpp
3 // Description: Iterator for tesseract page structure that avoids using
4 // tesseract internal data structures.
5 // Author: Ray Smith
6 // Created: Fri Feb 26 14:32:09 PST 2010
7 //
8 // (C) Copyright 2010, Google Inc.
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 // http://www.apache.org/licenses/LICENSE-2.0
13 // Unless required by applicable law or agreed to in writing, software
14 // distributed under the License is distributed on an "AS IS" BASIS,
15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 // See the License for the specific language governing permissions and
17 // limitations under the License.
18 //
20 
21 #include "pageiterator.h"
22 #include "allheaders.h"
23 #include "helpers.h"
24 #include "pageres.h"
25 #include "tesseractclass.h"
26 
27 #include <algorithm>
28 
29 namespace tesseract {
30 
32  int scaled_yres, int rect_left, int rect_top,
33  int rect_width, int rect_height)
34  : page_res_(page_res),
35  tesseract_(tesseract),
36  word_(nullptr),
37  word_length_(0),
38  blob_index_(0),
39  cblob_it_(nullptr),
40  include_upper_dots_(false),
41  include_lower_dots_(false),
42  scale_(scale),
43  scaled_yres_(scaled_yres),
44  rect_left_(rect_left),
45  rect_top_(rect_top),
46  rect_width_(rect_width),
47  rect_height_(rect_height) {
48  it_ = new PAGE_RES_IT(page_res);
50 }
51 
53  delete it_;
54  delete cblob_it_;
55 }
56 
63  : page_res_(src.page_res_),
64  tesseract_(src.tesseract_),
65  word_(nullptr),
66  word_length_(src.word_length_),
67  blob_index_(src.blob_index_),
68  cblob_it_(nullptr),
69  include_upper_dots_(src.include_upper_dots_),
70  include_lower_dots_(src.include_lower_dots_),
71  scale_(src.scale_),
72  scaled_yres_(src.scaled_yres_),
73  rect_left_(src.rect_left_),
74  rect_top_(src.rect_top_),
75  rect_width_(src.rect_width_),
76  rect_height_(src.rect_height_) {
77  it_ = new PAGE_RES_IT(*src.it_);
79 }
80 
82  page_res_ = src.page_res_;
83  tesseract_ = src.tesseract_;
86  scale_ = src.scale_;
88  rect_left_ = src.rect_left_;
89  rect_top_ = src.rect_top_;
92  delete it_;
93  it_ = new PAGE_RES_IT(*src.it_);
95  return *this;
96 }
97 
99  return (it_ == nullptr && it_ == other) ||
100  ((other != nullptr) && (it_ != nullptr) && (*it_ == *other));
101 }
102 
103 // ============= Moving around within the page ============.
104 
108  BeginWord(0);
109 }
110 
112  if (it_->block() == nullptr) return; // At end of the document.
113  PAGE_RES_IT para(page_res_);
114  PAGE_RES_IT next_para(para);
115  next_para.forward_paragraph();
116  while (next_para.cmp(*it_) <= 0) {
117  para = next_para;
118  next_para.forward_paragraph();
119  }
120  *it_ = para;
121  BeginWord(0);
122 }
123 
125  PageIterator p_start(*this);
126  p_start.RestartParagraph();
127  return p_start.it_->row() == it_->row();
128 }
129 
131  it_->restart_row();
132  BeginWord(0);
133 }
134 
149  if (it_->block() == nullptr) return false; // Already at the end!
150  if (it_->word() == nullptr)
151  level = RIL_BLOCK;
152 
153  switch (level) {
154  case RIL_BLOCK:
155  it_->forward_block();
156  break;
157  case RIL_PARA:
159  break;
160  case RIL_TEXTLINE:
161  for (it_->forward_with_empties(); it_->row() == it_->prev_row();
163  break;
164  case RIL_WORD:
166  break;
167  case RIL_SYMBOL:
168  if (cblob_it_ != nullptr)
169  cblob_it_->forward();
170  ++blob_index_;
171  if (blob_index_ >= word_length_)
173  else
174  return true;
175  break;
176  }
177  BeginWord(0);
178  return it_->block() != nullptr;
179 }
180 
187  if (it_->block() == nullptr) return false; // Already at the end!
188  if (it_->word() == nullptr) return true; // In an image block.
189  switch (level) {
190  case RIL_BLOCK:
191  return blob_index_ == 0 && it_->block() != it_->prev_block();
192  case RIL_PARA:
193  return blob_index_ == 0 &&
194  (it_->block() != it_->prev_block() ||
195  it_->row()->row->para() != it_->prev_row()->row->para());
196  case RIL_TEXTLINE:
197  return blob_index_ == 0 && it_->row() != it_->prev_row();
198  case RIL_WORD:
199  return blob_index_ == 0;
200  case RIL_SYMBOL:
201  return true;
202  }
203  return false;
204 }
205 
211  PageIteratorLevel element) const {
212  if (Empty(element)) return true; // Already at the end!
213  // The result is true if we step forward by element and find we are
214  // at the the end of the page or at beginning of *all* levels in:
215  // [level, element).
216  // When there is more than one level difference between element and level,
217  // we could for instance move forward one symbol and still be at the first
218  // word on a line, so we also have to be at the first symbol in a word.
219  PageIterator next(*this);
220  next.Next(element);
221  if (next.Empty(element)) return true; // Reached the end of the page.
222  while (element > level) {
223  element = static_cast<PageIteratorLevel>(element - 1);
224  if (!next.IsAtBeginningOf(element))
225  return false;
226  }
227  return true;
228 }
229 
236 int PageIterator::Cmp(const PageIterator &other) const {
237  int word_cmp = it_->cmp(*other.it_);
238  if (word_cmp != 0)
239  return word_cmp;
240  if (blob_index_ < other.blob_index_)
241  return -1;
242  if (blob_index_ == other.blob_index_)
243  return 0;
244  return 1;
245 }
246 
247 // ============= Accessing data ==============.
248 // Coordinate system:
249 // Integer coordinates are at the cracks between the pixels.
250 // The top-left corner of the top-left pixel in the image is at (0,0).
251 // The bottom-right corner of the bottom-right pixel in the image is at
252 // (width, height).
253 // Every bounding box goes from the top-left of the top-left contained
254 // pixel to the bottom-right of the bottom-right contained pixel, so
255 // the bounding box of the single top-left pixel in the image is:
256 // (0,0)->(1,1).
257 // If an image rectangle has been set in the API, then returned coordinates
258 // relate to the original (full) image, rather than the rectangle.
259 
267  int* left, int* top,
268  int* right, int* bottom) const {
269  if (Empty(level))
270  return false;
271  TBOX box;
272  PARA *para = nullptr;
273  switch (level) {
274  case RIL_BLOCK:
277  break;
278  case RIL_PARA:
279  para = it_->row()->row->para();
280  // explicit fall-through.
281  case RIL_TEXTLINE:
284  break;
285  case RIL_WORD:
288  break;
289  case RIL_SYMBOL:
290  if (cblob_it_ == nullptr)
291  box = it_->word()->box_word->BlobBox(blob_index_);
292  else
293  box = cblob_it_->data()->bounding_box();
294  }
295  if (level == RIL_PARA) {
296  PageIterator other = *this;
297  other.Begin();
298  do {
299  if (other.it_->block() &&
300  other.it_->block()->block == it_->block()->block &&
301  other.it_->row() && other.it_->row()->row &&
302  other.it_->row()->row->para() == para) {
303  box = box.bounding_union(other.it_->row()->row->bounding_box());
304  }
305  } while (other.Next(RIL_TEXTLINE));
306  }
307  if (level != RIL_SYMBOL || cblob_it_ != nullptr)
308  box.rotate(it_->block()->block->re_rotation());
309  // Now we have a box in tesseract coordinates relative to the image rectangle,
310  // we have to convert the coords to a top-down system.
311  const int pix_height = pixGetHeight(tesseract_->pix_binary());
312  const int pix_width = pixGetWidth(tesseract_->pix_binary());
313  *left = ClipToRange(static_cast<int>(box.left()), 0, pix_width);
314  *top = ClipToRange(pix_height - box.top(), 0, pix_height);
315  *right = ClipToRange(static_cast<int>(box.right()), *left, pix_width);
316  *bottom = ClipToRange(pix_height - box.bottom(), *top, pix_height);
317  return true;
318 }
319 
327  int* left, int* top,
328  int* right, int* bottom) const {
329  return BoundingBox(level, 0, left, top, right, bottom);
330 }
331 
332 bool PageIterator::BoundingBox(PageIteratorLevel level, const int padding,
333  int* left, int* top,
334  int* right, int* bottom) const {
335  if (!BoundingBoxInternal(level, left, top, right, bottom))
336  return false;
337  // Convert to the coordinate system of the original image.
338  *left = ClipToRange(*left / scale_ + rect_left_ - padding,
340  *top = ClipToRange(*top / scale_ + rect_top_ - padding,
342  *right = ClipToRange((*right + scale_ - 1) / scale_ + rect_left_ + padding,
343  *left, rect_left_ + rect_width_);
344  *bottom = ClipToRange((*bottom + scale_ - 1) / scale_ + rect_top_ + padding,
345  *top, rect_top_ + rect_height_);
346  return true;
347 }
348 
351  if (it_->block() == nullptr) return true; // Already at the end!
352  if (it_->word() == nullptr && level != RIL_BLOCK) return true; // image block
353  if (level == RIL_SYMBOL && blob_index_ >= word_length_)
354  return true; // Zero length word, or already at the end of it.
355  return false;
356 }
357 
360  if (it_->block() == nullptr || it_->block()->block == nullptr)
361  return PT_UNKNOWN; // Already at the end!
362  if (it_->block()->block->pdblk.poly_block() == nullptr)
363  return PT_FLOWING_TEXT; // No layout analysis used - assume text.
364  return it_->block()->block->pdblk.poly_block()->isA();
365 }
366 
370  if (it_->block() == nullptr || it_->block()->block == nullptr)
371  return nullptr; // Already at the end!
372  if (it_->block()->block->pdblk.poly_block() == nullptr)
373  return nullptr; // No layout analysis used - no polygon.
374  ICOORDELT_IT it(it_->block()->block->pdblk.poly_block()->points());
375  Pta* pta = ptaCreate(it.length());
376  int num_pts = 0;
377  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++num_pts) {
378  ICOORD* pt = it.data();
379  // Convert to top-down coords within the input image.
380  float x = static_cast<float>(pt->x()) / scale_ + rect_left_;
381  float y = rect_top_ + rect_height_ - static_cast<float>(pt->y()) / scale_;
382  ptaAddPt(pta, x, y);
383  }
384  return pta;
385 }
386 
410  int left, top, right, bottom;
411  if (!BoundingBoxInternal(level, &left, &top, &right, &bottom))
412  return nullptr;
413  if (level == RIL_SYMBOL && cblob_it_ != nullptr &&
414  cblob_it_->data()->area() != 0)
415  return cblob_it_->data()->render();
416  Box* box = boxCreate(left, top, right - left, bottom - top);
417  Pix* pix = pixClipRectangle(tesseract_->pix_binary(), box, nullptr);
418  boxDestroy(&box);
419  if (level == RIL_BLOCK || level == RIL_PARA) {
420  // Clip to the block polygon as well.
421  TBOX mask_box;
422  Pix* mask = it_->block()->block->render_mask(&mask_box);
423  int mask_x = left - mask_box.left();
424  int mask_y = top - (tesseract_->ImageHeight() - mask_box.top());
425  // AND the mask and pix, putting the result in pix.
426  pixRasterop(pix, std::max(0, -mask_x), std::max(0, -mask_y), pixGetWidth(pix),
427  pixGetHeight(pix), PIX_SRC & PIX_DST, mask, std::max(0, mask_x),
428  std::max(0, mask_y));
429  pixDestroy(&mask);
430  }
431  return pix;
432 }
433 
446  Pix* original_img,
447  int* left, int* top) const {
448  int right, bottom;
449  if (!BoundingBox(level, left, top, &right, &bottom))
450  return nullptr;
451  if (original_img == nullptr)
452  return GetBinaryImage(level);
453 
454  // Expand the box.
455  *left = std::max(*left - padding, 0);
456  *top = std::max(*top - padding, 0);
457  right = std::min(right + padding, rect_width_);
458  bottom = std::min(bottom + padding, rect_height_);
459  Box* box = boxCreate(*left, *top, right - *left, bottom - *top);
460  Pix* grey_pix = pixClipRectangle(original_img, box, nullptr);
461  boxDestroy(&box);
462  if (level == RIL_BLOCK || level == RIL_PARA) {
463  // Clip to the block polygon as well.
464  TBOX mask_box;
465  Pix* mask = it_->block()->block->render_mask(&mask_box);
466  // Copy the mask registered correctly into an image the size of grey_pix.
467  int mask_x = *left - mask_box.left();
468  int mask_y = *top - (pixGetHeight(original_img) - mask_box.top());
469  int width = pixGetWidth(grey_pix);
470  int height = pixGetHeight(grey_pix);
471  Pix* resized_mask = pixCreate(width, height, 1);
472  pixRasterop(resized_mask, std::max(0, -mask_x), std::max(0, -mask_y), width, height,
473  PIX_SRC, mask, std::max(0, mask_x), std::max(0, mask_y));
474  pixDestroy(&mask);
475  pixDilateBrick(resized_mask, resized_mask, 2 * padding + 1,
476  2 * padding + 1);
477  pixInvert(resized_mask, resized_mask);
478  pixSetMasked(grey_pix, resized_mask, UINT32_MAX);
479  pixDestroy(&resized_mask);
480  }
481  return grey_pix;
482 }
483 
490  int* x1, int* y1, int* x2, int* y2) const {
491  if (it_->word() == nullptr) return false; // Already at the end!
492  ROW* row = it_->row()->row;
493  WERD* word = it_->word()->word;
494  TBOX box = (level == RIL_WORD || level == RIL_SYMBOL)
495  ? word->bounding_box()
496  : row->bounding_box();
497  int left = box.left();
498  ICOORD startpt(left, static_cast<int16_t>(row->base_line(left) + 0.5));
499  int right = box.right();
500  ICOORD endpt(right, static_cast<int16_t>(row->base_line(right) + 0.5));
501  // Rotate to image coordinates and convert to global image coords.
502  startpt.rotate(it_->block()->block->re_rotation());
503  endpt.rotate(it_->block()->block->re_rotation());
504  *x1 = startpt.x() / scale_ + rect_left_;
505  *y1 = (rect_height_ - startpt.y()) / scale_ + rect_top_;
506  *x2 = endpt.x() / scale_ + rect_left_;
507  *y2 = (rect_height_ - endpt.y()) / scale_ + rect_top_;
508  return true;
509 }
510 
512  tesseract::WritingDirection *writing_direction,
513  tesseract::TextlineOrder *textline_order,
514  float *deskew_angle) const {
515  BLOCK* block = it_->block()->block;
516 
517  // Orientation
518  FCOORD up_in_image(0.0, 1.0);
519  up_in_image.unrotate(block->classify_rotation());
520  up_in_image.rotate(block->re_rotation());
521 
522  if (up_in_image.x() == 0.0F) {
523  if (up_in_image.y() > 0.0F) {
524  *orientation = ORIENTATION_PAGE_UP;
525  } else {
526  *orientation = ORIENTATION_PAGE_DOWN;
527  }
528  } else if (up_in_image.x() > 0.0F) {
529  *orientation = ORIENTATION_PAGE_RIGHT;
530  } else {
531  *orientation = ORIENTATION_PAGE_LEFT;
532  }
533 
534  // Writing direction
535  bool is_vertical_text = (block->classify_rotation().x() == 0.0);
536  bool right_to_left = block->right_to_left();
537  *writing_direction =
538  is_vertical_text
540  : (right_to_left
543 
544  // Textline Order
545  const bool is_mongolian = false; // TODO(eger): fix me
546  *textline_order = is_vertical_text
547  ? (is_mongolian
551 
552  // Deskew angle
553  FCOORD skew = block->skew(); // true horizontal for textlines
554  *deskew_angle = -skew.angle();
555 }
556 
558  bool *is_list_item,
559  bool *is_crown,
560  int *first_line_indent) const {
562  if (!it_->row() || !it_->row()->row || !it_->row()->row->para() ||
563  !it_->row()->row->para()->model)
564  return;
565 
566  PARA *para = it_->row()->row->para();
567  *is_list_item = para->is_list_item;
568  *is_crown = para->is_very_first_or_continuation;
569  *first_line_indent = para->model->first_indent() -
570  para->model->body_indent();
571  *just = para->model->justification();
572 }
573 
578 void PageIterator::BeginWord(int offset) {
579  WERD_RES* word_res = it_->word();
580  if (word_res == nullptr) {
581  // This is a non-text block, so there is no word.
582  word_length_ = 0;
583  blob_index_ = 0;
584  word_ = nullptr;
585  return;
586  }
587  if (word_res->best_choice != nullptr) {
588  // Recognition has been done, so we are using the box_word, which
589  // is already baseline denormalized.
590  word_length_ = word_res->best_choice->length();
591  if (word_res->box_word != nullptr) {
592  if (word_res->box_word->length() != word_length_) {
593  tprintf("Corrupted word! best_choice[len=%d] = %s, box_word[len=%d]: ",
595  word_res->box_word->length());
596  word_res->box_word->bounding_box().print();
597  }
598  ASSERT_HOST(word_res->box_word->length() == word_length_);
599  }
600  word_ = nullptr;
601  // We will be iterating the box_word.
602  delete cblob_it_;
603  cblob_it_ = nullptr;
604  } else {
605  // No recognition yet, so a "symbol" is a cblob.
606  word_ = word_res->word;
607  ASSERT_HOST(word_->cblob_list() != nullptr);
608  word_length_ = word_->cblob_list()->length();
609  if (cblob_it_ == nullptr) cblob_it_ = new C_BLOB_IT;
610  cblob_it_->set_to_list(word_->cblob_list());
611  }
612  for (blob_index_ = 0; blob_index_ < offset; ++blob_index_) {
613  if (cblob_it_ != nullptr)
614  cblob_it_->forward();
615  }
616 }
617 
619  if (it_->word() != nullptr) {
620  it_->word()->blamer_bundle = blamer_bundle;
621  return true;
622  } else {
623  return false;
624  }
625 }
626 
627 } // namespace tesseract.
BLOCK_RES * block() const
Definition: pageres.h:757
bool IsWithinFirstTextlineOfParagraph() const
PolyBlockType BlockType() const
int body_indent() const
Definition: ocrpara.h:169
virtual bool IsAtBeginningOf(PageIteratorLevel level) const
ROW_RES * row() const
Definition: pageres.h:754
void rotate(const FCOORD &vec)
Definition: rect.h:197
bool right_to_left() const
Definition: ocrblock.h:81
FCOORD re_rotation() const
Definition: ocrblock.h:136
bool PositionedAtSameWord(const PAGE_RES_IT *other) const
tesseract::ParagraphJustification justification() const
Definition: ocrpara.h:164
WERD_RES * forward_paragraph()
Definition: pageres.cpp:1652
ParagraphJustification
Definition: publictypes.h:251
void print() const
Definition: rect.h:278
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
const char * string() const
Definition: strngs.cpp:196
TBOX bounding_box() const
Definition: werd.cpp:159
float base_line(float xpos) const
Definition: ocrrow.h:59
bool is_list_item
Definition: ocrpara.h:38
void rotate(const FCOORD vec)
Definition: points.h:764
int16_t y() const
access_function
Definition: points.h:57
Definition: rect.h:34
PolyBlockType
Definition: publictypes.h:53
float angle() const
find angle
Definition: points.h:248
PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height)
FCOORD skew() const
Definition: ocrblock.h:148
TESS_LOCAL void BeginWord(int offset)
const TBOX & BlobBox(int index) const
Definition: boxword.h:84
WERD_RES * forward_block()
Definition: pageres.cpp:1667
BLOCK * block
Definition: pageres.h:117
int16_t left() const
Definition: rect.h:72
WERD_RES * restart_row()
Definition: pageres.cpp:1637
int16_t top() const
Definition: rect.h:58
ROW_RES * prev_row() const
Definition: pageres.h:745
PolyBlockType isA() const
Definition: polyblk.h:45
Pix * render_mask(TBOX *mask_box)
Definition: ocrblock.h:162
Pta * BlockPolygon() const
Definition: ocrpara.h:29
integer coordinate
Definition: points.h:32
int16_t x() const
access function
Definition: points.h:53
TBOX restricted_bounding_box(bool upper_dots, bool lower_dots) const
Definition: ocrblock.cpp:89
int Cmp(const PageIterator &other) const
const TBOX & bounding_box() const
Definition: boxword.h:80
TBOX restricted_bounding_box(bool upper_dots, bool lower_dots) const
Definition: ocrrow.cpp:85
WERD_RES * forward_with_empties()
Definition: pageres.h:735
int first_indent() const
Definition: ocrpara.h:168
FCOORD classify_rotation() const
Definition: ocrblock.h:142
WERD_RES * word() const
Definition: pageres.h:751
POLY_BLOCK * poly_block() const
Definition: pdblock.h:56
bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2, int *y2) const
Definition: werd.h:59
Pix * pix_binary() const
TBOX bounding_box() const
Definition: ocrrow.h:88
Definition: ocrrow.h:36
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
Definition: ocrblock.h:30
int length() const
Definition: ratngs.h:303
C_BLOB_LIST * cblob_list()
Definition: werd.h:98
virtual void RestartRow()
BLOCK_RES * prev_block() const
Definition: pageres.h:748
int cmp(const PAGE_RES_IT &other) const
Definition: pageres.cpp:1201
void rotate(const FCOORD &vec)
Definition: points.h:537
void ParagraphInfo(tesseract::ParagraphJustification *justification, bool *is_list_item, bool *is_crown, int *first_line_indent) const
virtual void RestartParagraph()
TBOX restricted_bounding_box(bool upper_dots, bool lower_dots) const
Definition: werd.cpp:163
const ParagraphModel * model
Definition: ocrpara.h:36
int length() const
Definition: boxword.h:83
BlamerBundle * blamer_bundle
Definition: pageres.h:246
Definition: points.h:189
void unrotate(const FCOORD &vec)
Definition: points.h:773
Pix * GetBinaryImage(PageIteratorLevel level) const
const STRING & unichar_string() const
Definition: ratngs.h:541
int16_t right() const
Definition: rect.h:79
float x() const
Definition: points.h:208
bool SetWordBlamerBundle(BlamerBundle *blamer_bundle)
bool Empty(PageIteratorLevel level) const
virtual bool Next(PageIteratorLevel level)
TBOX bounding_union(const TBOX &box) const
Definition: rect.cpp:129
ICOORDELT_LIST * points()
Definition: polyblk.h:39
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:111
void Orientation(tesseract::Orientation *orientation, tesseract::WritingDirection *writing_direction, tesseract::TextlineOrder *textline_order, float *deskew_angle) const
virtual bool IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel element) const
bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
const PageIterator & operator=(const PageIterator &src)
int16_t bottom() const
Definition: rect.h:65
WERD_RES * restart_page_with_empties()
Definition: pageres.h:701
PDBLK pdblk
Definition: ocrblock.h:192
WERD_CHOICE * best_choice
Definition: pageres.h:235
bool is_very_first_or_continuation
Definition: ocrpara.h:43
tesseract::BoxWord * box_word
Definition: pageres.h:266
float y() const
Definition: points.h:211
ROW * row
Definition: pageres.h:143
#define ASSERT_HOST(x)
Definition: errcode.h:84
PARA * para() const
Definition: ocrrow.h:118
WERD * word
Definition: pageres.h:189
Pix * GetImage(PageIteratorLevel level, int padding, Pix *original_img, int *left, int *top) const