tesseract  5.0.0-alpha-619-ge9db
pageiterator.cpp
Go to the documentation of this file.
1 // File: pageiterator.cpp
3 // Description: Iterator for tesseract page structure that avoids using
4 // tesseract internal data structures.
5 // Author: Ray Smith
6 //
7 // (C) Copyright 2010, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #include <tesseract/pageiterator.h>
21 #include "allheaders.h"
22 #include <tesseract/helpers.h>
23 #include "pageres.h"
24 #include "tesseractclass.h"
25 
26 #include <algorithm>
27 
28 namespace tesseract {
29 
31  int scaled_yres, int rect_left, int rect_top,
32  int rect_width, int rect_height)
33  : page_res_(page_res),
34  tesseract_(tesseract),
35  word_(nullptr),
36  word_length_(0),
37  blob_index_(0),
38  cblob_it_(nullptr),
39  include_upper_dots_(false),
40  include_lower_dots_(false),
41  scale_(scale),
42  scaled_yres_(scaled_yres),
43  rect_left_(rect_left),
44  rect_top_(rect_top),
45  rect_width_(rect_width),
46  rect_height_(rect_height) {
47  it_ = new PAGE_RES_IT(page_res);
49 }
50 
52  delete it_;
53  delete cblob_it_;
54 }
55 
62  : page_res_(src.page_res_),
63  tesseract_(src.tesseract_),
64  word_(nullptr),
65  word_length_(src.word_length_),
66  blob_index_(src.blob_index_),
67  cblob_it_(nullptr),
68  include_upper_dots_(src.include_upper_dots_),
69  include_lower_dots_(src.include_lower_dots_),
70  scale_(src.scale_),
71  scaled_yres_(src.scaled_yres_),
72  rect_left_(src.rect_left_),
73  rect_top_(src.rect_top_),
74  rect_width_(src.rect_width_),
75  rect_height_(src.rect_height_) {
76  it_ = new PAGE_RES_IT(*src.it_);
78 }
79 
81  page_res_ = src.page_res_;
82  tesseract_ = src.tesseract_;
85  scale_ = src.scale_;
87  rect_left_ = src.rect_left_;
88  rect_top_ = src.rect_top_;
91  delete it_;
92  it_ = new PAGE_RES_IT(*src.it_);
94  return *this;
95 }
96 
98  return (it_ == nullptr && it_ == other) ||
99  ((other != nullptr) && (it_ != nullptr) && (*it_ == *other));
100 }
101 
102 // ============= Moving around within the page ============.
103 
107  BeginWord(0);
108 }
109 
111  if (it_->block() == nullptr) return; // At end of the document.
112  PAGE_RES_IT para(page_res_);
113  PAGE_RES_IT next_para(para);
114  next_para.forward_paragraph();
115  while (next_para.cmp(*it_) <= 0) {
116  para = next_para;
117  next_para.forward_paragraph();
118  }
119  *it_ = para;
120  BeginWord(0);
121 }
122 
124  PageIterator p_start(*this);
125  p_start.RestartParagraph();
126  return p_start.it_->row() == it_->row();
127 }
128 
130  it_->restart_row();
131  BeginWord(0);
132 }
133 
148  if (it_->block() == nullptr) return false; // Already at the end!
149  if (it_->word() == nullptr)
150  level = RIL_BLOCK;
151 
152  switch (level) {
153  case RIL_BLOCK:
154  it_->forward_block();
155  break;
156  case RIL_PARA:
158  break;
159  case RIL_TEXTLINE:
160  for (it_->forward_with_empties(); it_->row() == it_->prev_row();
162  break;
163  case RIL_WORD:
165  break;
166  case RIL_SYMBOL:
167  if (cblob_it_ != nullptr)
168  cblob_it_->forward();
169  ++blob_index_;
170  if (blob_index_ >= word_length_)
172  else
173  return true;
174  break;
175  }
176  BeginWord(0);
177  return it_->block() != nullptr;
178 }
179 
186  if (it_->block() == nullptr) return false; // Already at the end!
187  if (it_->word() == nullptr) return true; // In an image block.
188  switch (level) {
189  case RIL_BLOCK:
190  return blob_index_ == 0 && it_->block() != it_->prev_block();
191  case RIL_PARA:
192  return blob_index_ == 0 &&
193  (it_->block() != it_->prev_block() ||
194  it_->row()->row->para() != it_->prev_row()->row->para());
195  case RIL_TEXTLINE:
196  return blob_index_ == 0 && it_->row() != it_->prev_row();
197  case RIL_WORD:
198  return blob_index_ == 0;
199  case RIL_SYMBOL:
200  return true;
201  }
202  return false;
203 }
204 
210  PageIteratorLevel element) const {
211  if (Empty(element)) return true; // Already at the end!
212  // The result is true if we step forward by element and find we are
213  // at the end of the page or at beginning of *all* levels in:
214  // [level, element).
215  // When there is more than one level difference between element and level,
216  // we could for instance move forward one symbol and still be at the first
217  // word on a line, so we also have to be at the first symbol in a word.
218  PageIterator next(*this);
219  next.Next(element);
220  if (next.Empty(element)) return true; // Reached the end of the page.
221  while (element > level) {
222  element = static_cast<PageIteratorLevel>(element - 1);
223  if (!next.IsAtBeginningOf(element))
224  return false;
225  }
226  return true;
227 }
228 
// Three-way comparison of this iterator's position against `other`.
// The underlying PAGE_RES_IT objects are compared first through it_->cmp();
// any non-zero result from that call is returned unchanged. Only when that
// comparison yields 0 is blob_index_ used as the tie-breaker, giving
// -1 (this index is smaller), 0 (indices equal) or 1 (this index is larger).
// NOTE(review): both it_ and other.it_ are dereferenced without a null check.
235 int PageIterator::Cmp(const PageIterator &other) const {
 // Primary key: result of comparing the two PAGE_RES_IT positions.
236  int word_cmp = it_->cmp(*other.it_);
237  if (word_cmp != 0)
238  return word_cmp;
 // it_->cmp() reported 0, so fall back to the blob index.
239  if (blob_index_ < other.blob_index_)
240  return -1;
241  if (blob_index_ == other.blob_index_)
242  return 0;
243  return 1;
244 }
245 
246 // ============= Accessing data ==============.
247 // Coordinate system:
248 // Integer coordinates are at the cracks between the pixels.
249 // The top-left corner of the top-left pixel in the image is at (0,0).
250 // The bottom-right corner of the bottom-right pixel in the image is at
251 // (width, height).
252 // Every bounding box goes from the top-left of the top-left contained
253 // pixel to the bottom-right of the bottom-right contained pixel, so
254 // the bounding box of the single top-left pixel in the image is:
255 // (0,0)->(1,1).
256 // If an image rectangle has been set in the API, then returned coordinates
257 // relate to the original (full) image, rather than the rectangle.
258 
266  int* left, int* top,
267  int* right, int* bottom) const {
268  if (Empty(level))
269  return false;
270  TBOX box;
271  PARA *para = nullptr;
272  switch (level) {
273  case RIL_BLOCK:
276  break;
277  case RIL_PARA:
278  para = it_->row()->row->para();
279  // Fall through.
280  case RIL_TEXTLINE:
283  break;
284  case RIL_WORD:
287  break;
288  case RIL_SYMBOL:
289  if (cblob_it_ == nullptr)
290  box = it_->word()->box_word->BlobBox(blob_index_);
291  else
292  box = cblob_it_->data()->bounding_box();
293  }
294  if (level == RIL_PARA) {
295  PageIterator other = *this;
296  other.Begin();
297  do {
298  if (other.it_->block() &&
299  other.it_->block()->block == it_->block()->block &&
300  other.it_->row() && other.it_->row()->row &&
301  other.it_->row()->row->para() == para) {
302  box = box.bounding_union(other.it_->row()->row->bounding_box());
303  }
304  } while (other.Next(RIL_TEXTLINE));
305  }
306  if (level != RIL_SYMBOL || cblob_it_ != nullptr)
307  box.rotate(it_->block()->block->re_rotation());
308  // Now we have a box in tesseract coordinates relative to the image rectangle,
309  // we have to convert the coords to a top-down system.
310  const int pix_height = pixGetHeight(tesseract_->pix_binary());
311  const int pix_width = pixGetWidth(tesseract_->pix_binary());
312  *left = ClipToRange(static_cast<int>(box.left()), 0, pix_width);
313  *top = ClipToRange(pix_height - box.top(), 0, pix_height);
314  *right = ClipToRange(static_cast<int>(box.right()), *left, pix_width);
315  *bottom = ClipToRange(pix_height - box.bottom(), *top, pix_height);
316  return true;
317 }
318 
326  int* left, int* top,
327  int* right, int* bottom) const {
328  return BoundingBox(level, 0, left, top, right, bottom);
329 }
330 
// Computes the bounding box of the current element at `level`, grown by
// `padding` on every side, and writes it to *left, *top, *right, *bottom.
// The box is first obtained from BoundingBoxInternal(); if that call returns
// false this function returns false immediately. Otherwise each coordinate is
// divided by scale_, shifted by rect_left_ / rect_top_, adjusted by `padding`
// and clamped with ClipToRange(), and the function returns true.
331 bool PageIterator::BoundingBox(PageIteratorLevel level, const int padding,
332  int* left, int* top,
333  int* right, int* bottom) const {
334  if (!BoundingBoxInternal(level, left, top, right, bottom))
335  return false;
336  // Convert to the coordinate system of the original image.
 // Left/top: plain integer division by scale_, then subtract the padding.
 // NOTE(review): listing lines 338 and 340, which carry the clamp bounds for
 // *left and *top, are absent from this listing, so the two calls below
 // appear unterminated here.
337  *left = ClipToRange(*left / scale_ + rect_left_ - padding,
339  *top = ClipToRange(*top / scale_ + rect_top_ - padding,
 // Right/bottom: add scale_ - 1 before dividing, then add the padding.
 // The lower clamp bound is the already-computed *left / *top, and the
 // upper bound is the far edge of the rectangle (rect origin + extent).
341  *right = ClipToRange((*right + scale_ - 1) / scale_ + rect_left_ + padding,
342  *left, rect_left_ + rect_width_);
343  *bottom = ClipToRange((*bottom + scale_ - 1) / scale_ + rect_top_ + padding,
344  *top, rect_top_ + rect_height_);
345  return true;
346 }
347 
350  if (it_->block() == nullptr) return true; // Already at the end!
351  if (it_->word() == nullptr && level != RIL_BLOCK) return true; // image block
352  if (level == RIL_SYMBOL && blob_index_ >= word_length_)
353  return true; // Zero length word, or already at the end of it.
354  return false;
355 }
356 
359  if (it_->block() == nullptr || it_->block()->block == nullptr)
360  return PT_UNKNOWN; // Already at the end!
361  if (it_->block()->block->pdblk.poly_block() == nullptr)
362  return PT_FLOWING_TEXT; // No layout analysis used - assume text.
363  return it_->block()->block->pdblk.poly_block()->isA();
364 }
365 
369  if (it_->block() == nullptr || it_->block()->block == nullptr)
370  return nullptr; // Already at the end!
371  if (it_->block()->block->pdblk.poly_block() == nullptr)
372  return nullptr; // No layout analysis used - no polygon.
373  // Copy polygon, so we can unrotate it to image coordinates.
374  POLY_BLOCK* internal_poly = it_->block()->block->pdblk.poly_block();
375  ICOORDELT_LIST vertices;
376  vertices.deep_copy(internal_poly->points(), ICOORDELT::deep_copy);
377  POLY_BLOCK poly(&vertices, internal_poly->isA());
378  poly.rotate(it_->block()->block->re_rotation());
379  ICOORDELT_IT it(poly.points());
380  Pta* pta = ptaCreate(it.length());
381  int num_pts = 0;
382  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++num_pts) {
383  ICOORD* pt = it.data();
384  // Convert to top-down coords within the input image.
385  int x = static_cast<float>(pt->x()) / scale_ + rect_left_;
386  int y = rect_top_ + rect_height_ - static_cast<float>(pt->y()) / scale_;
389  ptaAddPt(pta, x, y);
390  }
391  return pta;
392 }
393 
417  int left, top, right, bottom;
418  if (!BoundingBoxInternal(level, &left, &top, &right, &bottom))
419  return nullptr;
420  if (level == RIL_SYMBOL && cblob_it_ != nullptr &&
421  cblob_it_->data()->area() != 0)
422  return cblob_it_->data()->render();
423  Box* box = boxCreate(left, top, right - left, bottom - top);
424  Pix* pix = pixClipRectangle(tesseract_->pix_binary(), box, nullptr);
425  boxDestroy(&box);
426  if (level == RIL_BLOCK || level == RIL_PARA) {
427  // Clip to the block polygon as well.
428  TBOX mask_box;
429  Pix* mask = it_->block()->block->render_mask(&mask_box);
430  int mask_x = left - mask_box.left();
431  int mask_y = top - (tesseract_->ImageHeight() - mask_box.top());
432  // AND the mask and pix, putting the result in pix.
433  pixRasterop(pix, std::max(0, -mask_x), std::max(0, -mask_y), pixGetWidth(pix),
434  pixGetHeight(pix), PIX_SRC & PIX_DST, mask, std::max(0, mask_x),
435  std::max(0, mask_y));
436  pixDestroy(&mask);
437  }
438  return pix;
439 }
440 
453  Pix* original_img,
454  int* left, int* top) const {
455  int right, bottom;
456  if (!BoundingBox(level, left, top, &right, &bottom))
457  return nullptr;
458  if (original_img == nullptr)
459  return GetBinaryImage(level);
460 
461  // Expand the box.
462  *left = std::max(*left - padding, 0);
463  *top = std::max(*top - padding, 0);
464  right = std::min(right + padding, rect_width_);
465  bottom = std::min(bottom + padding, rect_height_);
466  Box* box = boxCreate(*left, *top, right - *left, bottom - *top);
467  Pix* grey_pix = pixClipRectangle(original_img, box, nullptr);
468  boxDestroy(&box);
469  if (level == RIL_BLOCK || level == RIL_PARA) {
470  // Clip to the block polygon as well.
471  TBOX mask_box;
472  Pix* mask = it_->block()->block->render_mask(&mask_box);
473  // Copy the mask registered correctly into an image the size of grey_pix.
474  int mask_x = *left - mask_box.left();
475  int mask_y = *top - (pixGetHeight(original_img) - mask_box.top());
476  int width = pixGetWidth(grey_pix);
477  int height = pixGetHeight(grey_pix);
478  Pix* resized_mask = pixCreate(width, height, 1);
479  pixRasterop(resized_mask, std::max(0, -mask_x), std::max(0, -mask_y), width, height,
480  PIX_SRC, mask, std::max(0, mask_x), std::max(0, mask_y));
481  pixDestroy(&mask);
482  pixDilateBrick(resized_mask, resized_mask, 2 * padding + 1,
483  2 * padding + 1);
484  pixInvert(resized_mask, resized_mask);
485  pixSetMasked(grey_pix, resized_mask, UINT32_MAX);
486  pixDestroy(&resized_mask);
487  }
488  return grey_pix;
489 }
490 
497  int* x1, int* y1, int* x2, int* y2) const {
498  if (it_->word() == nullptr) return false; // Already at the end!
499  ROW* row = it_->row()->row;
500  WERD* word = it_->word()->word;
501  TBOX box = (level == RIL_WORD || level == RIL_SYMBOL)
502  ? word->bounding_box()
503  : row->bounding_box();
504  int left = box.left();
505  ICOORD startpt(left, static_cast<int16_t>(row->base_line(left) + 0.5));
506  int right = box.right();
507  ICOORD endpt(right, static_cast<int16_t>(row->base_line(right) + 0.5));
508  // Rotate to image coordinates and convert to global image coords.
509  startpt.rotate(it_->block()->block->re_rotation());
510  endpt.rotate(it_->block()->block->re_rotation());
511  *x1 = startpt.x() / scale_ + rect_left_;
512  *y1 = (rect_height_ - startpt.y()) / scale_ + rect_top_;
513  *x2 = endpt.x() / scale_ + rect_left_;
514  *y2 = (rect_height_ - endpt.y()) / scale_ + rect_top_;
515  return true;
516 }
517 
519  tesseract::WritingDirection *writing_direction,
520  tesseract::TextlineOrder *textline_order,
521  float *deskew_angle) const {
522  BLOCK* block = it_->block()->block;
523 
524  // Orientation
525  FCOORD up_in_image(0.0, 1.0);
526  up_in_image.unrotate(block->classify_rotation());
527  up_in_image.rotate(block->re_rotation());
528 
529  if (up_in_image.x() == 0.0F) {
530  if (up_in_image.y() > 0.0F) {
531  *orientation = ORIENTATION_PAGE_UP;
532  } else {
533  *orientation = ORIENTATION_PAGE_DOWN;
534  }
535  } else if (up_in_image.x() > 0.0F) {
536  *orientation = ORIENTATION_PAGE_RIGHT;
537  } else {
538  *orientation = ORIENTATION_PAGE_LEFT;
539  }
540 
541  // Writing direction
542  bool is_vertical_text = (block->classify_rotation().x() == 0.0);
543  bool right_to_left = block->right_to_left();
544  *writing_direction =
545  is_vertical_text
547  : (right_to_left
550 
551  // Textline Order
552  const bool is_mongolian = false; // TODO(eger): fix me
553  *textline_order = is_vertical_text
554  ? (is_mongolian
558 
559  // Deskew angle
560  FCOORD skew = block->skew(); // true horizontal for textlines
561  *deskew_angle = -skew.angle();
562 }
563 
565  bool *is_list_item,
566  bool *is_crown,
567  int *first_line_indent) const {
569  if (!it_->row() || !it_->row()->row || !it_->row()->row->para() ||
570  !it_->row()->row->para()->model)
571  return;
572 
573  PARA *para = it_->row()->row->para();
574  *is_list_item = para->is_list_item;
575  *is_crown = para->is_very_first_or_continuation;
576  *first_line_indent = para->model->first_indent() -
577  para->model->body_indent();
578  *just = para->model->justification();
579 }
580 
// Resets the per-word iteration state (word_, word_length_, blob_index_,
// cblob_it_) for the word that it_ currently points at, then steps to the
// blob at index `offset`.
// Three cases are handled:
//  * it_->word() is null: all word state is cleared and the function returns.
//  * best_choice is set: word_length_ comes from best_choice->length(),
//    word_ is cleared and cblob_it_ is deleted.
//  * best_choice is null: word_ is taken from the WERD_RES, word_length_ is
//    the length of its cblob list, and cblob_it_ is (created if needed and)
//    pointed at that list.
// NOTE(review): `offset` is not compared against word_length_ in this block.
585 void PageIterator::BeginWord(int offset) {
586  WERD_RES* word_res = it_->word();
587  if (word_res == nullptr) {
588  // This is a non-text block, so there is no word.
589  word_length_ = 0;
590  blob_index_ = 0;
591  word_ = nullptr;
592  return;
593  }
594  if (word_res->best_choice != nullptr) {
595  // Recognition has been done, so we are using the box_word, which
596  // is already baseline denormalized.
597  word_length_ = word_res->best_choice->length();
598  if (word_res->box_word != nullptr) {
 // A length mismatch is logged first and then trips the ASSERT_HOST below.
599  if (word_res->box_word->length() != word_length_) {
 // NOTE(review): listing line 601 (middle of the tprintf argument list) is
 // absent from this listing.
600  tprintf("Corrupted word! best_choice[len=%d] = %s, box_word[len=%d]: ",
602  word_res->box_word->length());
603  word_res->box_word->bounding_box().print();
604  }
605  ASSERT_HOST(word_res->box_word->length() == word_length_);
606  }
607  word_ = nullptr;
608  // We will be iterating the box_word.
609  delete cblob_it_;
610  cblob_it_ = nullptr;
611  } else {
612  // No recognition yet, so a "symbol" is a cblob.
613  word_ = word_res->word;
614  ASSERT_HOST(word_->cblob_list() != nullptr);
615  word_length_ = word_->cblob_list()->length();
 // Reuse an existing C_BLOB_IT if there is one; otherwise allocate it.
616  if (cblob_it_ == nullptr) cblob_it_ = new C_BLOB_IT;
617  cblob_it_->set_to_list(word_->cblob_list());
618  }
 // Count blob_index_ up from 0 to `offset`, advancing cblob_it_ in step when
 // it exists (it is null in the best_choice branch above).
619  for (blob_index_ = 0; blob_index_ < offset; ++blob_index_) {
620  if (cblob_it_ != nullptr)
621  cblob_it_->forward();
622  }
623 }
624 
626  if (it_->word() != nullptr) {
627  it_->word()->blamer_bundle = blamer_bundle;
628  return true;
629  } else {
630  return false;
631  }
632 }
633 
634 } // namespace tesseract.
FCOORD::unrotate
void unrotate(const FCOORD &vec)
Definition: points.h:745
tesseract::PageIterator::Cmp
int Cmp(const PageIterator &other) const
Definition: pageiterator.cpp:235
ParagraphModel::body_indent
int body_indent() const
Definition: ocrpara.h:169
WERD_CHOICE::unichar_string
const STRING & unichar_string() const
Definition: ratngs.h:529
tesseract::WRITING_DIRECTION_LEFT_TO_RIGHT
Definition: publictypes.h:132
ClipToRange
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:106
ROW::para
PARA * para() const
Definition: ocrrow.h:117
tesseract::RIL_WORD
Definition: publictypes.h:220
WERD_RES::box_word
tesseract::BoxWord * box_word
Definition: pageres.h:266
FCOORD::angle
float angle() const
find angle
Definition: points.h:246
tesseract::PageIterator::BlockPolygon
Pta * BlockPolygon() const
Definition: pageiterator.cpp:368
pageres.h
tesseract::PageIterator::RestartRow
virtual void RestartRow()
Definition: pageiterator.cpp:129
PAGE_RES_IT::forward_with_empties
WERD_RES * forward_with_empties()
Definition: pageres.h:732
BLOCK::skew
FCOORD skew() const
Definition: ocrblock.h:145
ROW::base_line
float base_line(float xpos) const
Definition: ocrrow.h:58
tesseract::PageIterator::rect_height_
int rect_height_
Definition: pageiterator.h:357
tesseract::PageIterator::it_
PAGE_RES_IT * it_
Definition: pageiterator.h:332
tesseractclass.h
ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:87
WERD::bounding_box
TBOX bounding_box() const
Definition: werd.cpp:147
PAGE_RES_IT::forward_paragraph
WERD_RES * forward_paragraph()
Definition: pageres.cpp:1637
tesseract::TEXTLINE_ORDER_RIGHT_TO_LEFT
Definition: publictypes.h:150
PARA::is_list_item
bool is_list_item
Definition: ocrpara.h:38
tesseract::PageIterator::page_res_
PAGE_RES * page_res_
Definition: pageiterator.h:325
tesseract::RIL_BLOCK
Definition: publictypes.h:217
tesseract::PageIterator::RestartParagraph
virtual void RestartParagraph()
Definition: pageiterator.cpp:110
PAGE_RES_IT::block
BLOCK_RES * block() const
Definition: pageres.h:754
FCOORD::y
float y() const
Definition: points.h:209
PAGE_RES_IT::row
ROW_RES * row() const
Definition: pageres.h:751
ICOORD
integer coordinate
Definition: points.h:30
ICOORD::rotate
void rotate(const FCOORD &vec)
Definition: points.h:522
tesseract::Tesseract
Definition: tesseractclass.h:172
tesseract::PageIterator::operator=
const PageIterator & operator=(const PageIterator &src)
Definition: pageiterator.cpp:80
TBOX::print
void print() const
Definition: rect.h:277
tesseract::PageIterator
Definition: pageiterator.h:52
tesseract::ParagraphJustification
ParagraphJustification
Definition: publictypes.h:248
tesseract::PageIterator::scale_
int scale_
Definition: pageiterator.h:352
FCOORD::x
float x() const
Definition: points.h:206
TBOX::top
int16_t top() const
Definition: rect.h:57
tesseract::ORIENTATION_PAGE_RIGHT
Definition: publictypes.h:118
TBOX::bounding_union
TBOX bounding_union(const TBOX &box) const
Definition: rect.cpp:124
WERD_RES
Definition: pageres.h:160
BLOCK::render_mask
Pix * render_mask(TBOX *mask_box)
Definition: ocrblock.h:159
tesseract::WRITING_DIRECTION_TOP_TO_BOTTOM
Definition: publictypes.h:134
PAGE_RES_IT::prev_row
ROW_RES * prev_row() const
Definition: pageres.h:742
tesseract::WritingDirection
WritingDirection
Definition: publictypes.h:131
tesseract::ORIENTATION_PAGE_LEFT
Definition: publictypes.h:120
ICOORD::x
int16_t x() const
access function
Definition: points.h:51
FCOORD
Definition: points.h:187
TBOX::rotate
void rotate(const FCOORD &vec)
Definition: rect.h:196
tesseract::PageIterator::IsWithinFirstTextlineOfParagraph
bool IsWithinFirstTextlineOfParagraph() const
Definition: pageiterator.cpp:123
BLOCK::right_to_left
bool right_to_left() const
Definition: ocrblock.h:78
tesseract::PageIterator::BoundingBoxInternal
bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
Definition: pageiterator.cpp:265
ParagraphModel::first_indent
int first_indent() const
Definition: ocrpara.h:168
WERD::cblob_list
C_BLOB_LIST * cblob_list()
Definition: werd.h:94
WERD_RES::blamer_bundle
BlamerBundle * blamer_bundle
Definition: pageres.h:246
tesseract::RIL_SYMBOL
Definition: publictypes.h:221
tesseract::ORIENTATION_PAGE_DOWN
Definition: publictypes.h:119
tesseract::PageIterator::include_lower_dots_
bool include_lower_dots_
Definition: pageiterator.h:350
tesseract::PageIterator::GetImage
Pix * GetImage(PageIteratorLevel level, int padding, Pix *original_img, int *left, int *top) const
Definition: pageiterator.cpp:452
POLY_BLOCK::rotate
void rotate(FCOORD rotation)
Definition: polyblk.cpp:183
tesseract::PageIterator::Orientation
void Orientation(tesseract::Orientation *orientation, tesseract::WritingDirection *writing_direction, tesseract::TextlineOrder *textline_order, float *deskew_angle) const
Definition: pageiterator.cpp:518
tesseract::PageIterator::word_length_
int word_length_
Definition: pageiterator.h:339
tesseract::PageIterator::SetWordBlamerBundle
bool SetWordBlamerBundle(BlamerBundle *blamer_bundle)
Definition: pageiterator.cpp:625
BLOCK
Definition: ocrblock.h:28
BLOCK::pdblk
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:189
PAGE_RES_IT::restart_row
WERD_RES * restart_row()
Definition: pageres.cpp:1623
tesseract::TextlineOrder
TextlineOrder
Definition: publictypes.h:148
PAGE_RES_IT::forward_block
WERD_RES * forward_block()
Definition: pageres.cpp:1651
tesseract::WRITING_DIRECTION_RIGHT_TO_LEFT
Definition: publictypes.h:133
tesseract::BoxWord::BlobBox
const TBOX & BlobBox(int index) const
Definition: boxword.h:83
WERD_RES::best_choice
WERD_CHOICE * best_choice
Definition: pageres.h:235
ROW::restricted_bounding_box
TBOX restricted_bounding_box(bool upper_dots, bool lower_dots) const
Definition: ocrrow.cpp:81
tesseract::PageIterator::Empty
bool Empty(PageIteratorLevel level) const
Definition: pageiterator.cpp:349
STRING::c_str
const char * c_str() const
Definition: strngs.cpp:192
tesseract::PageIterator::Next
virtual bool Next(PageIteratorLevel level)
Definition: pageiterator.cpp:147
PDBLK::poly_block
POLY_BLOCK * poly_block() const
Definition: pdblock.h:54
ROW_RES::row
ROW * row
Definition: pageres.h:136
tesseract::JUSTIFICATION_UNKNOWN
Definition: publictypes.h:249
tesseract::PageIterator::tesseract_
Tesseract * tesseract_
Definition: pageiterator.h:327
tesseract::PageIterator::IsAtFinalElement
virtual bool IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel element) const
Definition: pageiterator.cpp:209
tesseract::PageIterator::BlockType
PolyBlockType BlockType() const
Definition: pageiterator.cpp:358
tesseract::TEXTLINE_ORDER_LEFT_TO_RIGHT
Definition: publictypes.h:149
tesseract::PageIterator::GetBinaryImage
Pix * GetBinaryImage(PageIteratorLevel level) const
Definition: pageiterator.cpp:416
ICOORDELT::deep_copy
static ICOORDELT * deep_copy(const ICOORDELT *src)
Definition: points.h:178
tesseract::TEXTLINE_ORDER_TOP_TO_BOTTOM
Definition: publictypes.h:151
PAGE_RES_IT::prev_block
BLOCK_RES * prev_block() const
Definition: pageres.h:745
TBOX::bottom
int16_t bottom() const
Definition: rect.h:64
tesseract::PageIteratorLevel
PageIteratorLevel
Definition: publictypes.h:216
pageiterator.h
tesseract::PageIterator::word_
WERD * word_
Definition: pageiterator.h:337
tesseract::Tesseract::ImageHeight
int ImageHeight() const
Definition: tesseractclass.h:253
ROW::bounding_box
TBOX bounding_box() const
Definition: ocrrow.h:87
BLOCK::restricted_bounding_box
TBOX restricted_bounding_box(bool upper_dots, bool lower_dots) const
Definition: ocrblock.cpp:84
tesseract::PageIterator::IsAtBeginningOf
virtual bool IsAtBeginningOf(PageIteratorLevel level) const
Definition: pageiterator.cpp:185
helpers.h
tesseract
Definition: baseapi.h:65
PAGE_RES_IT::word
WERD_RES * word() const
Definition: pageres.h:748
PAGE_RES
Definition: pageres.h:73
FCOORD::rotate
void rotate(const FCOORD vec)
Definition: points.h:736
tesseract::PageIterator::include_upper_dots_
bool include_upper_dots_
Definition: pageiterator.h:349
tesseract::PageIterator::cblob_it_
C_BLOB_IT * cblob_it_
Definition: pageiterator.h:347
PT_UNKNOWN
Definition: capi.h:108
tesseract::RIL_TEXTLINE
Definition: publictypes.h:219
PAGE_RES_IT
Definition: pageres.h:668
POLY_BLOCK::points
ICOORDELT_LIST * points()
Definition: polyblk.h:52
tesseract::ORIENTATION_PAGE_UP
Definition: publictypes.h:117
tesseract::PageIterator::Begin
virtual void Begin()
Definition: pageiterator.cpp:105
tesseract::PageIterator::BoundingBox
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
Definition: pageiterator.cpp:325
WERD_CHOICE::length
int length() const
Definition: ratngs.h:291
tesseract::BoxWord::length
int length() const
Definition: boxword.h:82
tesseract::PageIterator::PositionedAtSameWord
bool PositionedAtSameWord(const PAGE_RES_IT *other) const
Definition: pageiterator.cpp:97
ParagraphModel::justification
tesseract::ParagraphJustification justification() const
Definition: ocrpara.h:164
WERD
Definition: werd.h:55
PAGE_RES_IT::cmp
int cmp(const PAGE_RES_IT &other) const
Definition: pageres.cpp:1141
BLOCK_RES::block
BLOCK * block
Definition: pageres.h:113
tesseract::Tesseract::pix_binary
Pix * pix_binary() const
Definition: tesseractclass.h:200
TBOX::left
int16_t left() const
Definition: rect.h:71
WERD::restricted_bounding_box
TBOX restricted_bounding_box(bool upper_dots, bool lower_dots) const
Definition: werd.cpp:151
ROW
Definition: ocrrow.h:35
tesseract::PageIterator::rect_top_
int rect_top_
Definition: pageiterator.h:355
PT_FLOWING_TEXT
Definition: capi.h:109
TBOX::right
int16_t right() const
Definition: rect.h:78
PARA::model
const ParagraphModel * model
Definition: ocrpara.h:36
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
BLOCK::classify_rotation
FCOORD classify_rotation() const
Definition: ocrblock.h:139
tesseract::PageIterator::~PageIterator
virtual ~PageIterator()
Definition: pageiterator.cpp:51
tesseract::PageIterator::scaled_yres_
int scaled_yres_
Definition: pageiterator.h:353
POLY_BLOCK
Definition: polyblk.h:26
PARA
Definition: ocrpara.h:29
PARA::is_very_first_or_continuation
bool is_very_first_or_continuation
Definition: ocrpara.h:43
WERD_RES::word
WERD * word
Definition: pageres.h:180
tesseract::PageIterator::BeginWord
TESS_LOCAL void BeginWord(int offset)
Definition: pageiterator.cpp:585
PolyBlockType
PolyBlockType
Definition: publictypes.h:52
BLOCK::re_rotation
FCOORD re_rotation() const
Definition: ocrblock.h:133
POLY_BLOCK::isA
PolyBlockType isA() const
Definition: polyblk.h:58
tesseract::Orientation
Orientation
Definition: publictypes.h:116
BlamerBundle
Definition: blamer.h:103
tesseract::BoxWord::bounding_box
const TBOX & bounding_box() const
Definition: boxword.h:79
tesseract::PageIterator::rect_width_
int rect_width_
Definition: pageiterator.h:356
tesseract::PageIterator::rect_left_
int rect_left_
Definition: pageiterator.h:354
tesseract::RIL_PARA
Definition: publictypes.h:218
PAGE_RES_IT::restart_page_with_empties
WERD_RES * restart_page_with_empties()
Definition: pageres.h:698
tesseract::PageIterator::ParagraphInfo
void ParagraphInfo(tesseract::ParagraphJustification *justification, bool *is_list_item, bool *is_crown, int *first_line_indent) const
Definition: pageiterator.cpp:564
tesseract::PageIterator::Baseline
bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2, int *y2) const
Definition: pageiterator.cpp:496
ICOORD::y
int16_t y() const
access_function
Definition: points.h:55
TBOX
Definition: rect.h:33
tesseract::PageIterator::blob_index_
int blob_index_
Definition: pageiterator.h:341
tesseract::PageIterator::PageIterator
PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height)
Definition: pageiterator.cpp:30