22 #include "allheaders.h" 32 int scaled_yres,
int rect_left,
int rect_top,
33 int rect_width,
int rect_height)
34 : page_res_(page_res),
40 include_upper_dots_(false),
41 include_lower_dots_(false),
43 scaled_yres_(scaled_yres),
44 rect_left_(rect_left),
46 rect_width_(rect_width),
47 rect_height_(rect_height) {
63 : page_res_(src.page_res_),
64 tesseract_(src.tesseract_),
66 word_length_(src.word_length_),
67 blob_index_(src.blob_index_),
69 include_upper_dots_(src.include_upper_dots_),
70 include_lower_dots_(src.include_lower_dots_),
72 scaled_yres_(src.scaled_yres_),
73 rect_left_(src.rect_left_),
74 rect_top_(src.rect_top_),
75 rect_width_(src.rect_width_),
76 rect_height_(src.rect_height_) {
99 return (
it_ ==
nullptr &&
it_ == other) ||
100 ((other !=
nullptr) && (
it_ !=
nullptr) && (*
it_ == *other));
112 if (
it_->
block() ==
nullptr)
return;
116 while (next_para.
cmp(*
it_) <= 0) {
149 if (
it_->
block() ==
nullptr)
return false;
187 if (
it_->
block() ==
nullptr)
return false;
188 if (
it_->
word() ==
nullptr)
return true;
212 if (
Empty(element))
return true;
221 if (next.
Empty(element))
return true;
222 while (element > level) {
268 int* right,
int* bottom)
const {
272 PARA *para =
nullptr;
328 int* right,
int* bottom)
const {
329 return BoundingBox(level, 0, left, top, right, bottom);
334 int* right,
int* bottom)
const {
351 if (
it_->
block() ==
nullptr)
return true;
375 Pta* pta = ptaCreate(it.length());
377 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++num_pts) {
410 int left, top, right, bottom;
416 Box* box = boxCreate(left, top, right - left, bottom - top);
423 int mask_x = left - mask_box.
left();
426 pixRasterop(pix, std::max(0, -mask_x), std::max(0, -mask_y), pixGetWidth(pix),
427 pixGetHeight(pix), PIX_SRC & PIX_DST, mask, std::max(0, mask_x),
428 std::max(0, mask_y));
447 int* left,
int* top)
const {
449 if (!
BoundingBox(level, left, top, &right, &bottom))
451 if (original_img ==
nullptr)
455 *left = std::max(*left - padding, 0);
456 *top = std::max(*top - padding, 0);
459 Box* box = boxCreate(*left, *top, right - *left, bottom - *top);
460 Pix* grey_pix = pixClipRectangle(original_img, box,
nullptr);
467 int mask_x = *left - mask_box.
left();
468 int mask_y = *top - (pixGetHeight(original_img) - mask_box.
top());
469 int width = pixGetWidth(grey_pix);
470 int height = pixGetHeight(grey_pix);
471 Pix* resized_mask = pixCreate(width, height, 1);
472 pixRasterop(resized_mask, std::max(0, -mask_x), std::max(0, -mask_y), width, height,
473 PIX_SRC, mask, std::max(0, mask_x), std::max(0, mask_y));
475 pixDilateBrick(resized_mask, resized_mask, 2 * padding + 1,
477 pixInvert(resized_mask, resized_mask);
478 pixSetMasked(grey_pix, resized_mask, UINT32_MAX);
479 pixDestroy(&resized_mask);
490 int* x1,
int* y1,
int* x2,
int* y2)
const {
491 if (
it_->
word() ==
nullptr)
return false;
497 int left = box.
left();
498 ICOORD startpt(left, static_cast<int16_t>(row->
base_line(left) + 0.5));
499 int right = box.
right();
500 ICOORD endpt(right, static_cast<int16_t>(row->
base_line(right) + 0.5));
514 float *deskew_angle)
const {
518 FCOORD up_in_image(0.0, 1.0);
522 if (up_in_image.
x() == 0.0F) {
523 if (up_in_image.
y() > 0.0F) {
528 }
else if (up_in_image.
x() > 0.0F) {
545 const bool is_mongolian =
false;
546 *textline_order = is_vertical_text
554 *deskew_angle = -skew.
angle();
560 int *first_line_indent)
const {
580 if (word_res ==
nullptr) {
591 if (word_res->
box_word !=
nullptr) {
593 tprintf(
"Corrupted word! best_choice[len=%d] = %s, box_word[len=%d]: ",
BLOCK_RES * block() const
bool IsWithinFirstTextlineOfParagraph() const
PolyBlockType BlockType() const
virtual bool IsAtBeginningOf(PageIteratorLevel level) const
void rotate(const FCOORD &vec)
bool right_to_left() const
FCOORD re_rotation() const
bool PositionedAtSameWord(const PAGE_RES_IT *other) const
tesseract::ParagraphJustification justification() const
WERD_RES * forward_paragraph()
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
const char * string() const
TBOX bounding_box() const
float base_line(float xpos) const
void rotate(const FCOORD vec)
int16_t y() const
access_function
float angle() const
find angle
PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height)
TESS_LOCAL void BeginWord(int offset)
const TBOX & BlobBox(int index) const
WERD_RES * forward_block()
ROW_RES * prev_row() const
PolyBlockType isA() const
Pix * render_mask(TBOX *mask_box)
Pta * BlockPolygon() const
int16_t x() const
access function
TBOX restricted_bounding_box(bool upper_dots, bool lower_dots) const
int Cmp(const PageIterator &other) const
const TBOX & bounding_box() const
TBOX restricted_bounding_box(bool upper_dots, bool lower_dots) const
WERD_RES * forward_with_empties()
FCOORD classify_rotation() const
POLY_BLOCK * poly_block() const
bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2, int *y2) const
TBOX bounding_box() const
DLLSYM void tprintf(const char *format,...)
C_BLOB_LIST * cblob_list()
virtual void RestartRow()
BLOCK_RES * prev_block() const
int cmp(const PAGE_RES_IT &other) const
void rotate(const FCOORD &vec)
void ParagraphInfo(tesseract::ParagraphJustification *justification, bool *is_list_item, bool *is_crown, int *first_line_indent) const
virtual void RestartParagraph()
TBOX restricted_bounding_box(bool upper_dots, bool lower_dots) const
const ParagraphModel * model
BlamerBundle * blamer_bundle
void unrotate(const FCOORD &vec)
Pix * GetBinaryImage(PageIteratorLevel level) const
const STRING & unichar_string() const
bool SetWordBlamerBundle(BlamerBundle *blamer_bundle)
bool Empty(PageIteratorLevel level) const
virtual bool Next(PageIteratorLevel level)
TBOX bounding_union(const TBOX &box) const
ICOORDELT_LIST * points()
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
void Orientation(tesseract::Orientation *orientation, tesseract::WritingDirection *writing_direction, tesseract::TextlineOrder *textline_order, float *deskew_angle) const
virtual bool IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel element) const
bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
const PageIterator & operator=(const PageIterator &src)
WERD_RES * restart_page_with_empties()
WERD_CHOICE * best_choice
bool is_very_first_or_continuation
tesseract::BoxWord * box_word
Pix * GetImage(PageIteratorLevel level, int padding, Pix *original_img, int *left, int *top) const