tesseract
4.0.0-1-g2a2b
|
#include <resultiterator.h>
Public Member Functions | |
virtual | ~ResultIterator ()=default |
virtual void | Begin () |
virtual bool | Next (PageIteratorLevel level) |
virtual bool | IsAtBeginningOf (PageIteratorLevel level) const |
virtual bool | IsAtFinalElement (PageIteratorLevel level, PageIteratorLevel element) const |
int | BlanksBeforeWord () const |
virtual char * | GetUTF8Text (PageIteratorLevel level) const |
virtual std::vector< std::vector< std::pair< const char *, float > > > * | GetBestLSTMSymbolChoices () const |
bool | ParagraphIsLtr () const |
Public Member Functions inherited from tesseract::LTRResultIterator | |
LTRResultIterator (PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height) | |
virtual | ~LTRResultIterator () |
char * | GetUTF8Text (PageIteratorLevel level) const |
void | SetLineSeparator (const char *new_line) |
void | SetParagraphSeparator (const char *new_para) |
float | Confidence (PageIteratorLevel level) const |
void | RowAttributes (float *row_height, float *descenders, float *ascenders) const |
const char * | WordFontAttributes (bool *is_bold, bool *is_italic, bool *is_underlined, bool *is_monospace, bool *is_serif, bool *is_smallcaps, int *pointsize, int *font_id) const |
const char * | WordRecognitionLanguage () const |
StrongScriptDirection | WordDirection () const |
bool | WordIsFromDictionary () const |
int | BlanksBeforeWord () const |
bool | WordIsNumeric () const |
bool | HasBlamerInfo () const |
const void * | GetParamsTrainingBundle () const |
const char * | GetBlamerDebug () const |
const char * | GetBlamerMisadaptionDebug () const |
bool | HasTruthString () const |
bool | EquivalentToTruth (const char *str) const |
char * | WordTruthUTF8Text () const |
char * | WordNormedUTF8Text () const |
const char * | WordLattice (int *lattice_size) const |
bool | SymbolIsSuperscript () const |
bool | SymbolIsSubscript () const |
bool | SymbolIsDropcap () const |
Public Member Functions inherited from tesseract::PageIterator | |
PageIterator (PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height) | |
virtual | ~PageIterator () |
PageIterator (const PageIterator &src) | |
const PageIterator & | operator= (const PageIterator &src) |
bool | PositionedAtSameWord (const PAGE_RES_IT *other) const |
virtual void | RestartParagraph () |
bool | IsWithinFirstTextlineOfParagraph () const |
virtual void | RestartRow () |
int | Cmp (const PageIterator &other) const |
void | SetBoundingBoxComponents (bool include_upper_dots, bool include_lower_dots) |
bool | BoundingBox (PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const |
bool | BoundingBox (PageIteratorLevel level, const int padding, int *left, int *top, int *right, int *bottom) const |
bool | BoundingBoxInternal (PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const |
bool | Empty (PageIteratorLevel level) const |
PolyBlockType | BlockType () const |
Pta * | BlockPolygon () const |
Pix * | GetBinaryImage (PageIteratorLevel level) const |
Pix * | GetImage (PageIteratorLevel level, int padding, Pix *original_img, int *left, int *top) const |
bool | Baseline (PageIteratorLevel level, int *x1, int *y1, int *x2, int *y2) const |
void | Orientation (tesseract::Orientation *orientation, tesseract::WritingDirection *writing_direction, tesseract::TextlineOrder *textline_order, float *deskew_angle) const |
void | ParagraphInfo (tesseract::ParagraphJustification *justification, bool *is_list_item, bool *is_crown, int *first_line_indent) const |
bool | SetWordBlamerBundle (BlamerBundle *blamer_bundle) |
Static Public Member Functions | |
static ResultIterator * | StartOfParagraph (const LTRResultIterator &resit) |
static void | CalculateTextlineOrder (bool paragraph_is_ltr, const GenericVector< StrongScriptDirection > &word_dirs, GenericVectorEqEq< int > *reading_order) |
Static Public Attributes | |
static const int | kMinorRunStart = -1 |
static const int | kMinorRunEnd = -2 |
static const int | kComplexWord = -3 |
Protected Member Functions | |
TESS_LOCAL | ResultIterator (const LTRResultIterator &resit) |
Protected Member Functions inherited from tesseract::PageIterator | |
TESS_LOCAL void | BeginWord (int offset) |
Additional Inherited Members | |
Protected Attributes inherited from tesseract::LTRResultIterator | |
const char * | line_separator_ |
const char * | paragraph_separator_ |
Protected Attributes inherited from tesseract::PageIterator | |
PAGE_RES * | page_res_ |
Tesseract * | tesseract_ |
PAGE_RES_IT * | it_ |
WERD * | word_ |
int | word_length_ |
int | blob_index_ |
C_BLOB_IT * | cblob_it_ |
bool | include_upper_dots_ |
bool | include_lower_dots_ |
int | scale_ |
int | scaled_yres_ |
int | rect_left_ |
int | rect_top_ |
int | rect_width_ |
int | rect_height_ |
Definition at line 41 of file resultiterator.h.
|
virtual default |
ResultIterator is copy constructible! The default copy constructor works just fine for us.
|
explicit protected |
We presume the data associated with the given iterator will outlive us. NB: This is protected (rather than public) because it does something that is non-obvious: it resets to the beginning of the paragraph instead of staying wherever resit might have pointed.
Definition at line 35 of file resultiterator.cpp.
|
virtual |
Moves the iterator to point to the start of the page to begin an iteration.
Reimplemented from tesseract::PageIterator.
Definition at line 415 of file resultiterator.cpp.
int tesseract::ResultIterator::BlanksBeforeWord | ( | ) | const |
Definition at line 555 of file resultiterator.cpp.
|
static |
Yields the reading order as a sequence of indices and (optional) meta-marks for a set of words (given left-to-right). The meta-marks are passed as negative values: kMinorRunStart — start of minor-direction text; kMinorRunEnd — end of minor-direction text; kComplexWord — the next indexed word contains both left-to-right and right-to-left characters and was treated as neutral.
For example, suppose we have five words in a text line, indexed [0,1,2,3,4] from the leftmost side of the text line. The following are all believable reading_orders:
Left-to-Right (in ltr paragraph): { 0, 1, 2, 3, 4 }; Left-to-Right (in rtl paragraph): { kMinorRunStart, 0, 1, 2, 3, 4, kMinorRunEnd }; Right-to-Left (in rtl paragraph): { 4, 3, 2, 1, 0 }; Left-to-Right except for an RTL phrase in words 2, 3 in an ltr paragraph: { 0, 1, kMinorRunStart, 3, 2, kMinorRunEnd, 4 }.
Definition at line 257 of file resultiterator.cpp.
|
virtual |
Returns the LSTM choices for every LSTM timestep for the current word.
Definition at line 607 of file resultiterator.cpp.
|
virtual |
Returns the null-terminated UTF-8 encoded text string for the current object at the given level. Use delete[] to free after use.
Definition at line 564 of file resultiterator.cpp.
|
virtual |
IsAtBeginningOf() returns whether we're at the logical beginning of the given level (as opposed to ResultIterator's left-to-right top-to-bottom order). Otherwise, this acts the same as PageIterator::IsAtBeginningOf(). For a full description, see pageiterator.h.
Reimplemented from tesseract::PageIterator.
Definition at line 498 of file resultiterator.cpp.
|
virtual |
Implement PageIterator's IsAtFinalElement correctly in a BiDi context. For instance, IsAtFinalElement(RIL_PARA, RIL_WORD) returns whether we point at the last word in a paragraph. See PageIterator for full comment.
NOTE! This is an exact copy of PageIterator::IsAtFinalElement with the change that the variable next is now a ResultIterator instead of a PageIterator.
Reimplemented from tesseract::PageIterator.
Definition at line 534 of file resultiterator.cpp.
|
virtual |
Moves to the start of the next object at the given level in the page hierarchy in the appropriate reading order and returns false if the end of the page was reached. NOTE that RIL_SYMBOL will skip non-text blocks, but all other PageIteratorLevel level values will visit each non-text block once. Think of non-text blocks as containing a single para, with a single line, with a single imaginary word. Calls to Next with different levels may be freely intermixed. This function iterates words in right-to-left scripts correctly, if the appropriate language has been loaded into Tesseract.
Reimplemented from tesseract::PageIterator.
Definition at line 423 of file resultiterator.cpp.
bool tesseract::ResultIterator::ParagraphIsLtr | ( | ) | const |
Return whether the current paragraph's dominant reading direction is left-to-right (as opposed to right-to-left).
Definition at line 55 of file resultiterator.cpp.
|
static |
Definition at line 50 of file resultiterator.cpp.
|
static |
Definition at line 142 of file resultiterator.h.
|
static |
Definition at line 141 of file resultiterator.h.
|
static |
Definition at line 140 of file resultiterator.h.