|
| ~ResultIterator () override=default |
|
void | Begin () override |
|
bool | Next (PageIteratorLevel level) override |
|
bool | IsAtBeginningOf (PageIteratorLevel level) const override |
|
bool | IsAtFinalElement (PageIteratorLevel level, PageIteratorLevel element) const override |
|
int | BlanksBeforeWord () const |
|
virtual char * | GetUTF8Text (PageIteratorLevel level) const |
|
virtual std::vector< std::vector< std::vector< std::pair< const char *, float > > > > * | GetRawLSTMTimesteps () const |
|
virtual std::vector< std::vector< std::pair< const char *, float > > > * | GetBestLSTMSymbolChoices () const |
|
bool | ParagraphIsLtr () const |
|
| LTRResultIterator (PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height) |
|
| ~LTRResultIterator () override |
|
char * | GetUTF8Text (PageIteratorLevel level) const |
|
void | SetLineSeparator (const char *new_line) |
|
void | SetParagraphSeparator (const char *new_para) |
|
float | Confidence (PageIteratorLevel level) const |
|
void | RowAttributes (float *row_height, float *descenders, float *ascenders) const |
|
const char * | WordFontAttributes (bool *is_bold, bool *is_italic, bool *is_underlined, bool *is_monospace, bool *is_serif, bool *is_smallcaps, int *pointsize, int *font_id) const |
|
const char * | WordRecognitionLanguage () const |
|
StrongScriptDirection | WordDirection () const |
|
bool | WordIsFromDictionary () const |
|
int | BlanksBeforeWord () const |
|
bool | WordIsNumeric () const |
|
bool | HasBlamerInfo () const |
|
const void * | GetParamsTrainingBundle () const |
|
const char * | GetBlamerDebug () const |
|
const char * | GetBlamerMisadaptionDebug () const |
|
bool | HasTruthString () const |
|
bool | EquivalentToTruth (const char *str) const |
|
char * | WordTruthUTF8Text () const |
|
char * | WordNormedUTF8Text () const |
|
const char * | WordLattice (int *lattice_size) const |
|
bool | SymbolIsSuperscript () const |
|
bool | SymbolIsSubscript () const |
|
bool | SymbolIsDropcap () const |
|
| PageIterator (PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height) |
|
virtual | ~PageIterator () |
|
| PageIterator (const PageIterator &src) |
|
const PageIterator & | operator= (const PageIterator &src) |
|
bool | PositionedAtSameWord (const PAGE_RES_IT *other) const |
|
virtual void | RestartParagraph () |
|
bool | IsWithinFirstTextlineOfParagraph () const |
|
virtual void | RestartRow () |
|
int | Cmp (const PageIterator &other) const |
|
void | SetBoundingBoxComponents (bool include_upper_dots, bool include_lower_dots) |
|
bool | BoundingBox (PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const |
|
bool | BoundingBox (PageIteratorLevel level, int padding, int *left, int *top, int *right, int *bottom) const |
|
bool | BoundingBoxInternal (PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const |
|
bool | Empty (PageIteratorLevel level) const |
|
PolyBlockType | BlockType () const |
|
Pta * | BlockPolygon () const |
|
Pix * | GetBinaryImage (PageIteratorLevel level) const |
|
Pix * | GetImage (PageIteratorLevel level, int padding, Pix *original_img, int *left, int *top) const |
|
bool | Baseline (PageIteratorLevel level, int *x1, int *y1, int *x2, int *y2) const |
|
void | Orientation (tesseract::Orientation *orientation, tesseract::WritingDirection *writing_direction, tesseract::TextlineOrder *textline_order, float *deskew_angle) const |
|
void | ParagraphInfo (tesseract::ParagraphJustification *justification, bool *is_list_item, bool *is_crown, int *first_line_indent) const |
|
bool | SetWordBlamerBundle (BlamerBundle *blamer_bundle) |
|
Definition at line 44 of file resultiterator.h.
Yields the reading order as a sequence of indices and (optional) meta-marks for a set of words (given left-to-right). The meta-marks are passed as negative values: kMinorRunStart — start of minor-direction text; kMinorRunEnd — end of minor-direction text; kComplexWord — the next indexed word contains both left-to-right and right-to-left characters and was treated as neutral.
For example, suppose we have five words in a text line, indexed [0,1,2,3,4] from the leftmost side of the text line. The following are all believable reading_orders:
Left-to-Right (in ltr paragraph): { 0, 1, 2, 3, 4 }; Left-to-Right (in rtl paragraph): { kMinorRunStart, 0, 1, 2, 3, 4, kMinorRunEnd }; Right-to-Left (in rtl paragraph): { 4, 3, 2, 1, 0 }; Left-to-Right except for an RTL phrase in words 2, 3 in an ltr paragraph: { 0, 1, kMinorRunStart, 3, 2, kMinorRunEnd, 4 }.
Definition at line 272 of file resultiterator.cpp.
277 if (word_dirs.size() == 0)
282 int minor_direction, major_direction, major_step, start, end;
283 if (paragraph_is_ltr) {
285 end = word_dirs.size();
290 start = word_dirs.size() - 1;
299 int neutral_end = start;
300 while (neutral_end > 0 && word_dirs[neutral_end] == DIR_NEUTRAL) {
306 int left = neutral_end;
312 for (int i = left; i < word_dirs.size(); i++) {
322 for (int i = start; i != end;) {
323 if (word_dirs[i] == minor_direction) {
325 while (j != end && word_dirs[j] != major_direction) j += major_step;
328 while (j != i && word_dirs[j] != minor_direction) j -= major_step;
331 for (int k = j; k != i; k -= major_step) {