tesseract  4.0.0-1-g2a2b
tesseract::LTRResultIterator Class Reference

#include <ltrresultiterator.h>

Inheritance diagram for tesseract::LTRResultIterator:
tesseract::PageIterator tesseract::ResultIterator tesseract::MutableIterator

Public Member Functions

 LTRResultIterator (PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height)
 
virtual ~LTRResultIterator ()
 
char * GetUTF8Text (PageIteratorLevel level) const
 
void SetLineSeparator (const char *new_line)
 
void SetParagraphSeparator (const char *new_para)
 
float Confidence (PageIteratorLevel level) const
 
void RowAttributes (float *row_height, float *descenders, float *ascenders) const
 
const char * WordFontAttributes (bool *is_bold, bool *is_italic, bool *is_underlined, bool *is_monospace, bool *is_serif, bool *is_smallcaps, int *pointsize, int *font_id) const
 
const char * WordRecognitionLanguage () const
 
StrongScriptDirection WordDirection () const
 
bool WordIsFromDictionary () const
 
int BlanksBeforeWord () const
 
bool WordIsNumeric () const
 
bool HasBlamerInfo () const
 
const void * GetParamsTrainingBundle () const
 
const char * GetBlamerDebug () const
 
const char * GetBlamerMisadaptionDebug () const
 
bool HasTruthString () const
 
bool EquivalentToTruth (const char *str) const
 
char * WordTruthUTF8Text () const
 
char * WordNormedUTF8Text () const
 
const char * WordLattice (int *lattice_size) const
 
bool SymbolIsSuperscript () const
 
bool SymbolIsSubscript () const
 
bool SymbolIsDropcap () const
 
- Public Member Functions inherited from tesseract::PageIterator
 PageIterator (PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height)
 
virtual ~PageIterator ()
 
 PageIterator (const PageIterator &src)
 
const PageIterator & operator= (const PageIterator &src)
 
bool PositionedAtSameWord (const PAGE_RES_IT *other) const
 
virtual void Begin ()
 
virtual void RestartParagraph ()
 
bool IsWithinFirstTextlineOfParagraph () const
 
virtual void RestartRow ()
 
virtual bool Next (PageIteratorLevel level)
 
virtual bool IsAtBeginningOf (PageIteratorLevel level) const
 
virtual bool IsAtFinalElement (PageIteratorLevel level, PageIteratorLevel element) const
 
int Cmp (const PageIterator &other) const
 
void SetBoundingBoxComponents (bool include_upper_dots, bool include_lower_dots)
 
bool BoundingBox (PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
 
bool BoundingBox (PageIteratorLevel level, const int padding, int *left, int *top, int *right, int *bottom) const
 
bool BoundingBoxInternal (PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
 
bool Empty (PageIteratorLevel level) const
 
PolyBlockType BlockType () const
 
Pta * BlockPolygon () const
 
Pix * GetBinaryImage (PageIteratorLevel level) const
 
Pix * GetImage (PageIteratorLevel level, int padding, Pix *original_img, int *left, int *top) const
 
bool Baseline (PageIteratorLevel level, int *x1, int *y1, int *x2, int *y2) const
 
void Orientation (tesseract::Orientation *orientation, tesseract::WritingDirection *writing_direction, tesseract::TextlineOrder *textline_order, float *deskew_angle) const
 
void ParagraphInfo (tesseract::ParagraphJustification *justification, bool *is_list_item, bool *is_crown, int *first_line_indent) const
 
bool SetWordBlamerBundle (BlamerBundle *blamer_bundle)
 

Protected Attributes

const char * line_separator_
 
const char * paragraph_separator_
 
- Protected Attributes inherited from tesseract::PageIterator
PAGE_RES * page_res_
 
Tesseract * tesseract_
 
PAGE_RES_IT * it_
 
WERD * word_
 
int word_length_
 
int blob_index_
 
C_BLOB_IT * cblob_it_
 
bool include_upper_dots_
 
bool include_lower_dots_
 
int scale_
 
int scaled_yres_
 
int rect_left_
 
int rect_top_
 
int rect_width_
 
int rect_height_
 

Friends

class ChoiceIterator
 

Additional Inherited Members

- Protected Member Functions inherited from tesseract::PageIterator
TESS_LOCAL void BeginWord (int offset)
 

Detailed Description

Definition at line 48 of file ltrresultiterator.h.

Constructor & Destructor Documentation

◆ LTRResultIterator()

tesseract::LTRResultIterator::LTRResultIterator ( PAGE_RES *  page_res,
Tesseract *  tesseract,
int  scale,
int  scaled_yres,
int  rect_left,
int  rect_top,
int  rect_width,
int  rect_height 
)

Definition at line 30 of file ltrresultiterator.cpp.

34  : PageIterator(page_res, tesseract, scale, scaled_yres,
35  rect_left, rect_top, rect_width, rect_height),
36  line_separator_("\n"),
37  paragraph_separator_("\n") {
38 }
PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height)

◆ ~LTRResultIterator()

tesseract::LTRResultIterator::~LTRResultIterator ( )
virtual default

Member Function Documentation

◆ BlanksBeforeWord()

int tesseract::LTRResultIterator::BlanksBeforeWord ( ) const

Definition at line 226 of file ltrresultiterator.cpp.

226  {
227  if (it_->word() == nullptr) return 1;
228  return it_->word()->word->space();
229 }
uint8_t space()
Definition: werd.h:102
WERD_RES * word() const
Definition: pageres.h:751
WERD * word
Definition: pageres.h:189

◆ Confidence()

float tesseract::LTRResultIterator::Confidence ( PageIteratorLevel  level) const

Definition at line 96 of file ltrresultiterator.cpp.

96  {
97  if (it_->word() == nullptr) return 0.0f; // Already at the end!
98  float mean_certainty = 0.0f;
99  int certainty_count = 0;
100  PAGE_RES_IT res_it(*it_);
101  WERD_CHOICE* best_choice = res_it.word()->best_choice;
102  ASSERT_HOST(best_choice != nullptr);
103  switch (level) {
104  case RIL_BLOCK:
105  do {
106  best_choice = res_it.word()->best_choice;
107  ASSERT_HOST(best_choice != nullptr);
108  mean_certainty += best_choice->certainty();
109  ++certainty_count;
110  res_it.forward();
111  } while (res_it.block() == res_it.prev_block());
112  break;
113  case RIL_PARA:
114  do {
115  best_choice = res_it.word()->best_choice;
116  ASSERT_HOST(best_choice != nullptr);
117  mean_certainty += best_choice->certainty();
118  ++certainty_count;
119  res_it.forward();
120  } while (res_it.block() == res_it.prev_block() &&
121  res_it.row()->row->para() == res_it.prev_row()->row->para());
122  break;
123  case RIL_TEXTLINE:
124  do {
125  best_choice = res_it.word()->best_choice;
126  ASSERT_HOST(best_choice != nullptr);
127  mean_certainty += best_choice->certainty();
128  ++certainty_count;
129  res_it.forward();
130  } while (res_it.row() == res_it.prev_row());
131  break;
132  case RIL_WORD:
133  mean_certainty += best_choice->certainty();
134  ++certainty_count;
135  break;
136  case RIL_SYMBOL:
137  mean_certainty += best_choice->certainty(blob_index_);
138  ++certainty_count;
139  }
140  if (certainty_count > 0) {
141  mean_certainty /= certainty_count;
142  float confidence = 100 + 5 * mean_certainty;
143  if (confidence < 0.0f) confidence = 0.0f;
144  if (confidence > 100.0f) confidence = 100.0f;
145  return confidence;
146  }
147  return 0.0f;
148 }
float certainty() const
Definition: ratngs.h:330
WERD_RES * word() const
Definition: pageres.h:751
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ EquivalentToTruth()

bool tesseract::LTRResultIterator::EquivalentToTruth ( const char *  str) const

Definition at line 275 of file ltrresultiterator.cpp.

275  {
276  if (!HasTruthString()) return false;
277  ASSERT_HOST(it_->word()->uch_set != nullptr);
278  WERD_CHOICE str_wd(str, *(it_->word()->uch_set));
279  return it_->word()->blamer_bundle->ChoiceIsCorrect(&str_wd);
280 }
bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const
Definition: blamer.cpp:116
WERD_RES * word() const
Definition: pageres.h:751
const UNICHARSET * uch_set
Definition: pageres.h:206
BlamerBundle * blamer_bundle
Definition: pageres.h:246
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ GetBlamerDebug()

const char * tesseract::LTRResultIterator::GetBlamerDebug ( ) const

Definition at line 253 of file ltrresultiterator.cpp.

253  {
254  return it_->word()->blamer_bundle->debug().string();
255 }
const STRING & debug() const
Definition: blamer.h:128
const char * string() const
Definition: strngs.cpp:196
WERD_RES * word() const
Definition: pageres.h:751
BlamerBundle * blamer_bundle
Definition: pageres.h:246

◆ GetBlamerMisadaptionDebug()

const char * tesseract::LTRResultIterator::GetBlamerMisadaptionDebug ( ) const

Definition at line 259 of file ltrresultiterator.cpp.

259  {
260  return it_->word()->blamer_bundle->misadaption_debug().string();
261 }
const char * string() const
Definition: strngs.cpp:196
WERD_RES * word() const
Definition: pageres.h:751
const STRING & misadaption_debug() const
Definition: blamer.h:131
BlamerBundle * blamer_bundle
Definition: pageres.h:246

◆ GetParamsTrainingBundle()

const void * tesseract::LTRResultIterator::GetParamsTrainingBundle ( ) const

Definition at line 246 of file ltrresultiterator.cpp.

246  {
247  return (it_->word() != nullptr && it_->word()->blamer_bundle != nullptr) ?
248  &(it_->word()->blamer_bundle->params_training_bundle()) : nullptr;
249 }
const tesseract::ParamsTrainingBundle & params_training_bundle() const
Definition: blamer.h:162
WERD_RES * word() const
Definition: pageres.h:751
BlamerBundle * blamer_bundle
Definition: pageres.h:246

◆ GetUTF8Text()

char * tesseract::LTRResultIterator::GetUTF8Text ( PageIteratorLevel  level) const

Definition at line 47 of file ltrresultiterator.cpp.

47  {
48  if (it_->word() == nullptr) return nullptr; // Already at the end!
49  STRING text;
50  PAGE_RES_IT res_it(*it_);
51  WERD_CHOICE* best_choice = res_it.word()->best_choice;
52  ASSERT_HOST(best_choice != nullptr);
53  if (level == RIL_SYMBOL) {
54  text = res_it.word()->BestUTF8(blob_index_, false);
55  } else if (level == RIL_WORD) {
56  text = best_choice->unichar_string();
57  } else {
58  bool eol = false; // end of line?
59  bool eop = false; // end of paragraph?
60  do { // for each paragraph in a block
61  do { // for each text line in a paragraph
62  do { // for each word in a text line
63  best_choice = res_it.word()->best_choice;
64  ASSERT_HOST(best_choice != nullptr);
65  text += best_choice->unichar_string();
66  text += " ";
67  res_it.forward();
68  eol = res_it.row() != res_it.prev_row();
69  } while (!eol);
70  text.truncate_at(text.length() - 1);
71  text += line_separator_;
72  eop = res_it.block() != res_it.prev_block() ||
73  res_it.row()->row->para() != res_it.prev_row()->row->para();
74  } while (level != RIL_TEXTLINE && !eop);
75  if (eop) text += paragraph_separator_;
76  } while (level == RIL_BLOCK && res_it.block() == res_it.prev_block());
77  }
78  int length = text.length() + 1;
79  char* result = new char[length];
80  strncpy(result, text.string(), length);
81  return result;
82 }
const char * string() const
Definition: strngs.cpp:196
WERD_RES * word() const
Definition: pageres.h:751
void truncate_at(int32_t index)
Definition: strngs.cpp:267
Definition: strngs.h:45
const STRING & unichar_string() const
Definition: ratngs.h:541
int32_t length() const
Definition: strngs.cpp:191
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ HasBlamerInfo()

bool tesseract::LTRResultIterator::HasBlamerInfo ( ) const

Definition at line 239 of file ltrresultiterator.cpp.

239  {
240  return it_->word() != nullptr && it_->word()->blamer_bundle != nullptr &&
241  it_->word()->blamer_bundle->HasDebugInfo();
242 }
WERD_RES * word() const
Definition: pageres.h:751
BlamerBundle * blamer_bundle
Definition: pageres.h:246
bool HasDebugInfo() const
Definition: blamer.h:125

◆ HasTruthString()

bool tesseract::LTRResultIterator::HasTruthString ( ) const

Definition at line 264 of file ltrresultiterator.cpp.

264  {
265  if (it_->word() == nullptr) return false; // Already at the end!
266  if (it_->word()->blamer_bundle == nullptr ||
267  it_->word()->blamer_bundle->NoTruth()) {
268  return false; // no truth information for this word
269  }
270  return true;
271 }
WERD_RES * word() const
Definition: pageres.h:751
bool NoTruth() const
Definition: blamer.h:121
BlamerBundle * blamer_bundle
Definition: pageres.h:246

◆ RowAttributes()

void tesseract::LTRResultIterator::RowAttributes ( float *  row_height,
float *  descenders,
float *  ascenders 
) const

Definition at line 150 of file ltrresultiterator.cpp.

151  {
152  *row_height = it_->row()->row->x_height() + it_->row()->row->ascenders() -
153  it_->row()->row->descenders();
154  *descenders = it_->row()->row->descenders();
155  *ascenders = it_->row()->row->ascenders();
156 }
ROW_RES * row() const
Definition: pageres.h:754
float ascenders() const
Definition: ocrrow.h:82
float x_height() const
Definition: ocrrow.h:64
float descenders() const
Definition: ocrrow.h:85
ROW * row
Definition: pageres.h:143

◆ SetLineSeparator()

void tesseract::LTRResultIterator::SetLineSeparator ( const char *  new_line)

Definition at line 85 of file ltrresultiterator.cpp.

85  {
86  line_separator_ = new_line;
87 }

◆ SetParagraphSeparator()

void tesseract::LTRResultIterator::SetParagraphSeparator ( const char *  new_para)

Definition at line 90 of file ltrresultiterator.cpp.

90  {
91  paragraph_separator_ = new_para;
92 }

◆ SymbolIsDropcap()

bool tesseract::LTRResultIterator::SymbolIsDropcap ( ) const

Definition at line 341 of file ltrresultiterator.cpp.

341  {
342  if (cblob_it_ == nullptr && it_->word() != nullptr)
343  return it_->word()->best_choice->BlobPosition(blob_index_) == SP_DROPCAP;
344  return false;
345 }
WERD_RES * word() const
Definition: pageres.h:751
tesseract::ScriptPos BlobPosition(int index) const
Definition: ratngs.h:322
WERD_CHOICE * best_choice
Definition: pageres.h:235

◆ SymbolIsSubscript()

bool tesseract::LTRResultIterator::SymbolIsSubscript ( ) const

Definition at line 332 of file ltrresultiterator.cpp.

332  {
333  if (cblob_it_ == nullptr && it_->word() != nullptr)
334  return it_->word()->best_choice->BlobPosition(blob_index_) == SP_SUBSCRIPT;
335  return false;
336 }
WERD_RES * word() const
Definition: pageres.h:751
tesseract::ScriptPos BlobPosition(int index) const
Definition: ratngs.h:322
WERD_CHOICE * best_choice
Definition: pageres.h:235

◆ SymbolIsSuperscript()

bool tesseract::LTRResultIterator::SymbolIsSuperscript ( ) const

Definition at line 322 of file ltrresultiterator.cpp.

322  {
323  if (cblob_it_ == nullptr && it_->word() != nullptr)
324  return it_->word()->best_choice->BlobPosition(blob_index_) ==
325  SP_SUPERSCRIPT;
326  return false;
327 }
WERD_RES * word() const
Definition: pageres.h:751
tesseract::ScriptPos BlobPosition(int index) const
Definition: ratngs.h:322
WERD_CHOICE * best_choice
Definition: pageres.h:235

◆ WordDirection()

StrongScriptDirection tesseract::LTRResultIterator::WordDirection ( ) const

Definition at line 204 of file ltrresultiterator.cpp.

204  {
205  if (it_->word() == nullptr) return DIR_NEUTRAL;
206  bool has_rtl = it_->word()->AnyRtlCharsInWord();
207  bool has_ltr = it_->word()->AnyLtrCharsInWord();
208  if (has_rtl && !has_ltr)
209  return DIR_RIGHT_TO_LEFT;
210  if (has_ltr && !has_rtl)
211  return DIR_LEFT_TO_RIGHT;
212  if (!has_ltr && !has_rtl)
213  return DIR_NEUTRAL;
214  return DIR_MIX;
215 }
bool AnyRtlCharsInWord() const
Definition: pageres.h:391
WERD_RES * word() const
Definition: pageres.h:751
bool AnyLtrCharsInWord() const
Definition: pageres.h:408

◆ WordFontAttributes()

const char * tesseract::LTRResultIterator::WordFontAttributes ( bool *  is_bold,
bool *  is_italic,
bool *  is_underlined,
bool *  is_monospace,
bool *  is_serif,
bool *  is_smallcaps,
int *  pointsize,
int *  font_id 
) const

Definition at line 166 of file ltrresultiterator.cpp.

173  {
174  if (it_->word() == nullptr) return nullptr; // Already at the end!
175  float row_height = it_->row()->row->x_height() +
176  it_->row()->row->ascenders() - it_->row()->row->descenders();
177  // Convert from pixels to printers points.
178  *pointsize = scaled_yres_ > 0
179  ? static_cast<int>(row_height * kPointsPerInch / scaled_yres_ + 0.5)
180  : 0;
181  if (it_->word()->fontinfo == nullptr) {
182  *font_id = -1;
183  return nullptr; // No font information.
184  }
185  const FontInfo& font_info = *it_->word()->fontinfo;
186  *font_id = font_info.universal_id;
187  *is_bold = font_info.is_bold();
188  *is_italic = font_info.is_italic();
189  *is_underlined = false; // TODO(rays) fix this!
190  *is_monospace = font_info.is_fixed_pitch();
191  *is_serif = font_info.is_serif();
192  *is_smallcaps = it_->word()->small_caps;
193 
194  return font_info.name;
195 }
ROW_RES * row() const
Definition: pageres.h:754
constexpr int kPointsPerInch
Definition: publictypes.h:33
const FontInfo * fontinfo
Definition: pageres.h:304
bool is_bold() const
Definition: fontinfo.h:112
bool small_caps
Definition: pageres.h:299
float ascenders() const
Definition: ocrrow.h:82
float x_height() const
Definition: ocrrow.h:64
bool is_italic() const
Definition: fontinfo.h:111
WERD_RES * word() const
Definition: pageres.h:751
float descenders() const
Definition: ocrrow.h:85
bool is_serif() const
Definition: fontinfo.h:114
int32_t universal_id
Definition: fontinfo.h:123
bool is_fixed_pitch() const
Definition: fontinfo.h:113
ROW * row
Definition: pageres.h:143

◆ WordIsFromDictionary()

bool tesseract::LTRResultIterator::WordIsFromDictionary ( ) const

Definition at line 218 of file ltrresultiterator.cpp.

218  {
219  if (it_->word() == nullptr) return false; // Already at the end!
220  int permuter = it_->word()->best_choice->permuter();
221  return permuter == SYSTEM_DAWG_PERM || permuter == FREQ_DAWG_PERM ||
222  permuter == USER_DAWG_PERM;
223 }
uint8_t permuter() const
Definition: ratngs.h:346
WERD_RES * word() const
Definition: pageres.h:751
WERD_CHOICE * best_choice
Definition: pageres.h:235

◆ WordIsNumeric()

bool tesseract::LTRResultIterator::WordIsNumeric ( ) const

Definition at line 232 of file ltrresultiterator.cpp.

232  {
233  if (it_->word() == nullptr) return false; // Already at the end!
234  int permuter = it_->word()->best_choice->permuter();
235  return permuter == NUMBER_PERM;
236 }
uint8_t permuter() const
Definition: ratngs.h:346
WERD_RES * word() const
Definition: pageres.h:751
WERD_CHOICE * best_choice
Definition: pageres.h:235

◆ WordLattice()

const char * tesseract::LTRResultIterator::WordLattice ( int *  lattice_size) const

Definition at line 312 of file ltrresultiterator.cpp.

312  {
313  if (it_->word() == nullptr) return nullptr; // Already at the end!
314  if (it_->word()->blamer_bundle == nullptr) return nullptr;
315  *lattice_size = it_->word()->blamer_bundle->lattice_size();
316  return it_->word()->blamer_bundle->lattice_data();
317 }
const char * lattice_data() const
Definition: blamer.h:150
WERD_RES * word() const
Definition: pageres.h:751
BlamerBundle * blamer_bundle
Definition: pageres.h:246
int lattice_size() const
Definition: blamer.h:153

◆ WordNormedUTF8Text()

char * tesseract::LTRResultIterator::WordNormedUTF8Text ( ) const

Definition at line 295 of file ltrresultiterator.cpp.

295  {
296  if (it_->word() == nullptr) return nullptr; // Already at the end!
297  STRING ocr_text;
298  WERD_CHOICE* best_choice = it_->word()->best_choice;
299  const UNICHARSET *unicharset = it_->word()->uch_set;
300  ASSERT_HOST(best_choice != nullptr);
301  for (int i = 0; i < best_choice->length(); ++i) {
302  ocr_text += unicharset->get_normed_unichar(best_choice->unichar_id(i));
303  }
304  int length = ocr_text.length() + 1;
305  char* result = new char[length];
306  strncpy(result, ocr_text.string(), length);
307  return result;
308 }
const char * get_normed_unichar(UNICHAR_ID unichar_id) const
Definition: unicharset.h:823
const char * string() const
Definition: strngs.cpp:196
WERD_RES * word() const
Definition: pageres.h:751
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:315
int length() const
Definition: ratngs.h:303
Definition: strngs.h:45
const UNICHARSET * uch_set
Definition: pageres.h:206
int32_t length() const
Definition: strngs.cpp:191
WERD_CHOICE * best_choice
Definition: pageres.h:235
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ WordRecognitionLanguage()

const char * tesseract::LTRResultIterator::WordRecognitionLanguage ( ) const

Definition at line 198 of file ltrresultiterator.cpp.

198  {
199  if (it_->word() == nullptr || it_->word()->tesseract == nullptr) return nullptr;
200  return it_->word()->tesseract->lang.string();
201 }
const char * string() const
Definition: strngs.cpp:196
STRING lang
Definition: ccutil.h:66
WERD_RES * word() const
Definition: pageres.h:751
tesseract::Tesseract * tesseract
Definition: pageres.h:282

◆ WordTruthUTF8Text()

char * tesseract::LTRResultIterator::WordTruthUTF8Text ( ) const

Definition at line 284 of file ltrresultiterator.cpp.

284  {
285  if (!HasTruthString()) return nullptr;
286  STRING truth_text = it_->word()->blamer_bundle->TruthString();
287  int length = truth_text.length() + 1;
288  char* result = new char[length];
289  strncpy(result, truth_text.string(), length);
290  return result;
291 }
const char * string() const
Definition: strngs.cpp:196
STRING TruthString() const
Definition: blamer.h:112
WERD_RES * word() const
Definition: pageres.h:751
Definition: strngs.h:45
BlamerBundle * blamer_bundle
Definition: pageres.h:246
int32_t length() const
Definition: strngs.cpp:191

Friends And Related Function Documentation

◆ ChoiceIterator

friend class ChoiceIterator
friend

Definition at line 49 of file ltrresultiterator.h.

Member Data Documentation

◆ line_separator_

const char* tesseract::LTRResultIterator::line_separator_
protected

Definition at line 186 of file ltrresultiterator.h.

◆ paragraph_separator_

const char* tesseract::LTRResultIterator::paragraph_separator_
protected

Definition at line 187 of file ltrresultiterator.h.


The documentation for this class was generated from the following files: