tesseract  5.0.0-alpha-619-ge9db
UnicodeText::const_iterator Class Reference

#include <unicodetext.h>

Public Types

typedef std::bidirectional_iterator_tag iterator_category
 
typedef char32 value_type
 
typedef ptrdiff_t difference_type
 
typedef void pointer
 

Public Member Functions

 const_iterator ()
 
 const_iterator (const const_iterator &other)
 
const_iterator & operator= (const const_iterator &other)
 
char32 operator* () const
 
const_iterator & operator++ ()
 
const_iterator operator++ (int)
 
const_iterator & operator-- ()
 
const_iterator operator-- (int)
 
int get_utf8 (char *buf) const
 
string get_utf8_string () const
 
int utf8_length () const
 
const char * utf8_data () const
 
string DebugString () const
 

Public Attributes

typedef const char32 reference
 

Friends

class UnicodeText
 
class UnicodeTextUtils
 
class UTF8StateTableProperty
 
bool operator== (const CI &lhs, const CI &rhs)
 
bool operator!= (const CI &lhs, const CI &rhs)
 
bool operator< (const CI &lhs, const CI &rhs)
 
bool operator> (const CI &lhs, const CI &rhs)
 
bool operator<= (const CI &lhs, const CI &rhs)
 
bool operator>= (const CI &lhs, const CI &rhs)
 
difference_type distance (const CI &first, const CI &last)
 

Detailed Description

Definition at line 176 of file unicodetext.h.

Member Typedef Documentation

◆ difference_type

typedef ptrdiff_t UnicodeText::const_iterator::difference_type

Definition at line 181 of file unicodetext.h.

◆ iterator_category

typedef std::bidirectional_iterator_tag UnicodeText::const_iterator::iterator_category

Definition at line 179 of file unicodetext.h.

◆ pointer

typedef void UnicodeText::const_iterator::pointer

Definition at line 182 of file unicodetext.h.

◆ value_type

typedef char32 UnicodeText::const_iterator::value_type

Definition at line 180 of file unicodetext.h.

Constructor & Destructor Documentation

◆ const_iterator() [1/2]

UnicodeText::const_iterator::const_iterator ( )

Definition at line 395 of file unicodetext.cc.

395 : it_(nullptr) {}

◆ const_iterator() [2/2]

UnicodeText::const_iterator::const_iterator ( const const_iterator & other)

Definition at line 397 of file unicodetext.cc.

398  : it_(other.it_) {
399 }

Member Function Documentation

◆ DebugString()

string UnicodeText::const_iterator::DebugString ( ) const

Definition at line 495 of file unicodetext.cc.

495  {
496  return tensorflow::strings::Printf("{iter %p}", it_);
497 }

◆ get_utf8()

int UnicodeText::const_iterator::get_utf8 ( char *  buf) const

Definition at line 460 of file unicodetext.cc.

460  {
461  utf8_output[0] = it_[0]; if ((it_[0] & 0xff) < 0x80) return 1;
462  utf8_output[1] = it_[1]; if ((it_[0] & 0xff) < 0xE0) return 2;
463  utf8_output[2] = it_[2]; if ((it_[0] & 0xff) < 0xF0) return 3;
464  utf8_output[3] = it_[3];
465  return 4;
466 }

◆ get_utf8_string()

string UnicodeText::const_iterator::get_utf8_string ( ) const

Definition at line 468 of file unicodetext.cc.

468  {
469  return string(utf8_data(), utf8_length());
470 }

◆ operator*()

char32 UnicodeText::const_iterator::operator* ( ) const

Definition at line 421 of file unicodetext.cc.

421  {
422  // (We could call chartorune here, but that does some
423  // error-checking, and we're guaranteed that our data is valid
424  // UTF-8. Also, we expect this routine to be called very often. So
425  // for speed, we do the calculation ourselves.)
426 
427  // Convert from UTF-8
428  unsigned char byte1 = it_[0];
429  if (byte1 < 0x80)
430  return byte1;
431 
432  unsigned char byte2 = it_[1];
433  if (byte1 < 0xE0)
434  return ((byte1 & 0x1F) << 6)
435  | (byte2 & 0x3F);
436 
437  unsigned char byte3 = it_[2];
438  if (byte1 < 0xF0)
439  return ((byte1 & 0x0F) << 12)
440  | ((byte2 & 0x3F) << 6)
441  | (byte3 & 0x3F);
442 
443  unsigned char byte4 = it_[3];
444  return ((byte1 & 0x07) << 18)
445  | ((byte2 & 0x3F) << 12)
446  | ((byte3 & 0x3F) << 6)
447  | (byte4 & 0x3F);
448 }

◆ operator++() [1/2]

UnicodeText::const_iterator & UnicodeText::const_iterator::operator++ ( )

Definition at line 450 of file unicodetext.cc.

450  {
451  it_ += UniLib::OneCharLen(it_);
452  return *this;
453 }

◆ operator++() [2/2]

const_iterator UnicodeText::const_iterator::operator++ ( int  )
inline

Definition at line 195 of file unicodetext.h.

195  { // (iter++)
196  const_iterator result(*this);
197  ++*this;
198  return result;
199  }

◆ operator--() [1/2]

UnicodeText::const_iterator & UnicodeText::const_iterator::operator-- ( )

Definition at line 455 of file unicodetext.cc.

455  {
456  while (UniLib::IsTrailByte(*--it_));
457  return *this;
458 }

◆ operator--() [2/2]

const_iterator UnicodeText::const_iterator::operator-- ( int  )
inline

Definition at line 202 of file unicodetext.h.

202  { // (iter--)
203  const_iterator result(*this);
204  --*this;
205  return result;
206  }

◆ operator=()

UnicodeText::const_iterator & UnicodeText::const_iterator::operator= ( const const_iterator & other)

Definition at line 402 of file unicodetext.cc.

402  {
403  if (&other != this)
404  it_ = other.it_;
405  return *this;
406 }

◆ utf8_data()

const char* UnicodeText::const_iterator::utf8_data ( ) const
inline

Definition at line 233 of file unicodetext.h.

233 { return it_; }

◆ utf8_length()

int UnicodeText::const_iterator::utf8_length ( ) const

Definition at line 472 of file unicodetext.cc.

472  {
473  if ((it_[0] & 0xff) < 0x80) {
474  return 1;
475  } else if ((it_[0] & 0xff) < 0xE0) {
476  return 2;
477  } else if ((it_[0] & 0xff) < 0xF0) {
478  return 3;
479  } else {
480  return 4;
481  }
482 }

Friends And Related Function Documentation

◆ distance

difference_type distance ( const CI & first,
const CI & last
)
friend

Definition at line 44 of file unicodetext.cc.

45  {
46  return CodepointDistance(first.it_, last.it_);
47 }

◆ operator!=

bool operator!= ( const CI & lhs,
const CI & rhs
)
friend

Definition at line 211 of file unicodetext.h.

211  {
212  return !(lhs == rhs); }

◆ operator<

bool operator< ( const CI & lhs,
const CI & rhs
)
friend

Definition at line 416 of file unicodetext.cc.

417  {
418  return lhs.it_ < rhs.it_;
419 }

◆ operator<=

bool operator<= ( const CI & lhs,
const CI & rhs
)
friend

Definition at line 216 of file unicodetext.h.

216  {
217  return !(rhs < lhs); }

◆ operator==

bool operator== ( const CI & lhs,
const CI & rhs
)
friend

Definition at line 209 of file unicodetext.h.

209  {
210  return lhs.it_ == rhs.it_; }

◆ operator>

bool operator> ( const CI & lhs,
const CI & rhs
)
friend

Definition at line 214 of file unicodetext.h.

214  {
215  return rhs < lhs; }

◆ operator>=

bool operator>= ( const CI & lhs,
const CI & rhs
)
friend

Definition at line 218 of file unicodetext.h.

218  {
219  return !(lhs < rhs); }

◆ UnicodeText

friend class UnicodeText
friend

Definition at line 238 of file unicodetext.h.

◆ UnicodeTextUtils

friend class UnicodeTextUtils
friend

Definition at line 239 of file unicodetext.h.

◆ UTF8StateTableProperty

friend class UTF8StateTableProperty
friend

Definition at line 240 of file unicodetext.h.

Member Data Documentation

◆ reference

typedef const char32 UnicodeText::const_iterator::reference

Definition at line 183 of file unicodetext.h.


The documentation for this class was generated from the following files:
unicodetext.h
unicodetext.cc
string
std::string string
Definition: equationdetect_test.cc:21
UnicodeText::const_iterator::const_iterator
const_iterator()
Definition: unicodetext.cc:395
UnicodeText::const_iterator::utf8_data
const char * utf8_data() const
Definition: unicodetext.h:233
last
LIST last(LIST var_list)
Definition: oldlist.cpp:151
UniLib::IsTrailByte
bool IsTrailByte(char x)
Definition: unilib_utf8_utils.h:58
UnicodeText::const_iterator::utf8_length
int utf8_length() const
Definition: unicodetext.cc:472
UniLib::OneCharLen
int OneCharLen(const char *src)
Definition: unilib_utf8_utils.h:53