tesseract  4.0.0-1-g2a2b
tesseract::UNICHAR::const_iterator Class Reference

#include <unichar.h>

Public Member Functions

const_iterator & operator++ ()
 
int operator* () const
 
int get_utf8 (char *buf) const
 
int utf8_len () const
 
bool is_legal () const
 
const char * utf8_data () const
 

Friends

class UNICHAR
 
bool operator== (const CI &lhs, const CI &rhs)
 
bool operator!= (const CI &lhs, const CI &rhs)
 

Detailed Description

Definition at line 108 of file unichar.h.

Member Function Documentation

◆ get_utf8()

int tesseract::UNICHAR::const_iterator::get_utf8 ( char *  buf) const

Definition at line 176 of file unichar.cpp.

176  {
177  ASSERT_HOST(it_ != nullptr);
178  const int len = utf8_step(it_);
179  if (len == 0) {
180  tprintf("WARNING: Illegal UTF8 encountered\n");
181  utf8_output[0] = ' ';
182  return 1;
183  }
184  strncpy(utf8_output, it_, len);
185  return len;
186 }
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
static int utf8_step(const char *utf8_str)
Definition: unichar.cpp:136
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ is_legal()

bool tesseract::UNICHAR::const_iterator::is_legal ( ) const

Definition at line 198 of file unichar.cpp.

198  {
199  return utf8_step(it_) > 0;
200 }
static int utf8_step(const char *utf8_str)
Definition: unichar.cpp:136

◆ operator*()

int tesseract::UNICHAR::const_iterator::operator* ( ) const

Definition at line 165 of file unichar.cpp.

165  {
166  ASSERT_HOST(it_ != nullptr);
167  const int len = utf8_step(it_);
168  if (len == 0) {
169  tprintf("WARNING: Illegal UTF8 encountered\n");
170  return ' ';
171  }
172  UNICHAR uch(it_, len);
173  return uch.first_uni();
174 }
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
static int utf8_step(const char *utf8_str)
Definition: unichar.cpp:136
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ operator++()

UNICHAR::const_iterator & tesseract::UNICHAR::const_iterator::operator++ ( )

Definition at line 151 of file unichar.cpp.

151  {
152  ASSERT_HOST(it_ != nullptr);
153  int step = utf8_step(it_);
154  if (step == 0) {
155  tprintf("ERROR: Illegal UTF8 encountered.\n");
156  for (int i = 0; i < 5 && it_[i] != '\0'; ++i) {
157  tprintf("Index %d char = 0x%x\n", i, it_[i]);
158  }
159  step = 1;
160  }
161  it_ += step;
162  return *this;
163 }
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
static int utf8_step(const char *utf8_str)
Definition: unichar.cpp:136
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ utf8_data()

const char* tesseract::UNICHAR::const_iterator::utf8_data ( ) const
inline

Definition at line 136 of file unichar.h.

136 { return it_; }

◆ utf8_len()

int tesseract::UNICHAR::const_iterator::utf8_len ( ) const

Definition at line 188 of file unichar.cpp.

188  {
189  ASSERT_HOST(it_ != nullptr);
190  const int len = utf8_step(it_);
191  if (len == 0) {
192  tprintf("WARNING: Illegal UTF8 encountered\n");
193  return 1;
194  }
195  return len;
196 }
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
static int utf8_step(const char *utf8_str)
Definition: unichar.cpp:136
#define ASSERT_HOST(x)
Definition: errcode.h:84

Friends And Related Function Documentation

◆ operator!=

bool operator!= ( const CI & lhs,
const CI & rhs 
)
friend

Definition at line 142 of file unichar.h.

142  {
143  return !(lhs == rhs);
144  }

◆ operator==

bool operator== ( const CI & lhs,
const CI & rhs 
)
friend

Definition at line 139 of file unichar.h.

139  {
140  return lhs.it_ == rhs.it_;
141  }

◆ UNICHAR

friend class UNICHAR
friend

Definition at line 147 of file unichar.h.


The documentation for this class was generated from the following files: