tesseract  5.0.0-alpha-619-ge9db
unicodetext.h File Reference
#include <stddef.h>
#include <iterator>
#include <string>
#include <utility>
#include "syntaxnet/base.h"

Go to the source code of this file.

Classes

class  UnicodeText
 
class  UnicodeText::const_iterator
 
class  UnicodeText::const_reverse_iterator
 

Macros

#define arraysize(array)   (sizeof(ArraySizeHelper(array)))
 

Typedefs

typedef pair< UnicodeText::const_iterator, UnicodeText::const_iterator > UnicodeTextRange
 

Functions

bool operator== (const UnicodeText &lhs, const UnicodeText &rhs)
 
bool operator!= (const UnicodeText &lhs, const UnicodeText &rhs)
 
bool UnicodeTextRangeIsEmpty (const UnicodeTextRange &r)
 
UnicodeText MakeUnicodeTextAcceptingOwnership (char *utf8_buffer, int byte_length, int byte_capacity)
 
UnicodeText MakeUnicodeTextWithoutAcceptingOwnership (const char *utf8_buffer, int byte_length)
 
UnicodeText UTF8ToUnicodeText (const char *utf8_buf, int len, bool do_copy)
 
UnicodeText UTF8ToUnicodeText (const string &utf_string, bool do_copy)
 
UnicodeText UTF8ToUnicodeText (const char *utf8_buf, int len)
 
UnicodeText UTF8ToUnicodeText (const string &utf8_string)
 
string UnicodeTextToUTF8 (const UnicodeText &t)
 
template<typename T , size_t N>
char(& ArraySizeHelper (T(&array)[N]))[N]
 
string CodepointString (const UnicodeText &t)
 

Macro Definition Documentation

◆ arraysize

#define arraysize (   array)    (sizeof(ArraySizeHelper(array)))

Definition at line 470 of file unicodetext.h.

Typedef Documentation

◆ UnicodeTextRange

typedef pair< UnicodeText::const_iterator, UnicodeText::const_iterator > UnicodeTextRange

Function Documentation

◆ ArraySizeHelper()

template<typename T , size_t N>
char(& ArraySizeHelper ( T(&array)[N] ))[N]

◆ CodepointString()

string CodepointString ( const UnicodeText &  t)

Definition at line 502 of file unicodetext.cc.

502  {
503  string s;
504  UnicodeText::const_iterator it = t.begin(), end = t.end();
505  while (it != end) tensorflow::strings::Appendf(&s, "%X ", *it++);
506  return s;
507 }

◆ MakeUnicodeTextAcceptingOwnership()

UnicodeText MakeUnicodeTextAcceptingOwnership ( char *  utf8_buffer,
int  byte_length,
int  byte_capacity 
)
inline

Definition at line 405 of file unicodetext.h.

406  {
407  return UnicodeText().TakeOwnershipOfUTF8(
408  utf8_buffer, byte_length, byte_capacity);
409 }

◆ MakeUnicodeTextWithoutAcceptingOwnership()

UnicodeText MakeUnicodeTextWithoutAcceptingOwnership ( const char *  utf8_buffer,
int  byte_length 
)
inline

Definition at line 415 of file unicodetext.h.

416  {
417  return UnicodeText().PointToUTF8(utf8_buffer, byte_length);
418 }

◆ operator!=()

bool operator!= ( const UnicodeText &  lhs,
const UnicodeText &  rhs 
)
inline

Definition at line 380 of file unicodetext.h.

380  {
381  return !(lhs == rhs);
382 }

◆ operator==()

bool operator== ( const UnicodeText &  lhs,
const UnicodeText &  rhs 
)

Definition at line 375 of file unicodetext.cc.

375  {
376  if (&lhs == &rhs) return true;
377  if (lhs.repr_.size_ != rhs.repr_.size_) return false;
378  return memcmp(lhs.repr_.data_, rhs.repr_.data_, lhs.repr_.size_) == 0;
379 }

◆ UnicodeTextRangeIsEmpty()

bool UnicodeTextRangeIsEmpty ( const UnicodeTextRange &  r)
inline

Definition at line 389 of file unicodetext.h.

389  {
390  return r.first == r.second;
391 }

◆ UnicodeTextToUTF8()

string UnicodeTextToUTF8 ( const UnicodeText &  t)
inline

Definition at line 461 of file unicodetext.h.

461  {
462  return string(t.utf8_data(), t.utf8_length());
463 }

◆ UTF8ToUnicodeText() [1/4]

UnicodeText UTF8ToUnicodeText ( const char *  utf8_buf,
int  len 
)
inline

Definition at line 452 of file unicodetext.h.

452  {
453  return UTF8ToUnicodeText(utf8_buf, len, true);
454 }

◆ UTF8ToUnicodeText() [2/4]

UnicodeText UTF8ToUnicodeText ( const char *  utf8_buf,
int  len,
bool  do_copy 
)
inline

Definition at line 437 of file unicodetext.h.

438  {
439  UnicodeText t;
440  if (do_copy) {
441  t.CopyUTF8(utf8_buf, len);
442  } else {
443  t.PointToUTF8(utf8_buf, len);
444  }
445  return t;
446 }

◆ UTF8ToUnicodeText() [3/4]

UnicodeText UTF8ToUnicodeText ( const string &  utf8_string)
inline

Definition at line 455 of file unicodetext.h.

455  {
456  return UTF8ToUnicodeText(utf8_string, true);
457 }

◆ UTF8ToUnicodeText() [4/4]

UnicodeText UTF8ToUnicodeText ( const string &  utf_string,
bool  do_copy 
)
inline

Definition at line 448 of file unicodetext.h.

448  {
449  return UTF8ToUnicodeText(utf_string.data(), utf_string.size(), do_copy);
450 }
UTF8ToUnicodeText
UnicodeText UTF8ToUnicodeText(const char *utf8_buf, int len, bool do_copy)
Definition: unicodetext.h:437
string
std::string string
Definition: equationdetect_test.cc:21
UnicodeText::TakeOwnershipOfUTF8
UnicodeText & TakeOwnershipOfUTF8(char *utf8_buffer, int byte_length, int byte_capacity)
Definition: unicodetext.cc:236
UnicodeText::utf8_length
int utf8_length() const
Definition: unicodetext.h:293
UnicodeText
Definition: unicodetext.h:116
UnicodeText::PointToUTF8
UnicodeText & PointToUTF8(const char *utf8_buffer, int byte_length)
Definition: unicodetext.cc:256
UnicodeText::end
const_iterator end() const
Definition: unicodetext.cc:412
UnicodeText::utf8_data
const char * utf8_data() const
Definition: unicodetext.h:292
UnicodeText::const_iterator
Definition: unicodetext.h:176
UnicodeText::CopyUTF8
UnicodeText & CopyUTF8(const char *utf8_buffer, int byte_length)
Definition: unicodetext.cc:219
UnicodeText::begin
const_iterator begin() const
Definition: unicodetext.cc:408