tesseract  5.0.0-alpha-619-ge9db
CHAR_FRAGMENT Class Reference

#include <unicharset.h>

Public Member Functions

void set_all (const char *unichar, int pos, int total, bool natural)
 
void set_unichar (const char *uch)
 
void set_pos (int p)
 
void set_total (int t)
 
const char * get_unichar () const
 
int get_pos () const
 
int get_total () const
 
STRING to_string () const
 
bool equals (const char *other_unichar, int other_pos, int other_total) const
 
bool equals (const CHAR_FRAGMENT *other) const
 
bool is_continuation_of (const CHAR_FRAGMENT *fragment) const
 
bool is_beginning () const
 
bool is_ending () const
 
bool is_natural () const
 
void set_natural (bool value)
 

Static Public Member Functions

static STRING to_string (const char *unichar, int pos, int total, bool natural)
 
static CHAR_FRAGMENT * parse_from_string (const char *str)
 

Static Public Attributes

static const int kMinLen = 6
 
static const int kMaxLen = 3 + UNICHAR_LEN + 2
 
static const int kMaxChunks = 5
 

Detailed Description

Definition at line 48 of file unicharset.h.

Member Function Documentation

◆ equals() [1/2]

bool CHAR_FRAGMENT::equals ( const char *  other_unichar,
int  other_pos,
int  other_total 
) const
inline

Definition at line 85 of file unicharset.h.

86  {
87  return (strcmp(this->unichar, other_unichar) == 0 &&
88  this->pos == other_pos && this->total == other_total);
89  }

◆ equals() [2/2]

bool CHAR_FRAGMENT::equals ( const CHAR_FRAGMENT *  other) const
inline

Definition at line 90 of file unicharset.h.

90  {
91  return this->equals(other->get_unichar(),
92  other->get_pos(),
93  other->get_total());
94  }

◆ get_pos()

int CHAR_FRAGMENT::get_pos ( ) const
inline

Definition at line 71 of file unicharset.h.

71 { return this->pos; }

◆ get_total()

int CHAR_FRAGMENT::get_total ( ) const
inline

Definition at line 72 of file unicharset.h.

72 { return this->total; }

◆ get_unichar()

const char* CHAR_FRAGMENT::get_unichar ( ) const
inline

Definition at line 70 of file unicharset.h.

70 { return this->unichar; }

◆ is_beginning()

bool CHAR_FRAGMENT::is_beginning ( ) const
inline

Definition at line 105 of file unicharset.h.

105 { return this->pos == 0; }

◆ is_continuation_of()

bool CHAR_FRAGMENT::is_continuation_of ( const CHAR_FRAGMENT *  fragment) const
inline

Definition at line 98 of file unicharset.h.

98  {
99  return (strcmp(this->unichar, fragment->get_unichar()) == 0 &&
100  this->total == fragment->get_total() &&
101  this->pos == fragment->get_pos() + 1);
102  }

◆ is_ending()

bool CHAR_FRAGMENT::is_ending ( ) const
inline

Definition at line 108 of file unicharset.h.

108 { return this->pos == this->total-1; }

◆ is_natural()

bool CHAR_FRAGMENT::is_natural ( ) const
inline

Definition at line 113 of file unicharset.h.

113 { return natural; }

◆ parse_from_string()

CHAR_FRAGMENT * CHAR_FRAGMENT::parse_from_string ( const char *  str)
static

Definition at line 1057 of file unicharset.cpp.

1057  {
1058  const char *ptr = string;
1059  int len = strlen(string);
1060  if (len < kMinLen || *ptr != kSeparator) {
1061  return nullptr; // this string can not represent a fragment
1062  }
1063  ptr++; // move to the next character
1064  int step = 0;
1065  while ((ptr + step) < (string + len) && *(ptr + step) != kSeparator) {
1066  step += UNICHAR::utf8_step(ptr + step);
1067  }
1068  if (step == 0 || step > UNICHAR_LEN) {
1069  return nullptr; // no character for unichar or the character is too long
1070  }
1071  char unichar[UNICHAR_LEN + 1];
1072  strncpy(unichar, ptr, step);
1073  unichar[step] = '\0'; // null terminate unichar
1074  ptr += step; // move to the next fragment separator
1075  int pos = 0;
1076  int total = 0;
1077  bool natural = false;
1078  char *end_ptr = nullptr;
1079  for (int i = 0; i < 2; i++) {
1080  if (ptr > string + len || *ptr != kSeparator) {
1081  if (i == 1 && *ptr == kNaturalFlag)
1082  natural = true;
1083  else
1084  return nullptr; // Failed to parse fragment representation.
1085  }
1086  ptr++; // move to the next character
1087  i == 0 ? pos = static_cast<int>(strtol(ptr, &end_ptr, 10))
1088  : total = static_cast<int>(strtol(ptr, &end_ptr, 10));
1089  ptr = end_ptr;
1090  }
1091  if (ptr != string + len) {
1092  return nullptr; // malformed fragment representation
1093  }
1094  auto *fragment = new CHAR_FRAGMENT();
1095  fragment->set_all(unichar, pos, total, natural);
1096  return fragment;
1097 }

◆ set_all()

void CHAR_FRAGMENT::set_all ( const char *  unichar,
int  pos,
int  total,
bool  natural 
)
inline

Definition at line 58 of file unicharset.h.

58  {
59  set_unichar(unichar);
60  set_pos(pos);
61  set_total(total);
62  set_natural(natural);
63  }

◆ set_natural()

void CHAR_FRAGMENT::set_natural ( bool  value)
inline

Definition at line 114 of file unicharset.h.

114 { natural = value; }

◆ set_pos()

void CHAR_FRAGMENT::set_pos ( int  p)
inline

Definition at line 68 of file unicharset.h.

68 { this->pos = p; }

◆ set_total()

void CHAR_FRAGMENT::set_total ( int  t)
inline

Definition at line 69 of file unicharset.h.

69 { this->total = t; }

◆ set_unichar()

void CHAR_FRAGMENT::set_unichar ( const char *  uch)
inline

Definition at line 64 of file unicharset.h.

64  {
65  strncpy(this->unichar, uch, sizeof(this->unichar));
66  this->unichar[UNICHAR_LEN] = '\0';
67  }

◆ to_string() [1/2]

STRING CHAR_FRAGMENT::to_string ( ) const
inline

Definition at line 79 of file unicharset.h.

79  {
80  return to_string(unichar, pos, total, natural);
81  }

◆ to_string() [2/2]

STRING CHAR_FRAGMENT::to_string ( const char *  unichar,
int  pos,
int  total,
bool  natural 
)
static

Definition at line 1044 of file unicharset.cpp.

1045  {
1046  if (total == 1) return STRING(unichar);
1047  STRING result = "";
1048  result += kSeparator;
1049  result += unichar;
1050  char buffer[kMaxLen];
1051  snprintf(buffer, kMaxLen, "%c%d%c%d", kSeparator, pos,
1052  natural ? kNaturalFlag : kSeparator, total);
1053  result += buffer;
1054  return result;
1055 }

Member Data Documentation

◆ kMaxChunks

const int CHAR_FRAGMENT::kMaxChunks = 5
static

Definition at line 55 of file unicharset.h.

◆ kMaxLen

const int CHAR_FRAGMENT::kMaxLen = 3 + UNICHAR_LEN + 2
static

Definition at line 53 of file unicharset.h.

◆ kMinLen

const int CHAR_FRAGMENT::kMinLen = 6
static

Definition at line 51 of file unicharset.h.


The documentation for this class was generated from the following files:
unicharset.h
unicharset.cpp
string
std::string string
Definition: equationdetect_test.cc:21
CHAR_FRAGMENT::get_pos
int get_pos() const
Definition: unicharset.h:71
CHAR_FRAGMENT::get_unichar
const char * get_unichar() const
Definition: unicharset.h:70
CHAR_FRAGMENT::equals
bool equals(const char *other_unichar, int other_pos, int other_total) const
Definition: unicharset.h:85
STRING
Definition: strngs.h:45
tesstrain_utils.int
int
Definition: tesstrain_utils.py:154
CHAR_FRAGMENT::set_natural
void set_natural(bool value)
Definition: unicharset.h:114
CHAR_FRAGMENT::kMaxLen
static const int kMaxLen
Definition: unicharset.h:53
CHAR_FRAGMENT::get_total
int get_total() const
Definition: unicharset.h:72
CHAR_FRAGMENT::to_string
STRING to_string() const
Definition: unicharset.h:79
CHAR_FRAGMENT::set_unichar
void set_unichar(const char *uch)
Definition: unicharset.h:64
CHAR_FRAGMENT
Definition: unicharset.h:48
UNICHAR_LEN
#define UNICHAR_LEN
Definition: unichar.h:32
CHAR_FRAGMENT::set_total
void set_total(int t)
Definition: unicharset.h:69
CHAR_FRAGMENT::set_pos
void set_pos(int p)
Definition: unicharset.h:68
CHAR_FRAGMENT::kMinLen
static const int kMinLen
Definition: unicharset.h:51