tesseract  5.0.0-alpha-619-ge9db
UNICHARMAP Class Reference

#include <unicharmap.h>

Public Member Functions

 UNICHARMAP ()
 
 ~UNICHARMAP ()
 
void insert (const char *const unichar_repr, UNICHAR_ID id)
 
UNICHAR_ID unichar_to_id (const char *const unichar_repr, int length) const
 
bool contains (const char *const unichar_repr, int length) const
 
int minmatch (const char *const unichar_repr) const
 
void clear ()
 

Detailed Description

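Maps unichar representations (byte strings) to UNICHAR_ID values. As the insert() listing below shows, the map is a tree of 256-entry UNICHARMAP_NODE arrays indexed by one byte of the representation per level.

Definition at line 27 of file unicharmap.h.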

Constructor & Destructor Documentation

◆ UNICHARMAP()

UNICHARMAP::UNICHARMAP ( )

Definition at line 23 of file unicharmap.cpp.

23  :
24 nodes(nullptr) {
25 }

◆ ~UNICHARMAP()

UNICHARMAP::~UNICHARMAP ( )

Definition at line 27 of file unicharmap.cpp.

27  {
28  delete[] nodes;
29 }

Member Function Documentation

◆ clear()

void UNICHARMAP::clear ( )

Definition at line 115 of file unicharmap.cpp.

115  {
116  delete[] nodes;
117  nodes = nullptr;
118 }

◆ contains()

bool UNICHARMAP::contains ( const char *const  unichar_repr,
int  length 
) const

Definition at line 79 of file unicharmap.cpp.

80  {
81  if (unichar_repr == nullptr || *unichar_repr == '\0') return false;
82  if (length <= 0 || length > UNICHAR_LEN) return false;
83  int index = 0;
84  if (unichar_repr[index] == '\0') return false;
85  UNICHARMAP_NODE* current_nodes = nodes;
86 
87  while (current_nodes != nullptr && index + 1 < length &&
88  unichar_repr[index + 1] != '\0') {
89  current_nodes =
90  current_nodes[static_cast<unsigned char>(unichar_repr[index])].children;
91  ++index;
92  }
93  return current_nodes != nullptr &&
94  (index + 1 >= length || unichar_repr[index + 1] == '\0') &&
95  current_nodes[static_cast<unsigned char>(unichar_repr[index])].id >= 0;
96 }

◆ insert()

void UNICHARMAP::insert ( const char *const  unichar_repr,
UNICHAR_ID  id 
)

Definition at line 56 of file unicharmap.cpp.

56  {
57  const char* current_char = unichar_repr;
58  if (*current_char == '\0') return;
59  UNICHARMAP_NODE** current_nodes_pointer = &nodes;
60  do {
61  if (*current_nodes_pointer == nullptr)
62  *current_nodes_pointer = new UNICHARMAP_NODE[256];
63  if (current_char[1] == '\0') {
64  (*current_nodes_pointer)
65  [static_cast<unsigned char>(*current_char)].id = id;
66  return;
67  }
68  current_nodes_pointer =
69  &((*current_nodes_pointer)
70  [static_cast<unsigned char>(*current_char)].children);
71  ++current_char;
72  } while (true);
73 }

◆ minmatch()

int UNICHARMAP::minmatch ( const char *const  unichar_repr) const

Definition at line 100 of file unicharmap.cpp.

100  {
101  const char* current_char = unichar_repr;
102  if (*current_char == '\0') return 0;
103  UNICHARMAP_NODE* current_nodes = nodes;
104 
105  while (current_nodes != nullptr && *current_char != '\0') {
106  if (current_nodes[static_cast<unsigned char>(*current_char)].id >= 0)
107  return current_char + 1 - unichar_repr;
108  current_nodes =
109  current_nodes[static_cast<unsigned char>(*current_char)].children;
110  ++current_char;
111  }
112  return 0;
113 }

◆ unichar_to_id()

UNICHAR_ID UNICHARMAP::unichar_to_id ( const char *const  unichar_repr,
int  length 
) const

Definition at line 34 of file unicharmap.cpp.

35  {
36  UNICHARMAP_NODE* current_nodes = nodes;
37 
38  assert(*unichar_repr != '\0');
39  assert(length > 0 && length <= UNICHAR_LEN);
40 
41  int index = 0;
42  if (length <= 0 || unichar_repr[index] == '\0') return INVALID_UNICHAR_ID;
43  do {
44  if (index + 1 >= length || unichar_repr[index + 1] == '\0')
45  return current_nodes[static_cast<unsigned char>(unichar_repr[index])].id;
46  current_nodes =
47  current_nodes[static_cast<unsigned char>(unichar_repr[index])].children;
48  ++index;
49  } while (true);
50 }

The documentation for this class was generated from the following files:
- unicharmap.h
- unicharmap.cpp

Referenced definitions:
UNICHAR_LEN
#define UNICHAR_LEN
Definition: unichar.h:32