tesseract  5.0.0-alpha-619-ge9db
tesseract::LigatureTable Class Reference

#include <ligature_table.h>

Public Member Functions

std::string AddLigatures (const std::string &str, const PangoFontInfo *font) const
 
std::string RemoveLigatures (const std::string &str) const
 
std::string RemoveCustomLigatures (const std::string &str) const
 
const LigHash & norm_to_lig_table () const
 
const LigHash & lig_to_norm_table () const
 

Static Public Member Functions

static LigatureTable * Get ()
 

Protected Member Functions

 LigatureTable ()
 
void Init ()
 

Protected Attributes

LigHash norm_to_lig_table_
 
LigHash lig_to_norm_table_
 
int min_lig_length_
 
int max_lig_length_
 
int min_norm_length_
 
int max_norm_length_
 

Static Protected Attributes

static std::unique_ptr< LigatureTable > instance_
 

Detailed Description

Definition at line 55 of file ligature_table.h.

Constructor & Destructor Documentation

◆ LigatureTable()

tesseract::LigatureTable::LigatureTable ( )
protected

Definition at line 78 of file ligature_table.cpp.

82  {

Member Function Documentation

◆ AddLigatures()

std::string tesseract::LigatureTable::AddLigatures ( const std::string & str,
const PangoFontInfo * font 
) const

Definition at line 174 of file ligature_table.cpp.

185  {
186  result += str[i];
187  step = 1;
188  }
189  }
190  result += str.substr(i, len - i);
191  return result;
192 }
193 
194 } // namespace tesseract

◆ Get()

LigatureTable * tesseract::LigatureTable::Get ( )
static

Definition at line 70 of file ligature_table.cpp.

82  {

◆ Init()

void tesseract::LigatureTable::Init ( )
protected

Definition at line 81 of file ligature_table.cpp.

82  {
83  norm_to_lig_table_[normed8] = lig8;
84  lig_to_norm_table_[lig8] = normed8;
85  if (min_lig_length_ == 0 || lig_length < min_lig_length_)
86  min_lig_length_ = lig_length;
87  if (lig_length > max_lig_length_)
88  max_lig_length_ = lig_length;
89  if (min_norm_length_ == 0 || norm_length < min_norm_length_)
90  min_norm_length_ = norm_length;
91  if (norm_length > max_norm_length_)
92  max_norm_length_ = norm_length;
93  }
94  }
95  // Add custom extra ligatures.
96  for (int i = 0; UNICHARSET::kCustomLigatures[i][0] != nullptr; ++i) {
99  int norm_length = strlen(UNICHARSET::kCustomLigatures[i][0]);
100  if (min_norm_length_ == 0 || norm_length < min_norm_length_)
101  min_norm_length_ = norm_length;
102  if (norm_length > max_norm_length_)
103  max_norm_length_ = norm_length;
104 
107  }
108  }
109 }
110 
112  std::string result;
113  UNICHAR::const_iterator it_begin = UNICHAR::begin(str.c_str(), str.length());
114  UNICHAR::const_iterator it_end = UNICHAR::end(str.c_str(), str.length());
115  char tmp[5];
116  int len;
117  for (UNICHAR::const_iterator it = it_begin; it != it_end; ++it) {
118  len = it.get_utf8(tmp);
119  tmp[len] = '\0';
120  LigHash::const_iterator lig_it = lig_to_norm_table_.find(tmp);
121  if (lig_it != lig_to_norm_table_.end()) {
122  result += lig_it->second;
123  } else {
124  result += tmp;
125  }
126  }
127  return result;

◆ lig_to_norm_table()

const LigHash& tesseract::LigatureTable::lig_to_norm_table ( ) const
inline

Definition at line 72 of file ligature_table.h.

73  :

◆ norm_to_lig_table()

const LigHash& tesseract::LigatureTable::norm_to_lig_table ( ) const
inline

Definition at line 69 of file ligature_table.h.

73  :

◆ RemoveCustomLigatures()

std::string tesseract::LigatureTable::RemoveCustomLigatures ( const std::string & str) const

Definition at line 148 of file ligature_table.cpp.

149  {
150  result += tmp;
151  }
152  }
153  return result;
154 }
155 
157  const PangoFontInfo* font) const {
158  std::string result;
159  int len = str.size();
160  int step = 0;
161  int i = 0;
162  for (i = 0; i < len - min_norm_length_ + 1; i += step) {
163  step = 0;
164  for (int liglen = max_norm_length_; liglen >= min_norm_length_; --liglen) {
165  if (i + liglen <= len) {
166  std::string lig_cand = str.substr(i, liglen);
167  LigHash::const_iterator it = norm_to_lig_table_.find(lig_cand);
168  if (it != norm_to_lig_table_.end()) {
169  tlog(3, "Considering %s -> %s\n", lig_cand.c_str(),
170  it->second.c_str());
171  if (font) {
172  // Test for renderability.

◆ RemoveLigatures()

std::string tesseract::LigatureTable::RemoveLigatures ( const std::string & str) const

Definition at line 129 of file ligature_table.cpp.

130  {
131  std::string result;
132  UNICHAR::const_iterator it_begin = UNICHAR::begin(str.c_str(), str.length());
133  UNICHAR::const_iterator it_end = UNICHAR::end(str.c_str(), str.length());
134  char tmp[5];
135  int len;
136  int norm_ind;
137  for (UNICHAR::const_iterator it = it_begin; it != it_end; ++it) {
138  len = it.get_utf8(tmp);
139  tmp[len] = '\0';
140  norm_ind = -1;
141  for (int i = 0;
142  UNICHARSET::kCustomLigatures[i][0] != nullptr && norm_ind < 0; ++i) {
143  if (!strcmp(tmp, UNICHARSET::kCustomLigatures[i][1])) {
144  norm_ind = i;
145  }
146  }

Member Data Documentation

◆ instance_

std::unique_ptr< LigatureTable > tesseract::LigatureTable::instance_
static protected

Definition at line 82 of file ligature_table.h.

◆ lig_to_norm_table_

LigHash tesseract::LigatureTable::lig_to_norm_table_
protected

Definition at line 84 of file ligature_table.h.

◆ max_lig_length_

int tesseract::LigatureTable::max_lig_length_
protected

Definition at line 86 of file ligature_table.h.

◆ max_norm_length_

int tesseract::LigatureTable::max_norm_length_
protected

Definition at line 88 of file ligature_table.h.

◆ min_lig_length_

int tesseract::LigatureTable::min_lig_length_
protected

Definition at line 85 of file ligature_table.h.

◆ min_norm_length_

int tesseract::LigatureTable::min_norm_length_
protected

Definition at line 87 of file ligature_table.h.

◆ norm_to_lig_table_

LigHash tesseract::LigatureTable::norm_to_lig_table_
protected

Definition at line 83 of file ligature_table.h.


The documentation for this class was generated from the following files:
string
std::string string
Definition: equationdetect_test.cc:21
tesseract::UNICHAR::begin
static const_iterator begin(const char *utf8_str, int byte_length)
Definition: unichar.cpp:204
tesseract::UNICHAR::end
static const_iterator end(const char *utf8_str, int byte_length)
Definition: unichar.cpp:208
tesseract::LigatureTable::lig_to_norm_table_
LigHash lig_to_norm_table_
Definition: ligature_table.h:84
tesseract::LigatureTable::max_lig_length_
int max_lig_length_
Definition: ligature_table.h:86
tlog
#define tlog(level,...)
Definition: tlog.h:32
UNICHARSET::kCustomLigatures
static const TESS_API char * kCustomLigatures[][2]
Definition: unicharset.h:150
tesseract::LigatureTable::LigatureTable
LigatureTable()
Definition: ligature_table.cpp:78
tesseract::UNICHAR::const_iterator::get_utf8
int get_utf8(char *buf) const
Definition: unichar.cpp:178
tesseract::LigatureTable::norm_to_lig_table_
LigHash norm_to_lig_table_
Definition: ligature_table.h:83
tesseract::LigatureTable::min_lig_length_
int min_lig_length_
Definition: ligature_table.h:85
tesseract::LigatureTable::AddLigatures
std::string AddLigatures(const std::string &str, const PangoFontInfo *font) const
Definition: ligature_table.cpp:174
tesseract::LigatureTable::min_norm_length_
int min_norm_length_
Definition: ligature_table.h:87
tesseract::LigatureTable::max_norm_length_
int max_norm_length_
Definition: ligature_table.h:88
tesseract::LigatureTable::RemoveLigatures
std::string RemoveLigatures(const std::string &str) const
Definition: ligature_table.cpp:129