30 #include "unicode/errorcode.h" 31 #include "unicode/normlzr.h" 32 #include "unicode/unistr.h" 33 #include "unicode/utypes.h" 37 static std::string EncodeAsUTF8(
const char32 ch32) {
39 return std::string(uni_ch.utf8(), uni_ch.utf8_len());
61 min_norm_length_(0), max_norm_length_(0) {}
68 std::string lig8 = EncodeAsUTF8(lig);
69 icu::UnicodeString unicode_lig8(static_cast<UChar32>(lig));
70 icu::UnicodeString normed8_result;
71 icu::ErrorCode status;
72 icu::Normalizer::normalize(unicode_lig8, UNORM_NFKC, 0, normed8_result,
75 normed8_result.toUTF8String(normed8);
80 int lig_length = lig8.length();
81 int norm_length = normed8.size();
82 if (normed8 != lig8 && lig_length > 1 && norm_length > 1) {
122 result += lig_it->second;
159 int len = str.size();
165 if (i + liglen <= len) {
166 std::string lig_cand = str.substr(i, liglen);
169 tlog(3,
"Considering %s -> %s\n", lig_cand.c_str(),
178 result += it->second;
179 tlog(2,
"Substituted %s -> %s\n", lig_cand.c_str(),
190 result += str.substr(i, len - i);
LigHash norm_to_lig_table_
static TESS_API const char * kCustomLigatures[][2]
int get_utf8(char *buf) const
std::string RemoveLigatures(const std::string &str) const
static const_iterator begin(const char *utf8_str, const int byte_length)
static std::unique_ptr< LigatureTable > instance_
bool CanRenderString(const char *utf8_word, int len, std::vector< std::string > *graphemes) const
static LigatureTable * Get()
static const_iterator end(const char *utf8_str, const int byte_length)
std::string AddLigatures(const std::string &str, const PangoFontInfo *font) const
LigHash lig_to_norm_table_
std::string RemoveCustomLigatures(const std::string &str) const