22 #ifndef TESSERACT_CCUTIL_UNICHARCOMPRESS_H_ 23 #define TESSERACT_CCUTIL_UNICHARCOMPRESS_H_ 25 #include <unordered_map> 40 memset(code_, 0,
sizeof(code_));
44 void Set(
int index,
int value) {
46 if (length_ <= index) length_ = index + 1;
50 void Set3(
int code0,
int code1,
int code2) {
57 int length()
const {
return length_; }
62 return fp->
Serialize(&self_normalized_) &&
73 if (length_ != other.length_)
return false;
74 for (
int i = 0; i < length_; ++i) {
75 if (code_[i] != other.code_[i])
return false;
83 for (
int i = 0; i < code.length_; ++i) {
84 result ^= code(i) << (7 * i);
93 int8_t self_normalized_;
150 STRING* radical_stroke_table);
174 auto it = next_codes_.find(code);
175 return it == next_codes_.end() ? nullptr : it->second;
180 auto it = final_codes_.find(code);
181 return it == final_codes_.end() ? nullptr : it->second;
207 void DefragmentCodeValues(
int encoded_null);
209 void ComputeCodeRange();
219 std::unordered_map<RecodedCharID, int, RecodedCharID::RecodedCharIDHash>
225 std::unordered_map<RecodedCharID, GenericVectorEqEq<int>*,
230 std::unordered_map<RecodedCharID, GenericVectorEqEq<int>*,
239 #endif // TESSERACT_CCUTIL_UNICHARCOMPRESS_H_
static const int kMaxCodeLen
int DecodeUnichar(const RecodedCharID &code) const
static const int kNumHangul
int EncodeUnichar(int unichar_id, RecodedCharID *code) const
bool operator==(const RecodedCharID &other) const
bool DeSerialize(char *data, size_t count=1)
void SetupDirect(const GenericVector< RecodedCharID > &codes)
const GenericVector< int > * GetFinalCodes(const RecodedCharID &code) const
bool ComputeEncoding(const UNICHARSET &unicharset, int null_id, STRING *radical_stroke_table)
void Set(int index, int value)
size_t operator()(const RecodedCharID &code) const
UnicharCompress & operator=(const UnicharCompress &src)
bool Serialize(TFile *fp) const
void Truncate(int length)
static const int kFirstHangul
STRING GetEncodingAsString(const UNICHARSET &unicharset) const
const GenericVector< int > * GetNextCodes(const RecodedCharID &code) const
bool DeSerialize(TFile *fp)
int operator()(int index) const
bool Serialize(const char *data, size_t count=1)
void SetupPassThrough(const UNICHARSET &unicharset)
static bool DecomposeHangul(int unicode, int *leading, int *vowel, int *trailing)
bool IsValidFirstCode(int code) const
bool DeSerialize(TFile *fp)
bool Serialize(TFile *fp) const
void Set3(int code0, int code1, int code2)