42 for (
int r = 0; r < results.
size(); ++r) {
59 for (
int r = 0; r < results.
size(); ++r) {
60 if (results[r].unichar_id == unichar_id)
91 uinT8 sorted = unichars_sorted_;
92 if (fwrite(&sorted,
sizeof(sorted), 1, fp) != 1)
94 if (!unichars_.SerializeClasses(fp))
return false;
101 if (fread(&sorted,
sizeof(sorted), 1, fp) != 1)
103 unichars_sorted_ = sorted != 0;
104 if (!unichars_.DeSerializeClasses(swap, fp))
return false;
111 for (
int c = 0; c < unichars_.size(); ++c) {
112 if (unichars_[c].unichar_id == unichar_id) {
115 for (
int f = 0; f < font_list.
size(); ++f) {
116 if (font_list[f] == font_id)
125 unichars_sorted_ = unichars_.size() <= 1;
130 for (
int c = 0; c < other.unichars_.size(); ++c) {
131 for (
int f = 0; f < other.unichars_[c].font_ids.size(); ++f) {
133 other.unichars_[c].font_ids[f]);
136 unichars_sorted_ = unichars_.size() <= 1;
141 for (
int c = 0; c < unichars_.size(); ++c) {
142 if (unichars_[c].unichar_id == unichar_id) {
145 for (
int f = 0; f < font_list.
size(); ++f) {
146 if (font_list[f] == font_id)
157 for (
int c = 0; c < unichars_.size(); ++c) {
158 if (unichars_[c].unichar_id == unichar_id) {
167 for (
int c = 0; c < unichars_.size(); ++c) {
169 for (
int f = 0; f < font_list.
size(); ++f) {
170 if (font_list[f] == font_id)
179 uinT32 properties)
const {
180 for (
int c = 0; c < unichars_.size(); ++c) {
182 for (
int f = 0; f < font_list.
size(); ++f) {
183 if (font_table.
get(font_list[f]).properties == properties)
193 uinT32 properties = font_table.
get(unichars_[0].font_ids[0]).properties;
194 for (
int c = 0; c < unichars_.size(); ++c) {
196 for (
int f = 0; f < font_list.
size(); ++f) {
197 if (font_table.
get(font_list[f]).properties != properties)
212 for (
int c = 0; c < unichars_.size(); ++c) {
213 int unichar_id = unichars_[c].unichar_id;
215 for (
int f = 0; f < font_list.
size(); ++f) {
227 if (unichars_.size() != other->unichars_.size())
return false;
228 if (!unichars_sorted_) SortUnichars();
229 if (!other->unichars_sorted_) other->SortUnichars();
230 for (
int c = 0; c < unichars_.size(); ++c) {
231 if (unichars_[c].unichar_id != other->unichars_[c].unichar_id)
238 void Shape::SortUnichars() {
240 unichars_sorted_ =
true;
246 : unicharset_(&unicharset), num_fonts_(0) {
251 if (!shape_table_.Serialize(fp))
return false;
257 if (!shape_table_.DeSerialize(swap, fp))
return false;
265 if (num_fonts_ <= 0) {
266 for (
int shape_id = 0; shape_id < shape_table_.size(); ++shape_id) {
267 const Shape& shape = *shape_table_[shape_id];
268 for (
int c = 0; c < shape.
size(); ++c) {
269 for (
int f = 0; f < shape[c].font_ids.
size(); ++f) {
270 if (shape[c].font_ids[f] >= num_fonts_)
271 num_fonts_ = shape[c].font_ids[f] + 1;
282 for (
int shape_id = 0; shape_id < shape_table_.size(); ++shape_id) {
283 Shape* shape = shape_table_[shape_id];
284 for (
int c = 0; c < shape->
size(); ++c) {
285 shape->
SetUnicharId(c, unicharset_map[(*shape)[c].unichar_id]);
292 if (shape_id < 0 || shape_id >= shape_table_.size())
293 return STRING(
"INVALID_UNICHAR_ID");
297 if (shape.
size() > 100) {
301 for (
int c = 0; c < shape.
size(); ++c) {
305 if (shape.
size() < 10) {
307 result +=
" fonts =";
308 int num_fonts = shape[c].font_ids.
size();
309 if (num_fonts > 10) {
311 result.
add_str_int(
" ... ", shape[c].font_ids[num_fonts - 1]);
313 for (
int f = 0; f < num_fonts; ++f) {
324 int max_unichars = 0;
325 int num_multi_shapes = 0;
326 int num_master_shapes = 0;
327 for (
int s = 0; s < shape_table_.size(); ++s) {
333 if (shape_size > max_unichars)
334 max_unichars = shape_size;
337 result.
add_str_int(
"Number of shapes = ", num_master_shapes);
338 result.
add_str_int(
" max unichars = ", max_unichars);
339 result.
add_str_int(
" number with multiple unichars = ", num_multi_shapes);
347 int index = shape_table_.size();
350 shape_table_.push_back(shape);
351 num_fonts_ =
MAX(num_fonts_, font_id + 1);
359 for (index = 0; index < shape_table_.size() &&
360 !(other == *shape_table_[index]); ++index)
362 if (index == shape_table_.size()) {
364 shape_table_.push_back(shape);
372 delete shape_table_[shape_id];
373 shape_table_[shape_id] =
NULL;
374 shape_table_.remove(shape_id);
380 Shape& shape = *shape_table_[shape_id];
382 num_fonts_ =
MAX(num_fonts_, font_id + 1);
387 Shape& shape = *shape_table_[shape_id];
397 for (
int s = 0; s < shape_table_.size(); ++s) {
399 for (
int c = 0; c < shape.
size(); ++c) {
400 if (shape[c].unichar_id == unichar_id) {
403 for (
int f = 0; f < shape[c].font_ids.
size(); ++f) {
404 if (shape[c].font_ids[f] == font_id)
415 int* unichar_id,
int* font_id)
const {
416 const UnicharAndFonts& unichar_and_fonts = (*shape_table_[shape_id])[0];
418 *font_id = unichar_and_fonts.
font_ids[0];
426 for (
int u_ind = 0; u_ind < shape.
size(); ++u_ind) {
427 for (
int f_ind = 0; f_ind < shape[u_ind].font_ids.
size(); ++f_ind) {
428 int c = shape[u_ind].unichar_id;
429 int f = shape[u_ind].font_ids[f_ind];
430 int master_id = master_shapes.
FindShape(c, f);
431 if (master_id >= 0) {
432 shape_map.SetBit(master_id);
439 for (
int s = 0; s < master_shapes.
NumShapes(); ++s) {
456 for (
int s1 = 0; s1 < num_shapes; ++s1) {
466 int max_num_unichars = 0;
468 for (
int s = 0; s < num_shapes; ++s) {
469 if (
GetShape(s).size() > max_num_unichars)
472 return max_num_unichars;
479 for (
int s1 = start; s1 < end; ++s1) {
481 int unichar_id =
GetShape(s1)[0].unichar_id;
482 for (
int s2 = s1 + 1; s2 < end; ++s2) {
484 unichar_id ==
GetShape(s2)[0].unichar_id) {
506 for (
int c = 0; c < shape.
size(); ++c) {
507 font_count += shape[c].font_ids.
size();
517 Shape combined_shape(*shape_table_[master_id1]);
518 combined_shape.
AddShape(*shape_table_[master_id2]);
519 return combined_shape.
size();
527 shape_table_[master_id2]->set_destination_index(master_id1);
529 shape_table_[master_id1]->AddShape(*shape_table_[master_id2]);
534 Shape* tmp = shape_table_[shape_id1];
535 shape_table_[shape_id1] = shape_table_[shape_id2];
536 shape_table_[shape_id2] = tmp;
542 int dest_id = shape_table_[shape_id]->destination_index();
543 if (dest_id == shape_id || dest_id < 0)
545 int master_id = shape_table_[dest_id]->destination_index();
546 if (master_id == dest_id || master_id < 0)
557 for (c1 = 0; c1 < shape1.
size(); ++c1) {
558 int unichar_id1 = shape1[c1].unichar_id;
562 for (c2 = 0; c2 < shape2.
size(); ++c2) {
563 int unichar_id2 = shape2[c2].unichar_id;
567 return c1 == shape1.
size() || c2 == shape2.
size();
572 int shape_id)
const {
577 for (cs = 0; cs < shape.
size(); ++cs) {
578 int unichar_id = shape[cs].unichar_id;
583 for (cm1 = 0; cm1 < merge1.
size(); ++cm1) {
584 int unichar_id1 = merge1[cm1].unichar_id;
588 for (cm2 = 0; cm2 < merge2.
size(); ++cm2) {
589 int unichar_id2 = merge2[cm2].unichar_id;
593 return cs == shape.
size() || (cm1 == merge1.
size() && cm2 == merge2.
size());
600 for (
int c1 = 0; c1 < shape1.
size(); ++c1) {
601 int unichar_id1 = shape1[c1].unichar_id;
605 for (
int c2 = 0; c2 < shape2.
size(); ++c2) {
606 int unichar_id2 = shape2[c2].unichar_id;
615 int shape_id)
const {
619 for (
int cs = 0; cs < shape.
size(); ++cs) {
620 int unichar_id = shape[cs].unichar_id;
625 for (
int cm1 = 0; cm1 < merge1.
size(); ++cm1) {
626 int unichar_id1 = merge1[cm1].unichar_id;
630 for (
int cm2 = 0; cm2 < merge2.
size(); ++cm2) {
631 int unichar_id2 = merge2[cm2].unichar_id;
642 for (
int c1 = 0; c1 < shape1.
size(); ++c1) {
643 int unichar_id1 = shape1[c1].unichar_id;
654 for (
int c1 = 0; c1 < shape1.
size(); ++c1) {
656 for (
int f = 0; f < font_list1.
size(); ++f) {
668 if (shape_map !=
NULL)
670 for (
int s = 0; s < other.shape_table_.size(); ++s) {
671 if (other.shape_table_[s]->destination_index() < 0) {
672 int index =
AddShape(*other.shape_table_[s]);
673 if (shape_map !=
NULL)
674 (*shape_map)[s] = index;
682 for (
int s = 0; s < shape_table_.size(); ++s) {
683 if (shape_table_[s]->destination_index() < 0)
700 if (shape_rating.
joined) {
704 if (shape_rating.
broken) {
709 for (
int u = 0; u < shape.
size(); ++u) {
710 int result_index = AddUnicharToResults(shape[u].unichar_id,
712 unichar_map, results);
713 for (
int f = 0; f < shape[u].font_ids.
size(); ++f) {
714 (*results)[result_index].fonts.push_back(
723 int ShapeTable::AddUnicharToResults(
726 int result_index = unichar_map->
get(unichar_id);
727 if (result_index < 0) {
729 result_index = results->
push_back(result);
730 (*unichar_map)[unichar_id] = result_index;
static int FirstResultWithUnichar(const GenericVector< UnicharRating > &results, UNICHAR_ID unichar_id)
int BuildFromShape(const Shape &shape, const ShapeTable &master_shapes)
bool ContainsMultipleFontProperties(const FontInfoTable &font_table) const
static int SortByUnicharId(const void *v1, const void *v2)
int NumMasterShapes() const
bool EqualUnichars(int shape_id1, int shape_id2) const
void AddShapeToShape(int shape_id, const Shape &other)
bool Serialize(FILE *fp) const
bool IsSubsetOf(const Shape &other) const
STRING SummaryStr() const
bool DeSerialize(bool swap, FILE *fp)
void AddToShape(int unichar_id, int font_id)
bool Serialize(FILE *fp) const
bool ContainsUnichar(int unichar_id) const
void DeleteShape(int shape_id)
void SwapShapes(int shape_id1, int shape_id2)
void ForceFontMerges(int start, int end)
bool ContainsUnicharAndFont(int unichar_id, int font_id) const
void ReMapClassIds(const GenericVector< int > &unicharset_map)
int MaxNumUnichars() const
int MasterDestinationIndex(int shape_id) const
void MergeShapes(int shape_id1, int shape_id2)
int MasterFontCount(int shape_id) const
void AddShape(const Shape &other)
bool IsEqualUnichars(Shape *other)
bool CommonUnichars(int shape_id1, int shape_id2) const
const char *const id_to_unichar(UNICHAR_ID id) const
bool Serialize(FILE *fp) const
void init_to_size(int size, T t)
bool DeSerialize(bool swap, FILE *fp)
bool CommonFont(int shape_id1, int shape_id2) const
bool ContainsFont(int font_id) const
bool DeSerialize(bool swap, FILE *fp)
bool MergeEqualUnichars(int merge_id1, int merge_id2, int shape_id) const
bool AlreadyMerged(int shape_id1, int shape_id2) const
GenericVector< inT32 > font_ids
bool operator==(const Shape &other) const
bool Serialize(FILE *fp) const
STRING DebugStr(int shape_id) const
void add_str_int(const char *str, int number)
void ReverseN(void *ptr, int num_bytes)
void AddShapeToResults(const ShapeRating &shape_rating, GenericVector< int > *unichar_map, GenericVector< UnicharRating > *results) const
int MergedUnicharCount(int shape_id1, int shape_id2) const
int MasterUnicharCount(int shape_id) const
int IntCastRounded(double x)
bool SubsetUnichar(int shape_id1, int shape_id2) const
int FindShape(int unichar_id, int font_id) const
bool AnyMultipleUnichars() const
void SetUnicharId(int index, int unichar_id)
void GetFirstUnicharAndFont(int shape_id, int *unichar_id, int *font_id) const
int AddShape(int unichar_id, int font_id)
void AppendMasterShapes(const ShapeTable &other, GenericVector< int > *shape_map)
const Shape & GetShape(int shape_id) const
static int FirstResultWithUnichar(const GenericVector< ShapeRating > &results, const ShapeTable &shape_table, UNICHAR_ID unichar_id)
void AddToShape(int shape_id, int unichar_id, int font_id)
bool ContainsFontProperties(const FontInfoTable &font_table, uinT32 properties) const
bool DeSerialize(bool swap, FILE *fp)
bool MergeSubsetUnichar(int merge_id1, int merge_id2, int shape_id) const