21 #include "config_auto.h"
24 #if (defined __MINGW32__) || (defined __CYGWIN__)
26 #undef __STRICT_ANSI__
33 #include <sys/param.h>
44 #include "pango/pango.h"
45 #include "pango/pangocairo.h"
46 #include "pango/pangofc-font.h"
49 "Overrides fontconfig default temporary dir");
51 #ifdef GOOGLE_TESSERACT
52 #include "ocr/trainingdata/typesetting/legacy_fonts.h"
54 "Overrides --fonts_dir and sets the known universe of fonts to"
55 "the list in legacy_fonts.h");
58 "Overrides system default font location");
62 "If empty it use system default. Otherwise it overrides"
63 " system default font location");
83 tprintf(
"ERROR: Could not parse %s\n", desc.c_str());
// PangoFontInfo::Clear(): the visible part of the body releases the Pango
// description held in desc_ through pango_font_description_free(). The other
// statements of this function are outside the visible excerpt.
88 void PangoFontInfo::Clear() {
93 pango_font_description_free(desc_);
101 if (!desc_)
return "";
102 char* desc_str = pango_font_description_to_string(desc_);
113 if (fonts_dir_.empty()) {
115 FLAGS_fontconfig_tmpdir.c_str());
124 if (!cache_dir_.empty()) {
// Fragment of a fontconfig (re)initialisation routine; its header and several
// statements are outside the visible excerpt. Visible steps: format a
// fonts.conf style XML document into a fixed-size stack buffer, export
// FONTCONFIG_PATH and LANG, re-initialise fontconfig and reset the default
// Pango/Cairo font map.
128 const int MAX_FONTCONF_FILESIZE = 1024;
129 char fonts_conf_template[MAX_FONTCONF_FILESIZE];
// Remember the directories passed in by the caller.
130 cache_dir_ = cache_dir;
131 fonts_dir_ = fonts_dir;
// The write is bounded by MAX_FONTCONF_FILESIZE.
// NOTE(review): the snprintf result is discarded, so an overlong
// fonts_dir / cache_dir would be truncated silently - confirm this is
// acceptable.
132 snprintf(fonts_conf_template, MAX_FONTCONF_FILESIZE,
133 "<?xml version=\"1.0\"?>\n"
134 "<!DOCTYPE fontconfig SYSTEM \"fonts.dtd\">\n"
137 "<cachedir>%s</cachedir>\n"
138 "<config></config>\n"
140 fonts_dir.c_str(), cache_dir_.c_str());
// Both the _putenv and the setenv forms are visible below; presumably they
// live in alternative platform-specific preprocessor branches that are not
// part of this excerpt - TODO confirm.
145 env.append(cache_dir_.c_str());
146 _putenv(env.c_str());
147 _putenv(
"LANG=en_US.utf8");
// The final `true` argument asks setenv to overwrite an existing value.
149 setenv(
"FONTCONFIG_PATH", cache_dir_.c_str(),
true);
151 setenv(
"LANG",
"en_US.utf8",
true);
// A message is printed when fontconfig does not report FcTrue.
154 if (FcInitReinitialize() != FcTrue) {
155 tprintf(
"FcInitiReinitialize failed!!\n");
// Passing nullptr drops the current default font map; per the Pango
// documentation a fresh default map is then created on demand.
159 pango_cairo_font_map_set_default(
nullptr);
// File-local helper that asks the default Pango/Cairo font map for its font
// families. The results are written through `families` and `n_families` by
// pango_font_map_list_families().
// NOTE(review): the Pango documentation says the returned array must be
// released with g_free(); the release is not visible in this excerpt -
// confirm the callers do so.
162 static void ListFontFamilies(PangoFontFamily*** families,
165 PangoFontMap* font_map = pango_cairo_font_map_get_default();
167 pango_font_map_list_families(font_map, families, n_families);
// Fills this object from the given Pango font description and reports
// success as a bool. Only part of the body is visible in this excerpt.
170 bool PangoFontInfo::ParseFontDescription(
const PangoFontDescription *desc) {
172 const char* family = pango_font_description_get_family(desc);
// Warning path: the description is rendered to text for the log message.
// Presumably taken when no family name could be read - the guarding
// condition is outside the visible excerpt.
174 char* desc_str = pango_font_description_to_string(desc);
175 tprintf(
"WARNING: Could not parse family name from description: '%s'\n",
// Keep a private copy, so the caller remains free to release `desc`.
181 desc_ = pango_font_description_copy(desc);
// A size that is not flagged as absolute is divided by PANGO_SCALE.
184 font_size_ = pango_font_description_get_size(desc);
185 if (!pango_font_description_get_size_is_absolute(desc)) {
186 font_size_ /= PANGO_SCALE;
193 PangoFontDescription *desc = pango_font_description_from_string(name.c_str());
194 bool success = ParseFontDescription(desc);
195 pango_font_description_free(desc);
// Loads a PangoFont for desc_ from the default Pango/Cairo font map, using a
// temporary PangoContext configured with resolution_. The context is
// unreferenced again before the function ends. Callers visible in this file
// release the returned font with g_object_unref() and compare it against
// nullptr before use.
202 PangoFont* PangoFontInfo::ToPangoFont()
const {
204 PangoFontMap* font_map = pango_cairo_font_map_get_default();
205 PangoContext* context = pango_context_new();
206 pango_cairo_context_set_resolution(context, resolution_);
207 pango_context_set_font_map(context, font_map);
208 PangoFont* font =
nullptr;
211 font = pango_font_map_load_font(font_map, context, desc_);
// The temporary context is no longer needed once the font has been looked up.
213 g_object_unref(context);
// Fragment of a coverage check (the function header is outside the visible
// excerpt). The font for this description is loaded, its coverage map is
// queried, and each iterated code point is tested against that map.
218 PangoFont* font = ToPangoFont();
219 if (font ==
nullptr) {
223 PangoCoverage* coverage = pango_font_get_coverage(font,
nullptr);
// Only PANGO_COVERAGE_EXACT counts as covered.
229 if (pango_coverage_get(coverage, *it) != PANGO_COVERAGE_EXACT) {
231 int len = it.get_utf8(tmp);
233 tlog(2,
"'%s' (U+%x) not covered by font\n", tmp, *it);
// The uncovered-character path releases both Pango objects itself.
234 pango_coverage_unref(coverage);
235 g_object_unref(font);
// Release on the path that is reached after the scan.
239 pango_coverage_unref(coverage);
240 g_object_unref(font);
// File-local helper taking a destination buffer, a source string and a byte
// count `n`. The loop body is outside the visible excerpt; the visible tail
// of the do/while loop keeps iterating while `n` is non-zero and the current
// source byte is not NUL.
246 static char* my_strnmove(
char*
dest,
const char* src,
size_t n) {
255 }
while (n && src[0]);
// Fragment of an in-place filter over *utf8_text (the function header is
// outside the visible excerpt). It returns num_dropped_chars.
267 int num_dropped_chars = 0;
268 PangoFont* font = ToPangoFont();
// Without a font the whole text is emptied and its full length is returned.
269 if (font ==
nullptr) {
271 num_dropped_chars = utf8_text->length();
272 utf8_text->resize(0);
273 return num_dropped_chars;
275 PangoCoverage* coverage = pango_font_get_coverage(font,
nullptr);
// `out` is a write cursor into the string's own buffer.
// NOTE(review): this writes through the pointer returned by c_str() after a
// const_cast - confirm this is intended.
279 char* out = const_cast<char*>(utf8_text->c_str());
286 if (!it.is_legal()) {
292 const char* utf8_char = it.utf8_data();
// Whitespace and zero-width code points bypass the coverage test; any other
// code point must be covered with PANGO_COVERAGE_EXACT.
295 if (!
IsWhitespace(unicode) && !pango_is_zero_width(unicode) &&
296 pango_coverage_get(coverage, unicode) != PANGO_COVERAGE_EXACT) {
300 tlog(2,
"'%s' (U+%x) not covered by font\n", str, unicode);
// Copies the bytes of the current character to the write cursor.
306 my_strnmove(out, utf8_char, utf8_len);
309 pango_coverage_unref(coverage);
310 g_object_unref(font);
// Shrink the string to the number of bytes up to the write cursor.
311 utf8_text->resize(out - utf8_text->c_str());
312 return num_dropped_chars;
316 int* x_bearing,
int* x_advance)
const {
// Body fragment of a const member that reports results through the
// `x_bearing` and `x_advance` out-parameters (the start of the signature is
// outside the visible excerpt).
// NOTE(review): no null check on `font` is visible before it is used below -
// confirm one exists in the lines that are not shown.
318 PangoFont* font = ToPangoFont();
320 int total_advance = 0;
// The font is treated as a PangoFcFont to look up the glyph for *it.
330 PangoGlyph glyph_index = pango_fc_font_get_glyph(
331 reinterpret_cast<PangoFcFont*>(font), *it);
// Release on an early-exit path; presumably taken when the glyph lookup
// fails - the guarding condition is outside the visible excerpt.
334 g_object_unref(font);
338 PangoRectangle ink_rect, logical_rect;
339 pango_font_get_glyph_extents(font, glyph_index, &ink_rect, &logical_rect);
// Convert both rectangles from Pango units to device pixels.
340 pango_extents_to_pixels(&ink_rect,
nullptr);
341 pango_extents_to_pixels(&logical_rect,
nullptr);
// The left bearing of this glyph is offset by the advance accumulated so
// far; the first iteration and any smaller value update the minimum.
343 int bearing = total_advance + PANGO_LBEARING(ink_rect);
344 if (it == it_begin || bearing < min_bearing) {
345 min_bearing = bearing;
347 total_advance += PANGO_RBEARING(logical_rect);
// Publish the results and release the font.
349 *x_bearing = min_bearing;
350 *x_advance = total_advance;
351 g_object_unref(font);
356 std::vector<std::string> graphemes;
361 std::vector<std::string>* graphemes)
const {
// Body fragment of a const member that lays out `utf8_word` with Pango and
// inspects the resulting glyph clusters (the start of the signature is
// outside the visible excerpt). When `graphemes` is non-null it receives the
// text of each cluster, and it is cleared again if a bad glyph was found.
362 if (graphemes) graphemes->clear();
// 9676 is U+25CC.
371 const char32 kDottedCircleGlyph = 9676;
372 bool bad_glyph =
false;
// Build a layout on a temporary context bound to the default font map.
373 PangoFontMap* font_map = pango_cairo_font_map_get_default();
374 PangoContext* context = pango_context_new();
375 pango_context_set_font_map(context, font_map);
380 layout = pango_layout_new(context);
// Two ways of assigning the font description are visible; the condition that
// selects between them is outside the visible excerpt.
383 pango_layout_set_font_description(layout, desc_);
385 PangoFontDescription *desc = pango_font_description_from_string(
387 pango_layout_set_font_description(layout, desc);
388 pango_font_description_free(desc);
390 pango_layout_set_text(layout, utf8_word, len);
391 PangoLayoutIter* run_iter =
nullptr;
394 run_iter = pango_layout_get_iter(layout);
397 PangoLayoutRun* run = pango_layout_iter_get_run_readonly(run_iter);
399 tlog(2,
"Found end of line nullptr run marker\n");
402 PangoGlyph dotted_circle_glyph;
403 PangoFont* font = run->item->analysis.font;
// First variant: shape a short string and take its first glyph.
// NOTE(review): "\xc2\xa7" is the UTF-8 encoding of U+00A7, not of U+25CC,
// although the result is stored in dotted_circle_glyph - confirm intent.
406 PangoGlyphString* glyphs = pango_glyph_string_new();
407 const char s[] =
"\xc2\xa7";
408 pango_shape(s, strlen(s), &(run->item->analysis), glyphs);
409 dotted_circle_glyph = glyphs->glyphs[0].glyph;
410 #else // TODO: Do we need separate solution for non win build?
// Second variant: ask the font (treated as a PangoFcFont) for the glyph of
// kDottedCircleGlyph directly.
411 dotted_circle_glyph = pango_fc_font_get_glyph(
412 reinterpret_cast<PangoFcFont*>(font), kDottedCircleGlyph);
// Log the description of the font that was chosen for this run.
416 PangoFontDescription* desc = pango_font_describe(font);
417 char* desc_str = pango_font_description_to_string(desc);
418 tlog(2,
"Desc of font in run: %s\n", desc_str);
420 pango_font_description_free(desc);
// Walk the clusters of the run; the walk stops early once a bad glyph has
// been seen.
423 PangoGlyphItemIter cluster_iter;
424 gboolean have_cluster;
425 for (have_cluster = pango_glyph_item_iter_init_start(&cluster_iter,
427 have_cluster && !bad_glyph;
428 have_cluster = pango_glyph_item_iter_next_cluster(&cluster_iter)) {
429 const int start_byte_index = cluster_iter.start_index;
430 const int end_byte_index = cluster_iter.end_index;
431 int start_glyph_index = cluster_iter.start_glyph;
432 int end_glyph_index = cluster_iter.end_glyph;
434 end_byte_index - start_byte_index);
435 if (graphemes) graphemes->push_back(cluster_text);
437 tlog(2,
"Skipping whitespace\n");
441 printf(
"start_byte=%d end_byte=%d start_glyph=%d end_glyph=%d ",
442 start_byte_index, end_byte_index,
443 start_glyph_index, end_glyph_index);
// The glyph range is walked in whichever direction leads from the start
// index to the end index.
445 for (
int i = start_glyph_index,
446 step = (end_glyph_index > start_glyph_index) ? 1 : -1;
447 !bad_glyph && i != end_glyph_index; i+= step) {
// A glyph is bad when it carries PANGO_GLYPH_UNKNOWN_FLAG or equals the
// reference glyph computed above.
448 const bool unknown_glyph =
449 (cluster_iter.glyph_item->glyphs->glyphs[i].glyph &
450 PANGO_GLYPH_UNKNOWN_FLAG);
451 const bool illegal_glyph =
452 (cluster_iter.glyph_item->glyphs->glyphs[i].glyph ==
453 dotted_circle_glyph);
454 bad_glyph = unknown_glyph || illegal_glyph;
456 printf(
"(%d=%d)", cluster_iter.glyph_item->glyphs->glyphs[i].glyph,
461 printf(
" '%s'\n", cluster_text.c_str());
464 tlog(1,
"Found illegal glyph!\n");
467 pango_glyph_string_free(glyphs);
// Continue with the next run unless a bad glyph was found.
469 }
while (!bad_glyph && pango_layout_iter_next_run(run_iter));
// Release the iterator, the context and the layout.
471 pango_layout_iter_free(run_iter);
472 g_object_unref(context);
473 g_object_unref(layout);
// Collected graphemes are discarded when a bad glyph was found.
474 if (bad_glyph && graphemes) graphemes->clear();
480 std::vector<std::string> FontUtils::available_fonts_;
// Fragment of a font availability check (the function header is outside the
// visible excerpt). A query description is parsed, the font that Pango
// selects for it is loaded, and the two descriptions are compared.
499 PangoFontDescription *desc = pango_font_description_from_string(
501 PangoFont* selected_font =
nullptr;
// A temporary context bound to the default font map is used for the lookup
// and released straight afterwards.
504 PangoFontMap* font_map = pango_cairo_font_map_get_default();
505 PangoContext* context = pango_context_new();
506 pango_context_set_font_map(context, font_map);
509 selected_font = pango_font_map_load_font(font_map, context, desc);
511 g_object_unref(context);
// Load failure: free the query description and log.
513 if (selected_font ==
nullptr) {
514 pango_font_description_free(desc);
515 tlog(4,
"** Font '%s' failed to load from font map!\n", input_query_desc);
518 PangoFontDescription* selected_desc = pango_font_describe(selected_font);
520 bool equal = pango_font_description_equal(desc, selected_desc);
521 tlog(3,
"query weight = %d \t selected weight =%d\n",
522 pango_font_description_get_weight(desc),
523 pango_font_description_get_weight(selected_desc));
525 char* selected_desc_str = pango_font_description_to_string(selected_desc);
526 tlog(2,
"query_desc: '%s' Selected: '%s'\n", query_desc.c_str(),
// On a mismatch the description Pango actually selected is handed back
// through best_match, if the caller supplied one.
528 if (!equal && best_match !=
nullptr) {
529 *best_match = selected_desc_str;
// A trailing " 0" (space followed by the digit zero) is removed.
532 int len = best_match->size();
533 if (len > 2 && best_match->at(len - 1) ==
'0' &&
534 best_match->at(len - 2) ==
' ') {
535 *best_match = best_match->substr(0, len - 2);
// Release everything that was obtained above.
538 g_free(selected_desc_str);
539 pango_font_description_free(selected_desc);
540 g_object_unref(selected_font);
541 pango_font_description_free(desc);
543 tlog(4,
"** Font '%s' failed pango_font_description_equal!\n",
// File-local predicate over a family name. `query` is compared with strcmp()
// (exact, case-sensitive) against the entries of a static table of family
// names.
548 static bool ShouldIgnoreFontFamilyName(
const char* query) {
549 static const char* kIgnoredFamilyNames[] = {
"Sans",
"Serif",
"Monospace",
// The walk stops at a nullptr entry; the terminating entry itself is not
// part of the visible excerpt.
551 const char** list = kIgnoredFamilyNames;
552 for (; *list !=
nullptr; ++list) {
553 if (!strcmp(*list, query))
// Fragment of the routine that fills and returns available_fonts_ (the
// function header is outside the visible excerpt).
// A non-empty list is returned as is, so the enumeration below runs only
// while the list is still empty.
562 if (!available_fonts_.empty()) {
563 return available_fonts_;
// With FLAGS_use_only_legacy_fonts set, the list is taken from kFontlists:
// kNumFontLists arrays, each walked up to its nullptr terminator.
565 #ifdef GOOGLE_TESSERACT
566 if (FLAGS_use_only_legacy_fonts) {
568 tprintf(
"Using list of legacy fonts only\n");
569 const int kNumFontLists = 4;
570 for (
int i = 0; i < kNumFontLists; ++i) {
571 for (
int j = 0; kFontlists[i][j] !=
nullptr; ++j) {
572 available_fonts_.push_back(kFontlists[i][j]);
575 return available_fonts_;
// Otherwise enumerate the font families and the faces of each family.
579 PangoFontFamily** families =
nullptr;
581 ListFontFamilies(&families, &n_families);
582 for (
int i = 0; i < n_families; ++i) {
583 const char* family_name = pango_font_family_get_name(families[i]);
584 tlog(2,
"Listing family %s\n", family_name);
585 if (ShouldIgnoreFontFamilyName(family_name)) {
590 PangoFontFace** faces =
nullptr;
591 pango_font_family_list_faces(families[i], &faces, &n_faces);
592 for (
int j = 0; j < n_faces; ++j) {
593 PangoFontDescription* desc = pango_font_face_describe(faces[j]);
594 char* desc_str = pango_font_description_to_string(desc);
// Synthesized faces are not added; the remainder of this condition is
// outside the visible excerpt.
596 if (!pango_font_face_is_synthesized(faces[j])
598 available_fonts_.push_back(desc_str);
// NOTE(review): the release of desc_str, faces and families is not visible
// in this excerpt - confirm it happens in the lines that are not shown.
600 pango_font_description_free(desc);
// The collected names are returned in sorted order.
606 std::sort(available_fonts_.begin(), available_fonts_.end());
607 return available_fonts_;
// File-local helper that turns a PangoCoverage into a per-code-point flag
// vector. *unichar_bitmap is resized to kMaxUnicodeValue + 1 entries; entries
// added by the resize start out false, while entries that already existed
// keep their values unless the loop overwrites them.
611 static void CharCoverageMapToBitmap(PangoCoverage* coverage,
612 std::vector<bool>* unichar_bitmap) {
613 const int kMinUnicodeValue = 33;
614 const int kMaxUnicodeValue = 0x10FFFF;
615 unichar_bitmap->resize(kMaxUnicodeValue + 1,
false);
// Code points below kMinUnicodeValue are not visited by this loop.
617 for (
int i = kMinUnicodeValue; i <= kMaxUnicodeValue; ++i) {
// The assignment target is outside the visible excerpt; the assigned value is
// true only for PANGO_COVERAGE_EXACT.
620 = (pango_coverage_get(coverage, i) == PANGO_COVERAGE_EXACT);
633 std::vector<bool>* unichar_bitmap) {
// Body fragment (single-font variant; the signature is outside the visible
// excerpt). When the font loads, its coverage is converted into
// *unichar_bitmap and both Pango objects are released. When the font is
// nullptr none of the visible statements run.
635 PangoFont* font = font_info.ToPangoFont();
636 if (font !=
nullptr) {
638 PangoCoverage* coverage = pango_font_get_coverage(font,
nullptr);
639 CharCoverageMapToBitmap(coverage, unichar_bitmap);
640 pango_coverage_unref(coverage);
641 g_object_unref(font);
647 std::vector<bool>* unichar_bitmap) {
// Body fragment (multi-font variant; the signature is outside the visible
// excerpt). The coverage of every font that loads is merged into one
// accumulator, which is converted into *unichar_bitmap at the end.
649 PangoCoverage* all_coverage = pango_coverage_new();
650 tlog(1,
"Processing %u fonts\n", static_cast<unsigned>(fonts.size()));
651 for (
unsigned i = 0; i < fonts.size(); ++i) {
// Fonts that fail to load are skipped.
653 PangoFont* font = font_info.ToPangoFont();
654 if (font !=
nullptr) {
656 PangoCoverage* coverage = pango_font_get_coverage(font,
nullptr);
// Merge this font's coverage into the accumulator, then release it.
658 pango_coverage_max(all_coverage, coverage);
659 pango_coverage_unref(coverage);
660 g_object_unref(font);
663 CharCoverageMapToBitmap(all_coverage, unichar_bitmap);
664 pango_coverage_unref(all_coverage);
673 std::vector<bool>* ch_flags) {
// Body fragment of a scoring routine over `ch_map` for the font named
// `fontname` (the start of the signature is outside the visible excerpt).
// One flag per ch_map entry is appended to *ch_flags.
676 tprintf(
"ERROR: Could not parse %s\n", fontname.c_str());
// coverage stays nullptr when the font could not be loaded.
678 PangoFont* font = font_info.ToPangoFont();
679 PangoCoverage* coverage =
nullptr;
680 if (font !=
nullptr) coverage = pango_font_get_coverage(font,
nullptr);
683 ch_flags->reserve(ch_map.size());
687 for (std::unordered_map<char32, int64_t>::const_iterator it = ch_map.begin();
688 it != ch_map.end(); ++it) {
// Without a coverage map nothing is covered. Otherwise whitespace always
// counts as covered, and any other code point needs PANGO_COVERAGE_EXACT.
689 bool covered = (coverage !=
nullptr) && (
IsWhitespace(it->first) ||
690 (pango_coverage_get(coverage, it->first)
691 == PANGO_COVERAGE_EXACT));
// The value mapped to this code point is added to ok_chars; presumably only
// for covered characters - the guarding condition is not visible.
694 ok_chars += it->second;
697 ch_flags->push_back(covered);
// NOTE(review): font and coverage can still be nullptr here when loading
// failed; no guard is visible in this excerpt - confirm.
700 pango_coverage_unref(coverage);
701 g_object_unref(font);
708 const std::unordered_map<char32, int64_t>& ch_map,
709 std::vector<std::pair<
const char*, std::vector<bool> > >* fonts) {
// Body fragment of a font selection routine over `ch_map` (the start of the
// signature is outside the visible excerpt). Every font in font_names is
// scored with FontScore(); fonts that pass the thresholds below are appended
// to *fonts together with their per-character flags.
710 const double kMinOKFraction = 0.99;
713 const double kMinWeightedFraction = 0.99995;
716 std::vector<std::vector<bool> > font_flags;
717 std::vector<int> font_scores;
718 std::vector<int> raw_scores;
719 int most_ok_chars = 0;
720 int best_raw_score = 0;
// First pass: score every font and track the best of each score kind.
722 for (
unsigned i = 0; i < font_names.size(); ++i) {
723 std::vector<bool> ch_flags;
725 int ok_chars =
FontScore(ch_map, font_names[i], &raw_score, &ch_flags);
726 most_ok_chars = std::max(ok_chars, most_ok_chars);
727 best_raw_score = std::max(raw_score, best_raw_score);
729 font_flags.push_back(ch_flags);
730 font_scores.push_back(ok_chars);
731 raw_scores.push_back(raw_score);
// Thresholds are fractions of the best observed scores, truncated to int.
742 int least_good_enough = static_cast<int>(most_ok_chars * kMinOKFraction);
743 int least_raw_enough = static_cast<int>(best_raw_score * kMinOKFraction);
744 int override_enough = static_cast<int>(most_ok_chars * kMinWeightedFraction);
// Second pass: a font is accepted when both of its scores reach their
// kMinOKFraction thresholds, or when its score alone reaches the stricter
// kMinWeightedFraction threshold.
747 for (
unsigned i = 0; i < font_names.size(); ++i) {
748 int score = font_scores[i];
749 int raw_score = raw_scores[i];
750 if ((score >= least_good_enough && raw_score >= least_raw_enough) ||
751 score >= override_enough) {
// NOTE(review): the stored const char* points into font_names[i]; it stays
// valid only while that string lives unchanged - confirm the lifetime of
// font_names, whose declaration is outside the visible excerpt.
752 fonts->push_back(std::make_pair(font_names[i].c_str(), font_flags[i]));
753 tlog(1,
"OK font %s = %.4f%%, raw = %d = %.2f%%\n",
754 font_names[i].c_str(),
755 100.0 * score / most_ok_chars,
756 raw_score, 100.0 * raw_score / best_raw_score);
757 font_list += font_names[i];
// Fonts that reach only one of the two thresholds are logged as runner-up
// and are not appended in the visible code.
759 }
else if (score >= least_good_enough || raw_score >= least_raw_enough) {
760 tlog(1,
"Runner-up font %s = %.4f%%, raw = %d = %.2f%%\n",
761 font_names[i].c_str(),
762 100.0 * score / most_ok_chars,
763 raw_score, 100.0 * raw_score / best_raw_score);
771 std::string* font_name, std::vector<std::string>* graphemes) {
778 const std::vector<std::string>& all_fonts,
779 std::string* font_name, std::vector<std::string>* graphemes) {
// Body fragment of a search through `all_fonts` (the start of the signature
// is outside the visible excerpt). Both optional outputs are emptied first;
// each is written only when its pointer is non-null.
780 if (font_name) font_name->clear();
781 if (graphemes) graphemes->clear();
782 for (
unsigned i = 0; i < all_fonts.size(); ++i) {
784 std::vector<std::string> found_graphemes;
786 "Could not parse font desc name %s\n",
787 all_fonts[i].c_str());
// The graphemes and the name of the current font are handed to the caller;
// presumably on success - the guarding condition is not visible.
789 if (graphemes) graphemes->swap(found_graphemes);
790 if (font_name) *font_name = all_fonts[i];
// Fragment that prints which Cairo font backend the default Pango/Cairo font
// map uses (the function header is outside the visible excerpt). Each branch
// queries pango_cairo_font_map_get_font_type() again and compares the result
// with one CAIRO_FONT_TYPE_* value.
// NOTE(review): the !font_map test is the last branch, so font_map has
// already been passed to pango_cairo_font_map_get_font_type() several times
// by the time it is checked - confirm that ordering is intended.
804 PangoFontMap* font_map = pango_cairo_font_map_get_default();
805 if (pango_cairo_font_map_get_font_type(reinterpret_cast<PangoCairoFontMap*>(
806 font_map)) == CAIRO_FONT_TYPE_TOY) {
807 printf(
"Using CAIRO_FONT_TYPE_TOY.\n");
808 }
else if (pango_cairo_font_map_get_font_type(
809 reinterpret_cast<PangoCairoFontMap*>(font_map)) ==
810 CAIRO_FONT_TYPE_FT) {
811 printf(
"Using CAIRO_FONT_TYPE_FT.\n");
812 }
else if (pango_cairo_font_map_get_font_type(
813 reinterpret_cast<PangoCairoFontMap*>(font_map)) ==
814 CAIRO_FONT_TYPE_WIN32) {
815 printf(
"Using CAIRO_FONT_TYPE_WIN32.\n");
816 }
else if (pango_cairo_font_map_get_font_type(
817 reinterpret_cast<PangoCairoFontMap*>(font_map)) ==
818 CAIRO_FONT_TYPE_QUARTZ) {
819 printf(
"Using CAIRO_FONT_TYPE_QUARTZ.\n");
820 }
else if (pango_cairo_font_map_get_font_type(
821 reinterpret_cast<PangoCairoFontMap*>(font_map)) ==
822 CAIRO_FONT_TYPE_USER) {
823 printf(
"Using CAIRO_FONT_TYPE_USER.\n");
824 }
else if (!font_map) {
825 printf(
"Can not create pango cairo font map!\n");