22 #include "config_auto.h" 25 #if (defined __MINGW32__) || (defined __CYGWIN__) 27 #undef __STRICT_ANSI__ 34 #include <sys/param.h> 45 #include "pango/pango.h" 46 #include "pango/pangocairo.h" 47 #include "pango/pangofc-font.h" 50 "Overrides fontconfig default temporary dir");
52 #ifdef GOOGLE_TESSERACT 53 #include "ocr/trainingdata/typesetting/legacy_fonts.h" 55 "Overrides --fonts_dir and sets the known universe of fonts to" 56 "the list in legacy_fonts.h");
59 "Overrides system default font location");
63 "If empty it use system default. Otherwise it overrides" 64 " system default font location");
73 std::string PangoFontInfo::fonts_dir_;
74 std::string PangoFontInfo::cache_dir_;
84 tprintf(
"ERROR: Could not parse %s\n", desc.c_str());
89 void PangoFontInfo::Clear() {
94 pango_font_description_free(desc_);
102 if (!desc_)
return "";
103 char* desc_str = pango_font_description_to_string(desc_);
104 std::string desc_name(desc_str);
114 if (fonts_dir_.empty()) {
116 FLAGS_fontconfig_tmpdir.c_str());
124 const std::string& cache_dir) {
125 if (!cache_dir_.empty()) {
129 const int MAX_FONTCONF_FILESIZE = 1024;
130 char fonts_conf_template[MAX_FONTCONF_FILESIZE];
131 cache_dir_ = cache_dir;
132 fonts_dir_ = fonts_dir;
133 snprintf(fonts_conf_template, MAX_FONTCONF_FILESIZE,
134 "<?xml version=\"1.0\"?>\n" 135 "<!DOCTYPE fontconfig SYSTEM \"fonts.dtd\">\n" 138 "<cachedir>%s</cachedir>\n" 139 "<config></config>\n" 141 fonts_dir.c_str(), cache_dir_.c_str());
142 std::string fonts_conf_file =
File::JoinPath(cache_dir_.c_str(),
"fonts.conf");
145 std::string env(
"FONTCONFIG_PATH=");
146 env.append(cache_dir_.c_str());
147 _putenv(env.c_str());
148 _putenv(
"LANG=en_US.utf8");
150 setenv(
"FONTCONFIG_PATH", cache_dir_.c_str(),
true);
152 setenv(
"LANG",
"en_US.utf8",
true);
155 if (FcInitReinitialize() != FcTrue) {
156 tprintf(
"FcInitiReinitialize failed!!\n");
160 pango_cairo_font_map_set_default(
nullptr);
163 static void ListFontFamilies(PangoFontFamily*** families,
166 PangoFontMap* font_map = pango_cairo_font_map_get_default();
168 pango_font_map_list_families(font_map, families, n_families);
171 bool PangoFontInfo::ParseFontDescription(
const PangoFontDescription *desc) {
173 const char* family = pango_font_description_get_family(desc);
175 char* desc_str = pango_font_description_to_string(desc);
176 tprintf(
"WARNING: Could not parse family name from description: '%s'\n",
181 family_name_ = std::string(family);
182 desc_ = pango_font_description_copy(desc);
185 font_size_ = pango_font_description_get_size(desc);
186 if (!pango_font_description_get_size_is_absolute(desc)) {
187 font_size_ /= PANGO_SCALE;
194 PangoFontDescription *desc = pango_font_description_from_string(name.c_str());
195 bool success = ParseFontDescription(desc);
196 pango_font_description_free(desc);
203 PangoFont* PangoFontInfo::ToPangoFont()
const {
205 PangoFontMap* font_map = pango_cairo_font_map_get_default();
206 PangoContext* context = pango_context_new();
207 pango_cairo_context_set_resolution(context, resolution_);
208 pango_context_set_font_map(context, font_map);
209 PangoFont* font =
nullptr;
212 font = pango_font_map_load_font(font_map, context, desc_);
214 g_object_unref(context);
219 PangoFont* font = ToPangoFont();
220 PangoCoverage* coverage = pango_font_get_coverage(font,
nullptr);
226 if (pango_coverage_get(coverage, *it) != PANGO_COVERAGE_EXACT) {
228 int len = it.get_utf8(tmp);
230 tlog(2,
"'%s' (U+%x) not covered by font\n", tmp, *it);
234 pango_coverage_unref(coverage);
235 g_object_unref(font);
241 static char* my_strnmove(
char* dest,
const char* src,
size_t n) {
250 }
while (n && src[0]);
262 PangoFont* font = ToPangoFont();
263 PangoCoverage* coverage = pango_font_get_coverage(font,
nullptr);
264 int num_dropped_chars = 0;
268 char* out =
const_cast<char*
>(utf8_text->c_str());
275 if (!it.is_legal()) {
281 const char* utf8_char = it.utf8_data();
284 if (!
IsWhitespace(unicode) && !pango_is_zero_width(unicode) &&
285 pango_coverage_get(coverage, unicode) != PANGO_COVERAGE_EXACT) {
289 tlog(2,
"'%s' (U+%x) not covered by font\n", str, unicode);
295 my_strnmove(out, utf8_char, utf8_len);
298 pango_coverage_unref(coverage);
299 g_object_unref(font);
300 utf8_text->resize(out - utf8_text->c_str());
301 return num_dropped_chars;
305 int* x_bearing,
int* x_advance)
const {
307 PangoFont* font = ToPangoFont();
309 int total_advance = 0;
319 PangoGlyph glyph_index = pango_fc_font_get_glyph(
320 reinterpret_cast<PangoFcFont*>(font), *it);
323 g_object_unref(font);
327 PangoRectangle ink_rect, logical_rect;
328 pango_font_get_glyph_extents(font, glyph_index, &ink_rect, &logical_rect);
329 pango_extents_to_pixels(&ink_rect,
nullptr);
330 pango_extents_to_pixels(&logical_rect,
nullptr);
332 int bearing = total_advance + PANGO_LBEARING(ink_rect);
333 if (it == it_begin || bearing < min_bearing) {
334 min_bearing = bearing;
336 total_advance += PANGO_RBEARING(logical_rect);
338 *x_bearing = min_bearing;
339 *x_advance = total_advance;
340 g_object_unref(font);
345 std::vector<std::string> graphemes;
350 std::vector<std::string>* graphemes)
const {
351 if (graphemes) graphemes->clear();
360 const char32 kDottedCircleGlyph = 9676;
361 bool bad_glyph =
false;
362 PangoFontMap* font_map = pango_cairo_font_map_get_default();
363 PangoContext* context = pango_context_new();
364 pango_context_set_font_map(context, font_map);
369 layout = pango_layout_new(context);
372 pango_layout_set_font_description(layout, desc_);
374 PangoFontDescription *desc = pango_font_description_from_string(
376 pango_layout_set_font_description(layout, desc);
377 pango_font_description_free(desc);
379 pango_layout_set_text(layout, utf8_word, len);
380 PangoLayoutIter* run_iter =
nullptr;
383 run_iter = pango_layout_get_iter(layout);
386 PangoLayoutRun* run = pango_layout_iter_get_run_readonly(run_iter);
388 tlog(2,
"Found end of line nullptr run marker\n");
391 PangoGlyph dotted_circle_glyph;
392 PangoFont* font = run->item->analysis.font;
394 #ifdef _WIN32 // Fixme! Leaks memory and breaks unittests. 395 PangoGlyphString* glyphs = pango_glyph_string_new();
396 char s[] =
"\xc2\xa7";
397 pango_shape(s,
sizeof(s), &(run->item->analysis), glyphs);
398 dotted_circle_glyph = glyphs->glyphs[0].glyph;
400 dotted_circle_glyph = pango_fc_font_get_glyph(
401 reinterpret_cast<PangoFcFont*>(font), kDottedCircleGlyph);
405 PangoFontDescription* desc = pango_font_describe(font);
406 char* desc_str = pango_font_description_to_string(desc);
407 tlog(2,
"Desc of font in run: %s\n", desc_str);
409 pango_font_description_free(desc);
412 PangoGlyphItemIter cluster_iter;
413 gboolean have_cluster;
414 for (have_cluster = pango_glyph_item_iter_init_start(&cluster_iter,
416 have_cluster && !bad_glyph;
417 have_cluster = pango_glyph_item_iter_next_cluster(&cluster_iter)) {
418 const int start_byte_index = cluster_iter.start_index;
419 const int end_byte_index = cluster_iter.end_index;
420 int start_glyph_index = cluster_iter.start_glyph;
421 int end_glyph_index = cluster_iter.end_glyph;
422 std::string cluster_text = std::string(utf8_word + start_byte_index,
423 end_byte_index - start_byte_index);
424 if (graphemes) graphemes->push_back(cluster_text);
426 tlog(2,
"Skipping whitespace\n");
430 printf(
"start_byte=%d end_byte=%d start_glyph=%d end_glyph=%d ",
431 start_byte_index, end_byte_index,
432 start_glyph_index, end_glyph_index);
434 for (
int i = start_glyph_index,
435 step = (end_glyph_index > start_glyph_index) ? 1 : -1;
436 !bad_glyph && i != end_glyph_index; i+= step) {
437 const bool unknown_glyph =
438 (cluster_iter.glyph_item->glyphs->glyphs[i].glyph &
439 PANGO_GLYPH_UNKNOWN_FLAG);
440 const bool illegal_glyph =
441 (cluster_iter.glyph_item->glyphs->glyphs[i].glyph ==
442 dotted_circle_glyph);
443 bad_glyph = unknown_glyph || illegal_glyph;
445 printf(
"(%d=%d)", cluster_iter.glyph_item->glyphs->glyphs[i].glyph,
450 printf(
" '%s'\n", cluster_text.c_str());
453 tlog(1,
"Found illegal glyph!\n");
455 }
while (!bad_glyph && pango_layout_iter_next_run(run_iter));
457 pango_layout_iter_free(run_iter);
458 g_object_unref(context);
459 g_object_unref(layout);
460 if (bad_glyph && graphemes) graphemes->clear();
466 std::vector<std::string> FontUtils::available_fonts_;
479 std::string* best_match) {
480 std::string query_desc(input_query_desc);
481 PangoFontDescription *desc = pango_font_description_from_string(
483 PangoFont* selected_font =
nullptr;
486 PangoFontMap* font_map = pango_cairo_font_map_get_default();
487 PangoContext* context = pango_context_new();
488 pango_context_set_font_map(context, font_map);
491 selected_font = pango_font_map_load_font(font_map, context, desc);
493 g_object_unref(context);
495 if (selected_font ==
nullptr) {
496 pango_font_description_free(desc);
499 PangoFontDescription* selected_desc = pango_font_describe(selected_font);
501 bool equal = pango_font_description_equal(desc, selected_desc);
502 tlog(3,
"query weight = %d \t selected weight =%d\n",
503 pango_font_description_get_weight(desc),
504 pango_font_description_get_weight(selected_desc));
506 char* selected_desc_str = pango_font_description_to_string(selected_desc);
507 tlog(2,
"query_desc: '%s' Selected: '%s'\n", query_desc.c_str(),
509 if (!equal && best_match !=
nullptr) {
510 *best_match = selected_desc_str;
513 int len = best_match->size();
514 if (len > 2 && best_match->at(len - 1) ==
'0' &&
515 best_match->at(len - 2) ==
' ') {
516 *best_match = best_match->substr(0, len - 2);
519 g_free(selected_desc_str);
520 pango_font_description_free(selected_desc);
521 g_object_unref(selected_font);
522 pango_font_description_free(desc);
526 static bool ShouldIgnoreFontFamilyName(
const char* query) {
527 static const char* kIgnoredFamilyNames[] = {
"Sans",
"Serif",
"Monospace",
529 const char** list = kIgnoredFamilyNames;
530 for (; *list !=
nullptr; ++list) {
531 if (!strcmp(*list, query))
540 if (!available_fonts_.empty()) {
541 return available_fonts_;
543 #ifdef GOOGLE_TESSERACT 544 if (FLAGS_use_only_legacy_fonts) {
546 tprintf(
"Using list of legacy fonts only\n");
547 const int kNumFontLists = 4;
548 for (
int i = 0; i < kNumFontLists; ++i) {
549 for (
int j = 0; kFontlists[i][j] !=
nullptr; ++j) {
550 available_fonts_.push_back(kFontlists[i][j]);
553 return available_fonts_;
557 PangoFontFamily** families =
nullptr;
559 ListFontFamilies(&families, &n_families);
560 for (
int i = 0; i < n_families; ++i) {
561 const char* family_name = pango_font_family_get_name(families[i]);
562 tlog(2,
"Listing family %s\n", family_name);
563 if (ShouldIgnoreFontFamilyName(family_name)) {
568 PangoFontFace** faces =
nullptr;
569 pango_font_family_list_faces(families[i], &faces, &n_faces);
570 for (
int j = 0; j < n_faces; ++j) {
571 PangoFontDescription* desc = pango_font_face_describe(faces[j]);
572 char* desc_str = pango_font_description_to_string(desc);
574 available_fonts_.push_back(desc_str);
576 pango_font_description_free(desc);
582 std::sort(available_fonts_.begin(), available_fonts_.end());
583 return available_fonts_;
587 static void CharCoverageMapToBitmap(PangoCoverage* coverage,
588 std::vector<bool>* unichar_bitmap) {
589 const int kMinUnicodeValue = 33;
590 const int kMaxUnicodeValue = 0x10FFFF;
591 unichar_bitmap->resize(kMaxUnicodeValue + 1,
false);
593 for (
int i = kMinUnicodeValue; i <= kMaxUnicodeValue; ++i) {
596 = (pango_coverage_get(coverage, i) == PANGO_COVERAGE_EXACT);
609 std::vector<bool>* unichar_bitmap) {
611 PangoFont* font = font_info.ToPangoFont();
612 PangoCoverage* coverage = pango_font_get_coverage(font,
nullptr);
613 CharCoverageMapToBitmap(coverage, unichar_bitmap);
614 pango_coverage_unref(coverage);
615 g_object_unref(font);
620 std::vector<bool>* unichar_bitmap) {
622 PangoCoverage* all_coverage = pango_coverage_new();
623 tlog(1,
"Processing %u fonts\n", static_cast<unsigned>(fonts.size()));
624 for (
unsigned i = 0; i < fonts.size(); ++i) {
626 PangoFont* font = font_info.ToPangoFont();
627 PangoCoverage* coverage = pango_font_get_coverage(font,
nullptr);
629 pango_coverage_max(all_coverage, coverage);
630 pango_coverage_unref(coverage);
631 g_object_unref(font);
633 CharCoverageMapToBitmap(all_coverage, unichar_bitmap);
634 pango_coverage_unref(all_coverage);
642 const std::string& fontname,
int* raw_score,
643 std::vector<bool>* ch_flags) {
646 tprintf(
"ERROR: Could not parse %s\n", fontname.c_str());
648 PangoFont* font = font_info.ToPangoFont();
649 PangoCoverage* coverage = pango_font_get_coverage(font,
nullptr);
653 ch_flags->reserve(ch_map.size());
657 for (std::unordered_map<char32, int64_t>::const_iterator it = ch_map.begin();
658 it != ch_map.end(); ++it) {
660 (pango_coverage_get(coverage, it->first)
661 == PANGO_COVERAGE_EXACT));
664 ok_chars += it->second;
667 ch_flags->push_back(covered);
670 pango_coverage_unref(coverage);
671 g_object_unref(font);
678 const std::unordered_map<char32, int64_t>& ch_map,
679 std::vector<std::pair<
const char*, std::vector<bool> > >* fonts) {
680 const double kMinOKFraction = 0.99;
683 const double kMinWeightedFraction = 0.99995;
686 std::vector<std::vector<bool> > font_flags;
687 std::vector<int> font_scores;
688 std::vector<int> raw_scores;
689 int most_ok_chars = 0;
690 int best_raw_score = 0;
692 for (
unsigned i = 0; i < font_names.size(); ++i) {
693 std::vector<bool> ch_flags;
695 int ok_chars =
FontScore(ch_map, font_names[i], &raw_score, &ch_flags);
696 most_ok_chars = std::max(ok_chars, most_ok_chars);
697 best_raw_score = std::max(raw_score, best_raw_score);
699 font_flags.push_back(ch_flags);
700 font_scores.push_back(ok_chars);
701 raw_scores.push_back(raw_score);
712 int least_good_enough =
static_cast<int>(most_ok_chars * kMinOKFraction);
713 int least_raw_enough =
static_cast<int>(best_raw_score * kMinOKFraction);
714 int override_enough =
static_cast<int>(most_ok_chars * kMinWeightedFraction);
716 std::string font_list;
717 for (
unsigned i = 0; i < font_names.size(); ++i) {
718 int score = font_scores[i];
719 int raw_score = raw_scores[i];
720 if ((score >= least_good_enough && raw_score >= least_raw_enough) ||
721 score >= override_enough) {
722 fonts->push_back(std::make_pair(font_names[i].c_str(), font_flags[i]));
723 tlog(1,
"OK font %s = %.4f%%, raw = %d = %.2f%%\n",
724 font_names[i].c_str(),
725 100.0 * score / most_ok_chars,
726 raw_score, 100.0 * raw_score / best_raw_score);
727 font_list += font_names[i];
729 }
else if (score >= least_good_enough || raw_score >= least_raw_enough) {
730 tlog(1,
"Runner-up font %s = %.4f%%, raw = %d = %.2f%%\n",
731 font_names[i].c_str(),
732 100.0 * score / most_ok_chars,
733 raw_score, 100.0 * raw_score / best_raw_score);
741 std::string* font_name, std::vector<std::string>* graphemes) {
748 const std::vector<std::string>& all_fonts,
749 std::string* font_name, std::vector<std::string>* graphemes) {
750 if (font_name) font_name->clear();
751 if (graphemes) graphemes->clear();
752 for (
unsigned i = 0; i < all_fonts.size(); ++i) {
754 std::vector<std::string> found_graphemes;
756 "Could not parse font desc name %s\n",
757 all_fonts[i].c_str());
759 if (graphemes) graphemes->swap(found_graphemes);
760 if (font_name) *font_name = all_fonts[i];
774 PangoFontMap* font_map = pango_cairo_font_map_get_default();
775 if (pango_cairo_font_map_get_font_type(reinterpret_cast<PangoCairoFontMap*>(
776 font_map)) == CAIRO_FONT_TYPE_TOY) {
777 printf(
"Using CAIRO_FONT_TYPE_TOY.\n");
778 }
else if (pango_cairo_font_map_get_font_type(
779 reinterpret_cast<PangoCairoFontMap*>(font_map)) ==
780 CAIRO_FONT_TYPE_FT) {
781 printf(
"Using CAIRO_FONT_TYPE_FT.\n");
782 }
else if (pango_cairo_font_map_get_font_type(
783 reinterpret_cast<PangoCairoFontMap*>(font_map)) ==
784 CAIRO_FONT_TYPE_WIN32) {
785 printf(
"Using CAIRO_FONT_TYPE_WIN32.\n");
786 }
else if (pango_cairo_font_map_get_font_type(
787 reinterpret_cast<PangoCairoFontMap*>(font_map)) ==
788 CAIRO_FONT_TYPE_QUARTZ) {
789 printf(
"Using CAIRO_FONT_TYPE_QUARTZ.\n");
790 }
else if (pango_cairo_font_map_get_font_type(
791 reinterpret_cast<PangoCairoFontMap*>(font_map)) ==
792 CAIRO_FONT_TYPE_USER) {
793 printf(
"Using CAIRO_FONT_TYPE_USER.\n");
794 }
else if (!font_map) {
795 printf(
"Can not create pango cairo font map!\n");
static bool DeleteMatchingFiles(const char *pattern)
static void SoftInitFontConfig()
#define DISABLE_HEAP_LEAK_CHECK
static void PangoFontTypeInfo()
bool IsInterchangeValid(const char32 ch)
std::string DescriptionName() const
bool IsWhitespace(const char32 ch)
const int kDefaultResolution
bool IsUTF8Whitespace(const char *text)
#define TLOG_IS_ON(level)
static const_iterator begin(const char *utf8_str, const int byte_length)
static bool IsAvailableFont(const char *font_desc)
bool CanRenderString(const char *utf8_word, int len, std::vector< std::string > *graphemes) const
bool GetSpacingProperties(const std::string &utf8_char, int *x_bearing, int *x_advance) const
#define BOOL_PARAM_FLAG(name, val, comment)
STRING_PARAM_FLAG(fontconfig_tmpdir, "/tmp", "Overrides fontconfig default temporary dir")
static std::string BestFonts(const std::unordered_map< char32, int64_t > &ch_map, std::vector< std::pair< const char *, std::vector< bool > > > *font_flag)
static int FontScore(const std::unordered_map< char32, int64_t > &ch_map, const std::string &fontname, int *raw_score, std::vector< bool > *ch_flags)
static void WriteStringToFileOrDie(const std::string &str, const std::string &filename)
static bool SelectFont(const char *utf8_word, const int utf8_len, std::string *font_name, std::vector< std::string > *graphemes)
bool CoversUTF8Text(const char *utf8_text, int byte_length) const
DLLSYM void tprintf(const char *format,...)
#define ASSERT_HOST_MSG(x,...)
static const_iterator end(const char *utf8_str, const int byte_length)
static std::string JoinPath(const std::string &prefix, const std::string &suffix)
bool ParseFontDescriptionName(const std::string &name)
static void HardInitFontConfig(const std::string &fonts_dir, const std::string &cache_dir)
static void GetAllRenderableCharacters(std::vector< bool > *unichar_bitmap)
static const std::vector< std::string > & ListAvailableFonts()
int DropUncoveredChars(std::string *utf8_text) const