tesseract  4.0.0-1-g2a2b
tesseract::FontUtils Class Reference

#include <pango_font_info.h>

Static Public Member Functions

static bool IsAvailableFont (const char *font_desc)
 
static bool IsAvailableFont (const char *font_desc, std::string *best_match)
 
static const std::vector< std::string > & ListAvailableFonts ()
 
static bool SelectFont (const char *utf8_word, const int utf8_len, std::string *font_name, std::vector< std::string > *graphemes)
 
static bool SelectFont (const char *utf8_word, const int utf8_len, const std::vector< std::string > &all_fonts, std::string *font_name, std::vector< std::string > *graphemes)
 
static void GetAllRenderableCharacters (std::vector< bool > *unichar_bitmap)
 
static void GetAllRenderableCharacters (const std::vector< std::string > &font_names, std::vector< bool > *unichar_bitmap)
 
static void GetAllRenderableCharacters (const std::string &font_name, std::vector< bool > *unichar_bitmap)
 
static std::string BestFonts (const std::unordered_map< char32, int64_t > &ch_map, std::vector< std::pair< const char *, std::vector< bool > > > *font_flag)
 
static int FontScore (const std::unordered_map< char32, int64_t > &ch_map, const std::string &fontname, int *raw_score, std::vector< bool > *ch_flags)
 
static void ReInit ()
 
static void PangoFontTypeInfo ()
 

Detailed Description

Definition at line 150 of file pango_font_info.h.

Member Function Documentation

◆ BestFonts()

std::string tesseract::FontUtils::BestFonts ( const std::unordered_map< char32, int64_t > &  ch_map,
std::vector< std::pair< const char *, std::vector< bool > > > *  font_flag 
)
static

Definition at line 677 of file pango_font_info.cpp.

679  {
680  const double kMinOKFraction = 0.99;
681  // Weighted fraction of characters that must be renderable in a font to make
682  // it OK even if the raw count is not good.
683  const double kMinWeightedFraction = 0.99995;
684 
685  fonts->clear();
686  std::vector<std::vector<bool> > font_flags;
687  std::vector<int> font_scores;
688  std::vector<int> raw_scores;
689  int most_ok_chars = 0;
690  int best_raw_score = 0;
691  const std::vector<std::string>& font_names = FontUtils::ListAvailableFonts();
692  for (unsigned i = 0; i < font_names.size(); ++i) {
693  std::vector<bool> ch_flags;
694  int raw_score = 0;
695  int ok_chars = FontScore(ch_map, font_names[i], &raw_score, &ch_flags);
696  most_ok_chars = std::max(ok_chars, most_ok_chars);
697  best_raw_score = std::max(raw_score, best_raw_score);
698 
699  font_flags.push_back(ch_flags);
700  font_scores.push_back(ok_chars);
701  raw_scores.push_back(raw_score);
702  }
703 
704  // Now select the fonts with a score above a threshold fraction
705  // of both the raw and weighted best scores. To prevent bogus fonts being
706  // selected for CJK, we require a high fraction (kMinOKFraction = 0.99) of
707  // BOTH weighted and raw scores.
708  // In low character-count scripts, the issue is more getting enough fonts,
709  // when only 1 or 2 might have all those rare dingbats etc in them, so we
710  // allow a font with a very high weighted (coverage) score
711  // (kMinWeightedFraction = 0.99995) to be used even if its raw score is poor.
712  int least_good_enough = static_cast<int>(most_ok_chars * kMinOKFraction);
713  int least_raw_enough = static_cast<int>(best_raw_score * kMinOKFraction);
714  int override_enough = static_cast<int>(most_ok_chars * kMinWeightedFraction);
715 
716  std::string font_list;
717  for (unsigned i = 0; i < font_names.size(); ++i) {
718  int score = font_scores[i];
719  int raw_score = raw_scores[i];
720  if ((score >= least_good_enough && raw_score >= least_raw_enough) ||
721  score >= override_enough) {
722  fonts->push_back(std::make_pair(font_names[i].c_str(), font_flags[i]));
723  tlog(1, "OK font %s = %.4f%%, raw = %d = %.2f%%\n",
724  font_names[i].c_str(),
725  100.0 * score / most_ok_chars,
726  raw_score, 100.0 * raw_score / best_raw_score);
727  font_list += font_names[i];
728  font_list += "\n";
729  } else if (score >= least_good_enough || raw_score >= least_raw_enough) {
730  tlog(1, "Runner-up font %s = %.4f%%, raw = %d = %.2f%%\n",
731  font_names[i].c_str(),
732  100.0 * score / most_ok_chars,
733  raw_score, 100.0 * raw_score / best_raw_score);
734  }
735  }
736  return font_list;
737 }
#define tlog(level,...)
Definition: tlog.h:33
static int FontScore(const std::unordered_map< char32, int64_t > &ch_map, const std::string &fontname, int *raw_score, std::vector< bool > *ch_flags)
static const std::vector< std::string > & ListAvailableFonts()

◆ FontScore()

int tesseract::FontUtils::FontScore ( const std::unordered_map< char32, int64_t > &  ch_map,
const std::string &  fontname,
int *  raw_score,
std::vector< bool > *  ch_flags 
)
static

Definition at line 641 of file pango_font_info.cpp.

643  {
644  PangoFontInfo font_info;
645  if (!font_info.ParseFontDescriptionName(fontname)) {
646  tprintf("ERROR: Could not parse %s\n", fontname.c_str());
647  }
648  PangoFont* font = font_info.ToPangoFont();
649  PangoCoverage* coverage = pango_font_get_coverage(font, nullptr);
650 
651  if (ch_flags) {
652  ch_flags->clear();
653  ch_flags->reserve(ch_map.size());
654  }
655  *raw_score = 0;
656  int ok_chars = 0;
657  for (std::unordered_map<char32, int64_t>::const_iterator it = ch_map.begin();
658  it != ch_map.end(); ++it) {
659  bool covered = (IsWhitespace(it->first) ||
660  (pango_coverage_get(coverage, it->first)
661  == PANGO_COVERAGE_EXACT));
662  if (covered) {
663  ++(*raw_score);
664  ok_chars += it->second;
665  }
666  if (ch_flags) {
667  ch_flags->push_back(covered);
668  }
669  }
670  pango_coverage_unref(coverage);
671  g_object_unref(font);
672  return ok_chars;
673 }
bool IsWhitespace(const char32 ch)
Definition: normstrngs.cpp:223
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37

◆ GetAllRenderableCharacters() [1/3]

void tesseract::FontUtils::GetAllRenderableCharacters ( std::vector< bool > *  unichar_bitmap)
static

Definition at line 602 of file pango_font_info.cpp.

602  {
603  const std::vector<std::string>& all_fonts = ListAvailableFonts();
604  return GetAllRenderableCharacters(all_fonts, unichar_bitmap);
605 }
static void GetAllRenderableCharacters(std::vector< bool > *unichar_bitmap)
static const std::vector< std::string > & ListAvailableFonts()

◆ GetAllRenderableCharacters() [2/3]

void tesseract::FontUtils::GetAllRenderableCharacters ( const std::vector< std::string > &  font_names,
std::vector< bool > *  unichar_bitmap 
)
static

Definition at line 619 of file pango_font_info.cpp.

620  {
621  // Form the union of coverage maps from the fonts
622  PangoCoverage* all_coverage = pango_coverage_new();
623  tlog(1, "Processing %u fonts\n", static_cast<unsigned>(fonts.size()));
624  for (unsigned i = 0; i < fonts.size(); ++i) {
625  PangoFontInfo font_info(fonts[i]);
626  PangoFont* font = font_info.ToPangoFont();
627  PangoCoverage* coverage = pango_font_get_coverage(font, nullptr);
628  // Mark off characters that any font can render.
629  pango_coverage_max(all_coverage, coverage);
630  pango_coverage_unref(coverage);
631  g_object_unref(font);
632  }
633  CharCoverageMapToBitmap(all_coverage, unichar_bitmap);
634  pango_coverage_unref(all_coverage);
635 }
#define tlog(level,...)
Definition: tlog.h:33

◆ GetAllRenderableCharacters() [3/3]

void tesseract::FontUtils::GetAllRenderableCharacters ( const std::string &  font_name,
std::vector< bool > *  unichar_bitmap 
)
static

Definition at line 608 of file pango_font_info.cpp.

609  {
610  PangoFontInfo font_info(font_name);
611  PangoFont* font = font_info.ToPangoFont();
612  PangoCoverage* coverage = pango_font_get_coverage(font, nullptr);
613  CharCoverageMapToBitmap(coverage, unichar_bitmap);
614  pango_coverage_unref(coverage);
615  g_object_unref(font);
616 }

◆ IsAvailableFont() [1/2]

static bool tesseract::FontUtils::IsAvailableFont ( const char *  font_desc)
inline static

Definition at line 154 of file pango_font_info.h.

154  {
155  return IsAvailableFont(font_desc, nullptr);
156  }
static bool IsAvailableFont(const char *font_desc)

◆ IsAvailableFont() [2/2]

bool tesseract::FontUtils::IsAvailableFont ( const char *  font_desc,
std::string *  best_match 
)
static

Definition at line 478 of file pango_font_info.cpp.

479  {
480  std::string query_desc(input_query_desc);
481  PangoFontDescription *desc = pango_font_description_from_string(
482  query_desc.c_str());
483  PangoFont* selected_font = nullptr;
484  {
485  DISABLE_HEAP_LEAK_CHECK;
486  PangoFontMap* font_map = pango_cairo_font_map_get_default();
487  PangoContext* context = pango_context_new();
488  pango_context_set_font_map(context, font_map);
489  {
490  DISABLE_HEAP_LEAK_CHECK;
491  selected_font = pango_font_map_load_font(font_map, context, desc);
492  }
493  g_object_unref(context);
494  }
495  if (selected_font == nullptr) {
496  pango_font_description_free(desc);
497  return false;
498  }
499  PangoFontDescription* selected_desc = pango_font_describe(selected_font);
500 
501  bool equal = pango_font_description_equal(desc, selected_desc);
502  tlog(3, "query weight = %d \t selected weight =%d\n",
503  pango_font_description_get_weight(desc),
504  pango_font_description_get_weight(selected_desc));
505 
506  char* selected_desc_str = pango_font_description_to_string(selected_desc);
507  tlog(2, "query_desc: '%s' Selected: '%s'\n", query_desc.c_str(),
508  selected_desc_str);
509  if (!equal && best_match != nullptr) {
510  *best_match = selected_desc_str;
511  // Clip the ending ' 0' if there is one. It seems that, if there is no
512  // point size on the end of the fontname, then Pango always appends ' 0'.
513  int len = best_match->size();
514  if (len > 2 && best_match->at(len - 1) == '0' &&
515  best_match->at(len - 2) == ' ') {
516  *best_match = best_match->substr(0, len - 2);
517  }
518  }
519  g_free(selected_desc_str);
520  pango_font_description_free(selected_desc);
521  g_object_unref(selected_font);
522  pango_font_description_free(desc);
523  return equal;
524 }
#define DISABLE_HEAP_LEAK_CHECK
Definition: util.h:62
#define tlog(level,...)
Definition: tlog.h:33

◆ ListAvailableFonts()

const std::vector< std::string > & tesseract::FontUtils::ListAvailableFonts ( )
static

Definition at line 539 of file pango_font_info.cpp.

539  {
540  if (!available_fonts_.empty()) {
541  return available_fonts_;
542  }
543 #ifdef GOOGLE_TESSERACT
544  if (FLAGS_use_only_legacy_fonts) {
545  // Restrict view to list of fonts in legacy_fonts.h
546  tprintf("Using list of legacy fonts only\n");
547  const int kNumFontLists = 4;
548  for (int i = 0; i < kNumFontLists; ++i) {
549  for (int j = 0; kFontlists[i][j] != nullptr; ++j) {
550  available_fonts_.push_back(kFontlists[i][j]);
551  }
552  }
553  return available_fonts_;
554  }
555 #endif
556 
557  PangoFontFamily** families = nullptr;
558  int n_families = 0;
559  ListFontFamilies(&families, &n_families);
560  for (int i = 0; i < n_families; ++i) {
561  const char* family_name = pango_font_family_get_name(families[i]);
562  tlog(2, "Listing family %s\n", family_name);
563  if (ShouldIgnoreFontFamilyName(family_name)) {
564  continue;
565  }
566 
567  int n_faces;
568  PangoFontFace** faces = nullptr;
569  pango_font_family_list_faces(families[i], &faces, &n_faces);
570  for (int j = 0; j < n_faces; ++j) {
571  PangoFontDescription* desc = pango_font_face_describe(faces[j]);
572  char* desc_str = pango_font_description_to_string(desc);
573  if (IsAvailableFont(desc_str)) {
574  available_fonts_.push_back(desc_str);
575  }
576  pango_font_description_free(desc);
577  g_free(desc_str);
578  }
579  g_free(faces);
580  }
581  g_free(families);
582  std::sort(available_fonts_.begin(), available_fonts_.end());
583  return available_fonts_;
584 }
#define tlog(level,...)
Definition: tlog.h:33
static bool IsAvailableFont(const char *font_desc)
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37

◆ PangoFontTypeInfo()

void tesseract::FontUtils::PangoFontTypeInfo ( )
static

Definition at line 773 of file pango_font_info.cpp.

773  {
774  PangoFontMap* font_map = pango_cairo_font_map_get_default();
775  if (pango_cairo_font_map_get_font_type(reinterpret_cast<PangoCairoFontMap*>(
776  font_map)) == CAIRO_FONT_TYPE_TOY) {
777  printf("Using CAIRO_FONT_TYPE_TOY.\n");
778  } else if (pango_cairo_font_map_get_font_type(
779  reinterpret_cast<PangoCairoFontMap*>(font_map)) ==
780  CAIRO_FONT_TYPE_FT) {
781  printf("Using CAIRO_FONT_TYPE_FT.\n");
782  } else if (pango_cairo_font_map_get_font_type(
783  reinterpret_cast<PangoCairoFontMap*>(font_map)) ==
784  CAIRO_FONT_TYPE_WIN32) {
785  printf("Using CAIRO_FONT_TYPE_WIN32.\n");
786  } else if (pango_cairo_font_map_get_font_type(
787  reinterpret_cast<PangoCairoFontMap*>(font_map)) ==
788  CAIRO_FONT_TYPE_QUARTZ) {
789  printf("Using CAIRO_FONT_TYPE_QUARTZ.\n");
790  } else if (pango_cairo_font_map_get_font_type(
791  reinterpret_cast<PangoCairoFontMap*>(font_map)) ==
792  CAIRO_FONT_TYPE_USER) {
793  printf("Using CAIRO_FONT_TYPE_USER.\n");
794  } else if (!font_map) {
795  printf("Can not create pango cairo font map!\n");
796  }
797 }

◆ ReInit()

void tesseract::FontUtils::ReInit ( )
static

Definition at line 769 of file pango_font_info.cpp.

769 { available_fonts_.clear(); }

◆ SelectFont() [1/2]

bool tesseract::FontUtils::SelectFont ( const char *  utf8_word,
const int  utf8_len,
std::string *  font_name,
std::vector< std::string > *  graphemes 
)
static

Definition at line 740 of file pango_font_info.cpp.

741  {
742  return SelectFont(utf8_word, utf8_len, ListAvailableFonts(), font_name,
743  graphemes);
744 }
static bool SelectFont(const char *utf8_word, const int utf8_len, std::string *font_name, std::vector< std::string > *graphemes)
static const std::vector< std::string > & ListAvailableFonts()

◆ SelectFont() [2/2]

bool tesseract::FontUtils::SelectFont ( const char *  utf8_word,
const int  utf8_len,
const std::vector< std::string > &  all_fonts,
std::string *  font_name,
std::vector< std::string > *  graphemes 
)
static

Definition at line 747 of file pango_font_info.cpp.

749  {
750  if (font_name) font_name->clear();
751  if (graphemes) graphemes->clear();
752  for (unsigned i = 0; i < all_fonts.size(); ++i) {
753  PangoFontInfo font;
754  std::vector<std::string> found_graphemes;
755  ASSERT_HOST_MSG(font.ParseFontDescriptionName(all_fonts[i]),
756  "Could not parse font desc name %s\n",
757  all_fonts[i].c_str());
758  if (font.CanRenderString(utf8_word, utf8_len, &found_graphemes)) {
759  if (graphemes) graphemes->swap(found_graphemes);
760  if (font_name) *font_name = all_fonts[i];
761  return true;
762  }
763  }
764  return false;
765 }
#define ASSERT_HOST_MSG(x,...)
Definition: errcode.h:90

The documentation for this class was generated from the following files: