tesseract  5.0.0-alpha-619-ge9db
tesseract::PangoFontInfo Class Reference

#include <pango_font_info.h>

Public Types

enum  FontTypeEnum { UNKNOWN, SERIF, SANS_SERIF, DECORATIVE }
 

Public Member Functions

 PangoFontInfo ()
 
 ~PangoFontInfo ()
 
 PangoFontInfo (const std::string &name)
 
bool ParseFontDescriptionName (const std::string &name)
 
bool CoversUTF8Text (const char *utf8_text, int byte_length) const
 
int DropUncoveredChars (std::string *utf8_text) const
 
bool CanRenderString (const char *utf8_word, int len, std::vector< std::string > *graphemes) const
 
bool CanRenderString (const char *utf8_word, int len) const
 
bool GetSpacingProperties (const std::string &utf8_char, int *x_bearing, int *x_advance) const
 
std::string DescriptionName () const
 
const std::string & family_name () const
 
int font_size () const
 
FontTypeEnum font_type () const
 
int resolution () const
 
void set_resolution (const int resolution)
 

Static Public Member Functions

static void SoftInitFontConfig ()
 
static void HardInitFontConfig (const std::string &fonts_dir, const std::string &cache_dir)
 

Friends

class FontUtils
 

Detailed Description

Definition at line 39 of file pango_font_info.h.

Member Enumeration Documentation

◆ FontTypeEnum

Enumerator
UNKNOWN 
SERIF 
SANS_SERIF 
DECORATIVE 

Definition at line 41 of file pango_font_info.h.

41  :
42  enum FontTypeEnum {
43  UNKNOWN,
44  SERIF,
45  SANS_SERIF,
46  DECORATIVE,

Constructor & Destructor Documentation

◆ PangoFontInfo() [1/2]

tesseract::PangoFontInfo::PangoFontInfo ( )

Definition at line 74 of file pango_font_info.cpp.

76  : desc_(nullptr), resolution_(kDefaultResolution) {
77  Clear();

◆ ~PangoFontInfo()

tesseract::PangoFontInfo::~PangoFontInfo ( )

Definition at line 97 of file pango_font_info.cpp.

◆ PangoFontInfo() [2/2]

tesseract::PangoFontInfo::PangoFontInfo ( const std::string &  name)
explicit

Definition at line 79 of file pango_font_info.cpp.

81  : desc_(nullptr), resolution_(kDefaultResolution) {
82  if (!ParseFontDescriptionName(desc)) {
83  tprintf("ERROR: Could not parse %s\n", desc.c_str());
84  Clear();
85  }

Member Function Documentation

◆ CanRenderString() [1/2]

bool tesseract::PangoFontInfo::CanRenderString ( const char *  utf8_word,
int  len 
) const

Definition at line 354 of file pango_font_info.cpp.

355  {
356  std::vector<std::string> graphemes;
357  return CanRenderString(utf8_word, len, &graphemes);

◆ CanRenderString() [2/2]

bool tesseract::PangoFontInfo::CanRenderString ( const char *  utf8_word,
int  len,
std::vector< std::string > *  graphemes 
) const

Definition at line 359 of file pango_font_info.cpp.

361  {
362  if (graphemes) graphemes->clear();
363  // We check for font coverage of the text first, as otherwise Pango could
364  // (undesirably) fall back to another font that does have the required
365  // coverage.
366  if (!CoversUTF8Text(utf8_word, len)) {
367  return false;
368  }
369  // U+25CC dotted circle character that often (but not always) gets rendered
370  // when there is an illegal grapheme sequence.
371  const char32 kDottedCircleGlyph = 9676;
372  bool bad_glyph = false;
373  PangoFontMap* font_map = pango_cairo_font_map_get_default();
374  PangoContext* context = pango_context_new();
375  pango_context_set_font_map(context, font_map);
376  PangoLayout* layout;
377  {
378  // Pango is not releasing the cached layout.
380  layout = pango_layout_new(context);
381  }
382  if (desc_) {
383  pango_layout_set_font_description(layout, desc_);
384  } else {
385  PangoFontDescription *desc = pango_font_description_from_string(
386  DescriptionName().c_str());
387  pango_layout_set_font_description(layout, desc);
388  pango_font_description_free(desc);
389  }
390  pango_layout_set_text(layout, utf8_word, len);
391  PangoLayoutIter* run_iter = nullptr;
392  { // Fontconfig caches some information here that is not freed before exit.
394  run_iter = pango_layout_get_iter(layout);
395  }
396  do {
397  PangoLayoutRun* run = pango_layout_iter_get_run_readonly(run_iter);
398  if (!run) {
399  tlog(2, "Found end of line nullptr run marker\n");
400  continue;
401  }
402  PangoGlyph dotted_circle_glyph;
403  PangoFont* font = run->item->analysis.font;
404 
405 #ifdef _WIN32
406  PangoGlyphString* glyphs = pango_glyph_string_new();
407  const char s[] = "\xc2\xa7";
408  pango_shape(s, strlen(s), &(run->item->analysis), glyphs);
409  dotted_circle_glyph = glyphs->glyphs[0].glyph;
410 #else // TODO: Do we need separate solution for non win build?
411  dotted_circle_glyph = pango_fc_font_get_glyph(
412  reinterpret_cast<PangoFcFont*>(font), kDottedCircleGlyph);
413 #endif
414 
415  if (TLOG_IS_ON(2)) {
416  PangoFontDescription* desc = pango_font_describe(font);
417  char* desc_str = pango_font_description_to_string(desc);
418  tlog(2, "Desc of font in run: %s\n", desc_str);
419  g_free(desc_str);
420  pango_font_description_free(desc);
421  }
422 
423  PangoGlyphItemIter cluster_iter;
424  gboolean have_cluster;
425  for (have_cluster = pango_glyph_item_iter_init_start(&cluster_iter,
426  run, utf8_word);
427  have_cluster && !bad_glyph;
428  have_cluster = pango_glyph_item_iter_next_cluster(&cluster_iter)) {
429  const int start_byte_index = cluster_iter.start_index;
430  const int end_byte_index = cluster_iter.end_index;
431  int start_glyph_index = cluster_iter.start_glyph;
432  int end_glyph_index = cluster_iter.end_glyph;
433  std::string cluster_text = std::string(utf8_word + start_byte_index,
434  end_byte_index - start_byte_index);
435  if (graphemes) graphemes->push_back(cluster_text);
436  if (IsUTF8Whitespace(cluster_text.c_str())) {
437  tlog(2, "Skipping whitespace\n");
438  continue;
439  }
440  if (TLOG_IS_ON(2)) {
441  printf("start_byte=%d end_byte=%d start_glyph=%d end_glyph=%d ",
442  start_byte_index, end_byte_index,
443  start_glyph_index, end_glyph_index);
444  }
445  for (int i = start_glyph_index,
446  step = (end_glyph_index > start_glyph_index) ? 1 : -1;
447  !bad_glyph && i != end_glyph_index; i+= step) {
448  const bool unknown_glyph =
449  (cluster_iter.glyph_item->glyphs->glyphs[i].glyph &
450  PANGO_GLYPH_UNKNOWN_FLAG);
451  const bool illegal_glyph =
452  (cluster_iter.glyph_item->glyphs->glyphs[i].glyph ==
453  dotted_circle_glyph);
454  bad_glyph = unknown_glyph || illegal_glyph;
455  if (TLOG_IS_ON(2)) {
456  printf("(%d=%d)", cluster_iter.glyph_item->glyphs->glyphs[i].glyph,
457  bad_glyph ? 1 : 0);
458  }
459  }
460  if (TLOG_IS_ON(2)) {
461  printf(" '%s'\n", cluster_text.c_str());
462  }
463  if (bad_glyph)
464  tlog(1, "Found illegal glyph!\n");
465  }
466 #ifdef _WIN32
467  pango_glyph_string_free(glyphs);
468 #endif
469  } while (!bad_glyph && pango_layout_iter_next_run(run_iter));
470 
471  pango_layout_iter_free(run_iter);
472  g_object_unref(context);
473  g_object_unref(layout);
474  if (bad_glyph && graphemes) graphemes->clear();
475  return !bad_glyph;

◆ CoversUTF8Text()

bool tesseract::PangoFontInfo::CoversUTF8Text ( const char *  utf8_text,
int  byte_length 
) const

Definition at line 216 of file pango_font_info.cpp.

217  {
218  PangoFont* font = ToPangoFont();
219  if (font == nullptr) {
220  // Font not found.
221  return false;
222  }
223  PangoCoverage* coverage = pango_font_get_coverage(font, nullptr);
224  for (UNICHAR::const_iterator it = UNICHAR::begin(utf8_text, byte_length);
225  it != UNICHAR::end(utf8_text, byte_length);
226  ++it) {
227  if (IsWhitespace(*it) || pango_is_zero_width(*it))
228  continue;
229  if (pango_coverage_get(coverage, *it) != PANGO_COVERAGE_EXACT) {
230  char tmp[5];
231  int len = it.get_utf8(tmp);
232  tmp[len] = '\0';
233  tlog(2, "'%s' (U+%x) not covered by font\n", tmp, *it);
234  pango_coverage_unref(coverage);
235  g_object_unref(font);
236  return false;
237  }
238  }
239  pango_coverage_unref(coverage);
240  g_object_unref(font);
241  return true;

◆ DescriptionName()

std::string tesseract::PangoFontInfo::DescriptionName ( ) const

Definition at line 99 of file pango_font_info.cpp.

100  {
101  if (!desc_) return "";
102  char* desc_str = pango_font_description_to_string(desc_);
103  std::string desc_name(desc_str);
104  g_free(desc_str);
105  return desc_name;

◆ DropUncoveredChars()

int tesseract::PangoFontInfo::DropUncoveredChars ( std::string *  utf8_text) const

Definition at line 265 of file pango_font_info.cpp.

266  {
267  int num_dropped_chars = 0;
268  PangoFont* font = ToPangoFont();
269  if (font == nullptr) {
270  // Font not found, drop all characters.
271  num_dropped_chars = utf8_text->length();
272  utf8_text->resize(0);
273  return num_dropped_chars;
274  }
275  PangoCoverage* coverage = pango_font_get_coverage(font, nullptr);
276  // Maintain two iterators that point into the string. For space efficiency, we
277  // will repeatedly copy one covered UTF8 character from one to the other, and
278  // at the end resize the string to the right length.
279  char* out = const_cast<char*>(utf8_text->c_str());
280  const UNICHAR::const_iterator it_begin =
281  UNICHAR::begin(utf8_text->c_str(), utf8_text->length());
282  const UNICHAR::const_iterator it_end =
283  UNICHAR::end(utf8_text->c_str(), utf8_text->length());
284  for (UNICHAR::const_iterator it = it_begin; it != it_end;) {
285  // Skip bad utf-8.
286  if (!it.is_legal()) {
287  ++it; // One suitable error message will still be issued.
288  continue;
289  }
290  int unicode = *it;
291  int utf8_len = it.utf8_len();
292  const char* utf8_char = it.utf8_data();
293  // Move it forward before the data gets modified.
294  ++it;
295  if (!IsWhitespace(unicode) && !pango_is_zero_width(unicode) &&
296  pango_coverage_get(coverage, unicode) != PANGO_COVERAGE_EXACT) {
297  if (TLOG_IS_ON(2)) {
298  UNICHAR unichar(unicode);
299  char* str = unichar.utf8_str();
300  tlog(2, "'%s' (U+%x) not covered by font\n", str, unicode);
301  delete[] str;
302  }
303  ++num_dropped_chars;
304  continue;
305  }
306  my_strnmove(out, utf8_char, utf8_len);
307  out += utf8_len;
308  }
309  pango_coverage_unref(coverage);
310  g_object_unref(font);
311  utf8_text->resize(out - utf8_text->c_str());
312  return num_dropped_chars;

◆ family_name()

const std::string& tesseract::PangoFontInfo::family_name ( ) const
inline

Definition at line 100 of file pango_font_info.h.

◆ font_size()

int tesseract::PangoFontInfo::font_size ( ) const
inline

Definition at line 102 of file pango_font_info.h.

103 { return font_size_; }

◆ font_type()

FontTypeEnum tesseract::PangoFontInfo::font_type ( ) const
inline

Definition at line 103 of file pango_font_info.h.

103 { return font_size_; }

◆ GetSpacingProperties()

bool tesseract::PangoFontInfo::GetSpacingProperties ( const std::string &  utf8_char,
int *  x_bearing,
int *  x_advance 
) const

Definition at line 314 of file pango_font_info.cpp.

316  {
317  // Convert to equivalent PangoFont structure
318  PangoFont* font = ToPangoFont();
319  // Find the glyph index in the font for the supplied utf8 character.
320  int total_advance = 0;
321  int min_bearing = 0;
322  // Handle multi-unicode strings by reporting the left-most position of the
323  // x-bearing, and right-most position of the x-advance if the string were to
324  // be rendered.
325  const UNICHAR::const_iterator it_begin = UNICHAR::begin(utf8_char.c_str(),
326  utf8_char.length());
327  const UNICHAR::const_iterator it_end = UNICHAR::end(utf8_char.c_str(),
328  utf8_char.length());
329  for (UNICHAR::const_iterator it = it_begin; it != it_end; ++it) {
330  PangoGlyph glyph_index = pango_fc_font_get_glyph(
331  reinterpret_cast<PangoFcFont*>(font), *it);
332  if (!glyph_index) {
333  // Glyph for given unicode character doesn't exist in font.
334  g_object_unref(font);
335  return false;
336  }
337  // Find the ink glyph extents for the glyph
338  PangoRectangle ink_rect, logical_rect;
339  pango_font_get_glyph_extents(font, glyph_index, &ink_rect, &logical_rect);
340  pango_extents_to_pixels(&ink_rect, nullptr);
341  pango_extents_to_pixels(&logical_rect, nullptr);
342 
343  int bearing = total_advance + PANGO_LBEARING(ink_rect);
344  if (it == it_begin || bearing < min_bearing) {
345  min_bearing = bearing;
346  }
347  total_advance += PANGO_RBEARING(logical_rect);
348  }
349  *x_bearing = min_bearing;
350  *x_advance = total_advance;
351  g_object_unref(font);
352  return true;

◆ HardInitFontConfig()

void tesseract::PangoFontInfo::HardInitFontConfig ( const std::string &  fonts_dir,
const std::string &  cache_dir 
)
static

Definition at line 121 of file pango_font_info.cpp.

123  {
124  if (!cache_dir_.empty()) {
126  File::JoinPath(cache_dir_.c_str(), "*cache-?").c_str());
127  }
128  const int MAX_FONTCONF_FILESIZE = 1024;
129  char fonts_conf_template[MAX_FONTCONF_FILESIZE];
130  cache_dir_ = cache_dir;
131  fonts_dir_ = fonts_dir;
132  snprintf(fonts_conf_template, MAX_FONTCONF_FILESIZE,
133  "<?xml version=\"1.0\"?>\n"
134  "<!DOCTYPE fontconfig SYSTEM \"fonts.dtd\">\n"
135  "<fontconfig>\n"
136  "<dir>%s</dir>\n"
137  "<cachedir>%s</cachedir>\n"
138  "<config></config>\n"
139  "</fontconfig>",
140  fonts_dir.c_str(), cache_dir_.c_str());
141  std::string fonts_conf_file = File::JoinPath(cache_dir_.c_str(), "fonts.conf");
142  File::WriteStringToFileOrDie(fonts_conf_template, fonts_conf_file);
143 #ifdef _WIN32
144  std::string env("FONTCONFIG_PATH=");
145  env.append(cache_dir_.c_str());
146  _putenv(env.c_str());
147  _putenv("LANG=en_US.utf8");
148 #else
149  setenv("FONTCONFIG_PATH", cache_dir_.c_str(), true);
150  // Fix the locale so that the reported font names are consistent.
151  setenv("LANG", "en_US.utf8", true);
152 #endif // _WIN32
153 
154  if (FcInitReinitialize() != FcTrue) {
155  tprintf("FcInitiReinitialize failed!!\n");
156  }
158  // Clear Pango's font cache too.
159  pango_cairo_font_map_set_default(nullptr);

◆ ParseFontDescriptionName()

bool tesseract::PangoFontInfo::ParseFontDescriptionName ( const std::string &  name)

Definition at line 191 of file pango_font_info.cpp.

192  {
193  PangoFontDescription *desc = pango_font_description_from_string(name.c_str());
194  bool success = ParseFontDescription(desc);
195  pango_font_description_free(desc);
196  return success;

◆ resolution()

int tesseract::PangoFontInfo::resolution ( ) const
inline

Definition at line 105 of file pango_font_info.h.

106 { return resolution_; }

◆ set_resolution()

void tesseract::PangoFontInfo::set_resolution ( const int  resolution)
inline

Definition at line 106 of file pango_font_info.h.

106  { return resolution_; }
107  void set_resolution(const int resolution) {
108  resolution_ = resolution;

◆ SoftInitFontConfig()

void tesseract::PangoFontInfo::SoftInitFontConfig ( )
static

Definition at line 111 of file pango_font_info.cpp.

112  {
113  if (fonts_dir_.empty()) {
114  HardInitFontConfig(FLAGS_fonts_dir.c_str(),
115  FLAGS_fontconfig_tmpdir.c_str());
116  }

Friends And Related Function Documentation

◆ FontUtils

friend class FontUtils
friend

Definition at line 111 of file pango_font_info.h.


The documentation for this class was generated from the following files:
pango_font_info.h
pango_font_info.cpp
string
std::string string
Definition: equationdetect_test.cc:21
tesseract::IsWhitespace
bool IsWhitespace(const char32 ch)
Definition: normstrngs.cpp:239
tesseract::PangoFontInfo::FontTypeEnum
FontTypeEnum
Definition: pango_font_info.h:41
tesseract::UNICHAR::begin
static const_iterator begin(const char *utf8_str, int byte_length)
Definition: unichar.cpp:204
tesseract::PangoFontInfo::set_resolution
void set_resolution(const int resolution)
Definition: pango_font_info.h:106
tesseract::UNICHAR::end
static const_iterator end(const char *utf8_str, int byte_length)
Definition: unichar.cpp:208
tesseract::PangoFontInfo::CanRenderString
bool CanRenderString(const char *utf8_word, int len, std::vector< std::string > *graphemes) const
Definition: pango_font_info.cpp:359
tesseract::PangoFontInfo::ParseFontDescriptionName
bool ParseFontDescriptionName(const std::string &name)
Definition: pango_font_info.cpp:191
TLOG_IS_ON
#define TLOG_IS_ON(level)
Definition: tlog.h:38
tesseract::PangoFontInfo::HardInitFontConfig
static void HardInitFontConfig(const std::string &fonts_dir, const std::string &cache_dir)
Definition: pango_font_info.cpp:121
tesseract::UNICHAR::const_iterator::utf8_len
int utf8_len() const
Definition: unichar.cpp:190
tesseract::PangoFontInfo::SERIF
Definition: pango_font_info.h:43
tlog
#define tlog(level,...)
Definition: tlog.h:32
tesseract::kDefaultResolution
const int kDefaultResolution
Definition: pango_font_info.cpp:69
tesseract::PangoFontInfo::SANS_SERIF
Definition: pango_font_info.h:44
tesseract::File::JoinPath
static std::string JoinPath(const std::string &prefix, const std::string &suffix)
Definition: fileio.cpp:98
tesseract::PangoFontInfo::DescriptionName
std::string DescriptionName() const
Definition: pango_font_info.cpp:99
tesseract::File::WriteStringToFileOrDie
static void WriteStringToFileOrDie(const std::string &str, const std::string &filename)
Definition: fileio.cpp:68
tesseract::PangoFontInfo::resolution
int resolution() const
Definition: pango_font_info.h:105
tesseract::PangoFontInfo::UNKNOWN
Definition: pango_font_info.h:42
tesseract::IsUTF8Whitespace
bool IsUTF8Whitespace(const char *text)
Definition: normstrngs.cpp:245
tesseract::PangoFontInfo::DECORATIVE
Definition: pango_font_info.h:45
tesseract::FontUtils::ReInit
static void ReInit()
Definition: pango_font_info.cpp:798
char32
signed int char32
Definition: pango_font_info.h:33
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
tesseract::File::DeleteMatchingFiles
static bool DeleteMatchingFiles(const char *pattern)
Definition: fileio.cpp:133
DISABLE_HEAP_LEAK_CHECK
#define DISABLE_HEAP_LEAK_CHECK
Definition: util.h:60
tesseract::PangoFontInfo::CoversUTF8Text
bool CoversUTF8Text(const char *utf8_text, int byte_length) const
Definition: pango_font_info.cpp:216