tesseract  5.0.0-alpha-619-ge9db
pango_font_info.h
Go to the documentation of this file.
1 /**********************************************************************
2  * File: pango_font_info.h
3  * Description: Font-related objects and helper functions
4  * Author: Ranjith Unnikrishnan
5  * Created: Mon Nov 18 2013
6  *
7  * (C) Copyright 2013, Google Inc.
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  * http://www.apache.org/licenses/LICENSE-2.0
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  **********************************************************************/
19 
20 #ifndef TESSERACT_TRAINING_PANGO_FONT_INFO_H_
21 #define TESSERACT_TRAINING_PANGO_FONT_INFO_H_
22 
23 #include <string>
24 #include <unordered_map>
25 #include <utility>
26 #include <vector>
27 
28 #include "commandlineflags.h"
29 #include "pango/pango-font.h"
30 #include "pango/pango.h"
31 #include "pango/pangocairo.h"
32 #include "util.h"
33 
// Integer type used in this header as the character key of the ch_map
// arguments of FontUtils::BestFonts() and FontUtils::FontScore().
// NOTE(review): presumably holds a Unicode code point value - confirm.
34 using char32 = signed int;
35 
36 namespace tesseract {
37 
38 // Data holder class for a font, intended to avoid having to work with Pango or
39 // FontConfig-specific objects directly.
40 class PangoFontInfo {
41  public:
  // Font classification reported by font_type().
  // NOTE(review): this listing shows only DECORATIVE; the member index at the
  // end of this page also lists UNKNOWN, SERIF and SANS_SERIF for this enum.
42  enum FontTypeEnum {
46  DECORATIVE,
47  };
  // Constructs an instance without a font description name.
48  PangoFontInfo();
50  // Initialize from parsing a font description name, defined as a string of the
51  // format:
52  // "FamilyName [FaceName] [PointSize]"
53  // where a missing FaceName implies the default regular face.
54  // eg. "Arial Italic 12", "Verdana"
55  //
56  // FaceName is a combination of:
57  // [StyleName] [Variant] [Weight] [Stretch]
58  // with (all optional) Pango-defined values of:
59  // StyleName: Oblique, Italic
60  // Variant : Small-Caps
61  // Weight : Ultra-Light, Light, Medium, Semi-Bold, Bold, Ultra-Bold, Heavy
62  // Stretch : Ultra-Condensed, Extra-Condensed, Condensed, Semi-Condensed,
63  // Semi-Expanded, Expanded, Extra-Expanded, Ultra-Expanded.
  // NOTE(review): the bool returned by ParseFontDescriptionName() presumably
  // reports whether parsing succeeded - confirm in pango_font_info.cpp.
64  explicit PangoFontInfo(const std::string& name);
65  bool ParseFontDescriptionName(const std::string& name);
66 
67  // Returns true if the font has codepoint coverage for the specified text.
  // byte_length is the length of utf8_text in bytes.
68  bool CoversUTF8Text(const char* utf8_text, int byte_length) const;
69  // Modifies the string in place to remove unicode points that are not
70  // covered by the font. Returns the number of characters dropped.
71  int DropUncoveredChars(std::string* utf8_text) const;
72 
73  // Returns true if the entire string can be rendered by the font with full
74  // character coverage and no unknown glyph or dotted-circle glyph
75  // substitutions on encountering a badly formed unicode sequence.
76  // If true, returns individual graphemes. Any whitespace characters in the
77  // original string are also included in the list.
78  bool CanRenderString(const char* utf8_word, int len,
79  std::vector<std::string>* graphemes) const;
  // Overload that takes no graphemes output argument.
  // NOTE(review): presumably performs the same check as the overload above
  // without returning the graphemes - confirm in pango_font_info.cpp.
80  bool CanRenderString(const char* utf8_word, int len) const;
81 
82  // Retrieves the x_bearing and x_advance for the given utf8 character in the
83  // font. Returns false if the glyph for the character could not be found in
84  // the font.
85  // Ref: http://freetype.sourceforge.net/freetype2/docs/glyphs/glyphs-3.html
  // NOTE(review): per the comment on resolution_, the outputs are presumably
  // pixel values computed for resolution() - confirm.
86  bool GetSpacingProperties(const std::string& utf8_char,
87  int* x_bearing, int* x_advance) const;
88 
89  // If not already initialized, initializes FontConfig by setting its
90  // environment variable and creating a fonts.conf file that points to the
91  // FLAGS_fonts_dir and the cache to FLAGS_fontconfig_tmpdir.
92  static void SoftInitFontConfig();
93  // Re-initializes font config, whether or not already initialized.
94  // If already initialized, any existing cache is deleted, just to be sure.
95  static void HardInitFontConfig(const std::string& fonts_dir,
96  const std::string& cache_dir);
97 
98  // Accessors
100  // Font Family name eg. "Arial"
101  const std::string& family_name() const { return family_name_; }
102  // Size in points (1/72"), rounded to the nearest integer.
103  int font_size() const { return font_size_; }
  // Font classification stored in font_type_.
104  FontTypeEnum font_type() const { return font_type_; }
105 
  // Getter and setter for resolution_, the output resolution assumed by
  // methods that return pixel values.
106  int resolution() const { return resolution_; }
107  void set_resolution(const int resolution) {
108  resolution_ = resolution;
109  }
110 
111  private:
  // FontUtils is granted access to the private members of this class.
112  friend class FontUtils;
  // NOTE(review): presumably resets the parsed font properties - confirm in
  // pango_font_info.cpp.
113  void Clear();
  // NOTE(review): presumably fills the font properties below from the given
  // Pango description; the meaning of the bool result is not visible here.
114  bool ParseFontDescription(const PangoFontDescription* desc);
115  // Returns the PangoFont structure corresponding to the closest available font
116  // in the font map.
  // NOTE(review): ownership of the returned PangoFont* is not stated here -
  // confirm whether the caller must release it.
117  PangoFont* ToPangoFont() const;
118 
119  // Font properties set automatically from parsing the font description name.
120  std::string family_name_;
121  int font_size_;
122  FontTypeEnum font_type_;
123  // The Pango description that was used to initialize the instance.
  // NOTE(review): raw pointer; who frees it is not visible in this listing.
124  PangoFontDescription* desc_;
125  // Default output resolution to assume for GetSpacingProperties() and any
126  // other methods that return pixel values.
127  int resolution_;
128  // Fontconfig operates through an environment variable, so it intrinsically
129  // cannot be thread-friendly, but you can serialize multiple independent
130  // font configurations by calling HardInitFontConfig(fonts_dir, cache_dir).
131  // These hold the last initialized values set by HardInitFontConfig or
132  // the first call to SoftInitFontConfig.
133  // Directory to be scanned for font files.
134  static std::string fonts_dir_;
135  // Directory to store the cache of font information. (Can be the same as
136  // fonts_dir_)
137  static std::string cache_dir_;
138 
139  private:
  // Assignment operator declared private, so code outside this class and its
  // friend FontUtils cannot assign instances.
  // NOTE(review): presumably left undefined to disallow assignment entirely -
  // confirm, and consider `= delete` if the toolchain allows it.
141  void operator=(const PangoFontInfo&);
142 };
143 
144 // Static utility methods for querying font availability and font-selection
145 // based on codepoint coverage.
146 class FontUtils {
147  public:
148  // Returns true if the font of the given description name is available in the
149  // target directory specified by --fonts_dir
  // Convenience overload: forwards to the two-argument overload with
  // best_match set to nullptr.
150  static bool IsAvailableFont(const char* font_desc) {
151  return IsAvailableFont(font_desc, nullptr);
152  }
153  // Returns true if the font of the given description name is available in the
154  // target directory specified by --fonts_dir. If false is returned, and
155  // best_match is not nullptr, the closest matching font is returned there.
156  static bool IsAvailableFont(const char* font_desc, std::string* best_match);
157  // Outputs description names of available fonts.
  // Returns a const reference to a vector of names.
  // NOTE(review): presumably refers to the available_fonts_ cache below -
  // confirm in pango_font_info.cpp.
158  static const std::vector<std::string>& ListAvailableFonts();
159 
160  // Picks font among available fonts that covers and can render the given word,
161  // and returns the font description name and the decomposition of the word to
162  // graphemes. Returns false if no suitable font was found.
163  static bool SelectFont(const char* utf8_word, const int utf8_len,
164  std::string* font_name, std::vector<std::string>* graphemes);
165 
166  // Picks font among all_fonts that covers and can render the given word,
167  // and returns the font description name and the decomposition of the word to
168  // graphemes. Returns false if no suitable font was found.
169  static bool SelectFont(const char* utf8_word, const int utf8_len,
170  const std::vector<std::string>& all_fonts,
171  std::string* font_name, std::vector<std::string>* graphemes);
172 
173  // Returns a bitmask where the value of true at index 'n' implies that unicode
174  // value 'n' is renderable by at least one available font.
  // The bitmask is written to *unichar_bitmap.
175  static void GetAllRenderableCharacters(std::vector<bool>* unichar_bitmap);
176  // Variant of the above function that inspects only the provided font names.
177  static void GetAllRenderableCharacters(const std::vector<std::string>& font_names,
178  std::vector<bool>* unichar_bitmap);
  // Variant that takes a single font name.
179  static void GetAllRenderableCharacters(const std::string& font_name,
180  std::vector<bool>* unichar_bitmap);
181 
182  // NOTE: The following utilities were written to be backward compatible with
183  // StringRender.
184 
185  // BestFonts returns a font name and a bit vector of the characters it
186  // can render for the fonts that score within some fraction of the best
187  // font on the characters in the given hash map.
188  // In the flags vector, each flag is set according to whether the
189  // corresponding character (in order of iterating ch_map) can be rendered.
190  // The return string is a list of the acceptable fonts that were used.
  // NOTE(review): font_flag stores font names as const char*; the lifetime of
  // the pointed-to strings is not visible here - confirm before keeping them.
  // NOTE(review): int64_t is used below, but <cstdint> is not among the
  // includes of this header; presumably it arrives via another include.
191  static std::string BestFonts(
192  const std::unordered_map<char32, int64_t>& ch_map,
193  std::vector<std::pair<const char*, std::vector<bool> > >* font_flag);
194 
195  // FontScore returns the weighted renderability score of the given
196  // hash map character table in the given font. The unweighted score
197  // is also returned in raw_score.
198  // The values in the bool vector ch_flags correspond to whether the
199  // corresponding character (in order of iterating ch_map) can be rendered.
200  static int FontScore(const std::unordered_map<char32, int64_t>& ch_map,
201  const std::string& fontname, int* raw_score,
202  std::vector<bool>* ch_flags);
203 
204  // PangoFontInfo is reinitialized, so clear the static list of fonts.
205  static void ReInit();
  // NOTE(review): purpose is not documented in this header; see the
  // definition in pango_font_info.cpp.
206  static void PangoFontTypeInfo();
207 
208  private:
  // NOTE(review): presumably the static list of fonts that ReInit() clears.
209  static std::vector<std::string> available_fonts_; // cache list
210 };
211 } // namespace tesseract
212 
213 #endif // TESSERACT_TRAINING_PANGO_FONT_INFO_H_
string
std::string string
Definition: equationdetect_test.cc:21
tesseract::PangoFontInfo::GetSpacingProperties
bool GetSpacingProperties(const std::string &utf8_char, int *x_bearing, int *x_advance) const
Definition: pango_font_info.cpp:314
tesseract::PangoFontInfo::FontTypeEnum
FontTypeEnum
Definition: pango_font_info.h:41
tesseract::PangoFontInfo::set_resolution
void set_resolution(const int resolution)
Definition: pango_font_info.h:106
tesseract::PangoFontInfo::CanRenderString
bool CanRenderString(const char *utf8_word, int len, std::vector< std::string > *graphemes) const
Definition: pango_font_info.cpp:359
tesseract::PangoFontInfo::ParseFontDescriptionName
bool ParseFontDescriptionName(const std::string &name)
Definition: pango_font_info.cpp:191
tesseract::PangoFontInfo::font_type
FontTypeEnum font_type() const
Definition: pango_font_info.h:103
tesseract::PangoFontInfo::HardInitFontConfig
static void HardInitFontConfig(const std::string &fonts_dir, const std::string &cache_dir)
Definition: pango_font_info.cpp:121
tesseract::PangoFontInfo::family_name
const std::string & family_name() const
Definition: pango_font_info.h:100
tesseract::PangoFontInfo::font_size
int font_size() const
Definition: pango_font_info.h:102
tesseract::PangoFontInfo::SoftInitFontConfig
static void SoftInitFontConfig()
Definition: pango_font_info.cpp:111
tesseract::PangoFontInfo::SERIF
Definition: pango_font_info.h:43
util.h
tesseract::PangoFontInfo::SANS_SERIF
Definition: pango_font_info.h:44
tesseract::PangoFontInfo
Definition: pango_font_info.h:39
tesseract::PangoFontInfo::~PangoFontInfo
~PangoFontInfo()
Definition: pango_font_info.cpp:97
tesstrain_utils.int
int
Definition: tesstrain_utils.py:154
tesseract::PangoFontInfo::DescriptionName
std::string DescriptionName() const
Definition: pango_font_info.cpp:99
tesseract::PangoFontInfo::resolution
int resolution() const
Definition: pango_font_info.h:105
tesseract::PangoFontInfo::DropUncoveredChars
int DropUncoveredChars(std::string *utf8_text) const
Definition: pango_font_info.cpp:265
tesseract
Definition: baseapi.h:65
tesseract::FontUtils::ListAvailableFonts
static const std::vector< std::string > & ListAvailableFonts()
Definition: pango_font_info.cpp:560
tesseract::PangoFontInfo::UNKNOWN
Definition: pango_font_info.h:42
tesseract::FontUtils::BestFonts
static std::string BestFonts(const std::unordered_map< char32, int64_t > &ch_map, std::vector< std::pair< const char *, std::vector< bool > > > *font_flag)
Definition: pango_font_info.cpp:706
tesseract::FontUtils::FontScore
static int FontScore(const std::unordered_map< char32, int64_t > &ch_map, const std::string &fontname, int *raw_score, std::vector< bool > *ch_flags)
Definition: pango_font_info.cpp:670
tesseract::PangoFontInfo::PangoFontInfo
PangoFontInfo()
Definition: pango_font_info.cpp:74
tesseract::FontUtils::PangoFontTypeInfo
static void PangoFontTypeInfo()
Definition: pango_font_info.cpp:802
tesseract::FontUtils::SelectFont
static bool SelectFont(const char *utf8_word, const int utf8_len, std::string *font_name, std::vector< std::string > *graphemes)
Definition: pango_font_info.cpp:769
tesseract::PangoFontInfo::DECORATIVE
Definition: pango_font_info.h:45
tesseract::FontUtils
Definition: pango_font_info.h:145
tesseract::FontUtils::ReInit
static void ReInit()
Definition: pango_font_info.cpp:798
char32
signed int char32
Definition: pango_font_info.h:33
tesseract::FontUtils::GetAllRenderableCharacters
static void GetAllRenderableCharacters(std::vector< bool > *unichar_bitmap)
Definition: pango_font_info.cpp:625
commandlineflags.h
tesseract::FontUtils::IsAvailableFont
static bool IsAvailableFont(const char *font_desc)
Definition: pango_font_info.h:149
tesseract::PangoFontInfo::CoversUTF8Text
bool CoversUTF8Text(const char *utf8_text, int byte_length) const
Definition: pango_font_info.cpp:216