tesseract  4.0.0-1-g2a2b
text2image.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: text2image.cpp
3  * Description: Program to generate OCR training pages. Given a text file it
4  * outputs an image with a given font and degradation.
5  *
6  * Note that since the results depend on the fonts available on
7  * your system, running the code on a different machine, or
8  * different OS, or even at a different time on the same machine,
9  * may produce different fonts even if --font is given explicitly.
10  * To see names of available fonts, use --list_available_fonts with
11  * the appropriate --fonts_dir path.
12  * Specifying --use_only_legacy_fonts will restrict the available
13  * fonts to those listed in legacy_fonts.h
14  *
15  * Authors: Ranjith Unnikrishnan, Ray Smith
16  * Created: Tue Nov 19 2013
17  *
18  * (C) Copyright 2013, Google Inc.
19  * Licensed under the Apache License, Version 2.0 (the "License");
20  * you may not use this file except in compliance with the License.
21  * You may obtain a copy of the License at
22  * http://www.apache.org/licenses/LICENSE-2.0
23  * Unless required by applicable law or agreed to in writing, software
24  * distributed under the License is distributed on an "AS IS" BASIS,
25  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
26  * See the License for the specific language governing permissions and
27  * limitations under the License.
28  *
29  **********************************************************************/
30 
31 #include <cstdlib>
32 #include <cstring>
33 #include <algorithm>
34 #include <iostream>
35 #include <map>
36 #include <string>
37 #include <utility>
38 #include <vector>
39 
40 #include "allheaders.h" // from leptonica
41 #include "boxchar.h"
42 #include "commandlineflags.h"
43 #include "commontraining.h" // CheckSharedLibraryVersion
44 #include "degradeimage.h"
45 #include "errcode.h"
46 #include "fileio.h"
47 #include "helpers.h"
48 #include "normstrngs.h"
49 #include "stringrenderer.h"
50 #include "tlog.h"
51 #include "unicharset.h"
52 #include "util.h"
53 #ifdef _MSC_VER
54 # define putenv(s) _putenv(s)
55 #endif
56 
57 // A number with which to initialize the random number generator.
58 const int kRandomSeed = 0x18273645;
59 
60 // The text input file.
61 STRING_PARAM_FLAG(text, "", "File name of text input to process");
62 
63 // The text output file.
64 STRING_PARAM_FLAG(outputbase, "", "Basename for output image/box file");
65 
66 // Degrade the rendered image to mimic scanner quality.
67 BOOL_PARAM_FLAG(degrade_image, true,
68  "Degrade rendered image with speckle noise, dilation/erosion "
69  "and rotation");
70 
71 // Rotate the rendered image to have more realistic glyph borders
72 BOOL_PARAM_FLAG(rotate_image, true, "Rotate the image in a random way.");
73 
74 // Degradation to apply to the image.
75 INT_PARAM_FLAG(exposure, 0, "Exposure level in photocopier");
76 
77 // Output image resolution.
78 INT_PARAM_FLAG(resolution, 300, "Pixels per inch");
79 
80 // Width of output image (in pixels).
81 INT_PARAM_FLAG(xsize, 3600, "Width of output image");
82 
83 // Max height of output image (in pixels).
84 INT_PARAM_FLAG(ysize, 4800, "Height of output image");
85 
86 // Max number of pages to produce.
87 INT_PARAM_FLAG(max_pages, 0, "Maximum number of pages to output (0=unlimited)");
88 
89 // Margin around text (in pixels).
90 INT_PARAM_FLAG(margin, 100, "Margin round edges of image");
91 
92 // Size of text (in points).
93 INT_PARAM_FLAG(ptsize, 12, "Size of printed text");
94 
95 // Inter-character space (in ems).
96 DOUBLE_PARAM_FLAG(char_spacing, 0, "Inter-character space in ems");
97 
98 // Sets the probability (value in [0, 1]) of starting to render a word with an
99 // underline. Words are assumed to be space-delimited.
100 DOUBLE_PARAM_FLAG(underline_start_prob, 0,
101  "Fraction of words to underline (value in [0,1])");
102 // Set the probability (value in [0, 1]) of continuing a started underline to
103 // the next word.
104 DOUBLE_PARAM_FLAG(underline_continuation_prob, 0,
105  "Fraction of words to underline (value in [0,1])");
106 
107 // Inter-line space (in pixels).
108 INT_PARAM_FLAG(leading, 12, "Inter-line space (in pixels)");
109 
110 // Layout and glyph orientation on rendering.
111 STRING_PARAM_FLAG(writing_mode, "horizontal",
112  "Specify one of the following writing"
113  " modes.\n"
114  "'horizontal' : Render regular horizontal text. (default)\n"
115  "'vertical' : Render vertical text. Glyph orientation is"
116  " selected by Pango.\n"
117  "'vertical-upright' : Render vertical text. Glyph "
118  " orientation is set to be upright.");
119 
120 INT_PARAM_FLAG(box_padding, 0, "Padding around produced bounding boxes");
121 
122 BOOL_PARAM_FLAG(strip_unrenderable_words, true,
123  "Remove unrenderable words from source text");
124 
125 // Font name.
126 STRING_PARAM_FLAG(font, "Arial", "Font description name to use");
127 
128 BOOL_PARAM_FLAG(ligatures, false,
129  "Rebuild and render ligatures");
130 
131 BOOL_PARAM_FLAG(find_fonts, false,
132  "Search for all fonts that can render the text");
133 BOOL_PARAM_FLAG(render_per_font, true,
134  "If find_fonts==true, render each font to its own image. "
135  "Image filenames are of the form output_name.font_name.tif");
136 DOUBLE_PARAM_FLAG(min_coverage, 1.0,
137  "If find_fonts==true, the minimum coverage the font has of "
138  "the characters in the text file to include it, between "
139  "0 and 1.");
140 
141 BOOL_PARAM_FLAG(list_available_fonts, false, "List available fonts and quit.");
142 
143 BOOL_PARAM_FLAG(render_ngrams, false, "Put each space-separated entity from the"
144  " input file into one bounding box. The ngrams in the input"
145  " file will be randomly permuted before rendering (so that"
146  " there is sufficient variety of characters on each line).");
147 
148 BOOL_PARAM_FLAG(output_word_boxes, false,
149  "Output word bounding boxes instead of character boxes. "
150  "This is used for Cube training, and implied by "
151  "--render_ngrams.");
152 
153 STRING_PARAM_FLAG(unicharset_file, "",
154  "File with characters in the unicharset. If --render_ngrams"
155  " is true and --unicharset_file is specified, ngrams with"
156  " characters that are not in unicharset will be omitted");
157 
158 BOOL_PARAM_FLAG(bidirectional_rotation, false,
159  "Rotate the generated characters both ways.");
160 
161 BOOL_PARAM_FLAG(only_extract_font_properties, false,
162  "Assumes that the input file contains a list of ngrams. Renders"
163  " each ngram, extracts spacing properties and records them in"
164  " output_base/[font_name].fontinfo file.");
165 
166 // Use these flags to output zero-padded, square individual character images
167 BOOL_PARAM_FLAG(output_individual_glyph_images, false,
168  "If true also outputs individual character images");
169 INT_PARAM_FLAG(glyph_resized_size, 0,
170  "Each glyph is square with this side length in pixels");
171 INT_PARAM_FLAG(glyph_num_border_pixels_to_pad, 0,
172  "Final_size=glyph_resized_size+2*glyph_num_border_pixels_to_pad");
173 
174 namespace tesseract {
175 
178  SpacingProperties(int b, int a) : x_gap_before(b), x_gap_after(a) {}
179  // These values are obtained from FT_Glyph_Metrics struct
180  // used by the FreeType font engine.
181  int x_gap_before; // horizontal x bearing
182  int x_gap_after; // horizontal advance - x_gap_before - width
183  std::map<std::string, int> kerned_x_gaps;
184 };
185 
186 static bool IsWhitespaceBox(const BoxChar* boxchar) {
187  return (boxchar->box() == nullptr ||
188  SpanUTF8Whitespace(boxchar->ch().c_str()));
189 }
190 
// Returns a copy of |in| in which every non-overlapping occurrence of
// |oldsub| (scanning left to right) is replaced by |newsub|.
// An empty |oldsub| matches nothing, so |in| is returned unchanged; without
// this guard find() would keep matching at the same position forever.
static std::string StringReplace(const std::string& in,
                                 const std::string& oldsub,
                                 const std::string& newsub) {
  if (oldsub.empty()) {
    return in;
  }
  std::string out;
  out.reserve(in.length());
  size_t start_pos = 0;
  size_t pos;
  while ((pos = in.find(oldsub, start_pos)) != std::string::npos) {
    // Copy the untouched text before the match, then the replacement.
    out.append(in, start_pos, pos - start_pos);
    out.append(newsub);
    start_pos = pos + oldsub.length();
  }
  // Copy whatever follows the last match (or all of |in| if none matched).
  out.append(in, start_pos, std::string::npos);
  return out;
}
203 
204 // Assumes that each word (whitespace-separated entity) in text is a bigram.
205 // Renders the bigrams and calls FontInfo::GetSpacingProperties() to
206 // obtain spacing information. Produces the output .fontinfo file with a line
207 // per unichar of the form:
208 // unichar space_before space_after kerned1 kerned_space1 kerned2 ...
209 // For example, if unichar "A" has spacing of 0 pixels before and -1 pixels
210 // after, is kerned with "V" resulting in spacing of "AV" to be -7 and kerned
211 // with "T", such that "AT" has spacing of -5, the entry/line for unichar "A"
212 // in .fontinfo file will be:
213 // A 0 -1 T -5 V -7
214 static void ExtractFontProperties(const std::string &utf8_text,
215  StringRenderer *render,
216  const std::string &output_base) {
217  std::map<std::string, SpacingProperties> spacing_map;
218  std::map<std::string, SpacingProperties>::iterator spacing_map_it0;
219  std::map<std::string, SpacingProperties>::iterator spacing_map_it1;
220  int x_bearing, x_advance;
221  int len = utf8_text.length();
222  int offset = 0;
223  const char* text = utf8_text.c_str();
224  while (offset < len) {
225  offset +=
226  render->RenderToImage(text + offset, strlen(text + offset), nullptr);
227  const std::vector<BoxChar*> &boxes = render->GetBoxes();
228 
229  // If the page break split a bigram, correct the offset so we try the bigram
230  // on the next iteration.
231  if (boxes.size() > 2 && !IsWhitespaceBox(boxes[boxes.size() - 1]) &&
232  IsWhitespaceBox(boxes[boxes.size() - 2])) {
233  if (boxes.size() > 3) {
234  tprintf("WARNING: Adjusting to bad page break after '%s%s'\n",
235  boxes[boxes.size() - 4]->ch().c_str(),
236  boxes[boxes.size() - 3]->ch().c_str());
237  }
238  offset -= boxes[boxes.size() - 1]->ch().size();
239  }
240 
241  for (size_t b = 0; b < boxes.size(); b += 2) {
242  while (b < boxes.size() && IsWhitespaceBox(boxes[b])) ++b;
243  if (b + 1 >= boxes.size()) break;
244  const std::string &ch0 = boxes[b]->ch();
245  // We encountered a ligature. This happens in at least two scenarios:
246  // One is when the rendered bigram forms a grapheme cluster (eg. the
247  // second character in the bigram is a combining vowel), in which case we
248  // correctly output only one bounding box.
249  // A second far less frequent case is when caused some fonts like 'DejaVu
250  // Sans Ultra-Light' force Pango to render a ligatured character even if
251  // the input consists of the separated characters. NOTE(ranjith): As per
252  // behdad@ this is not currently controllable at the level of the Pango
253  // API.
254  // The most frequent of all is a single character "word" made by the CJK
255  // segmenter.
256  // Safeguard against these cases here by just skipping the bigram.
257  if (IsWhitespaceBox(boxes[b+1])) {
258  continue;
259  }
260  int xgap = (boxes[b+1]->box()->x -
261  (boxes[b]->box()->x + boxes[b]->box()->w));
262  spacing_map_it0 = spacing_map.find(ch0);
263  int ok_count = 0;
264  if (spacing_map_it0 == spacing_map.end() &&
265  render->font().GetSpacingProperties(ch0, &x_bearing, &x_advance)) {
266  spacing_map[ch0] = SpacingProperties(
267  x_bearing, x_advance - x_bearing - boxes[b]->box()->w);
268  spacing_map_it0 = spacing_map.find(ch0);
269  ++ok_count;
270  }
271  const std::string &ch1 = boxes[b+1]->ch();
272  tlog(3, "%s%s\n", ch0.c_str(), ch1.c_str());
273  spacing_map_it1 = spacing_map.find(ch1);
274  if (spacing_map_it1 == spacing_map.end() &&
275  render->font().GetSpacingProperties(ch1, &x_bearing, &x_advance)) {
276  spacing_map[ch1] = SpacingProperties(
277  x_bearing, x_advance - x_bearing - boxes[b+1]->box()->w);
278  spacing_map_it1 = spacing_map.find(ch1);
279  ++ok_count;
280  }
281  if (ok_count == 2 && xgap != (spacing_map_it0->second.x_gap_after +
282  spacing_map_it1->second.x_gap_before)) {
283  spacing_map_it0->second.kerned_x_gaps[ch1] = xgap;
284  }
285  }
286  render->ClearBoxes();
287  }
288  std::string output_string;
289  const int kBufSize = 1024;
290  char buf[kBufSize];
291  snprintf(buf, kBufSize, "%d\n", static_cast<int>(spacing_map.size()));
292  output_string.append(buf);
293  std::map<std::string, SpacingProperties>::const_iterator spacing_map_it;
294  for (spacing_map_it = spacing_map.begin();
295  spacing_map_it != spacing_map.end(); ++spacing_map_it) {
296  snprintf(buf, kBufSize,
297  "%s %d %d %d", spacing_map_it->first.c_str(),
298  spacing_map_it->second.x_gap_before,
299  spacing_map_it->second.x_gap_after,
300  static_cast<int>(spacing_map_it->second.kerned_x_gaps.size()));
301  output_string.append(buf);
302  std::map<std::string, int>::const_iterator kern_it;
303  for (kern_it = spacing_map_it->second.kerned_x_gaps.begin();
304  kern_it != spacing_map_it->second.kerned_x_gaps.end(); ++kern_it) {
305  snprintf(buf, kBufSize,
306  " %s %d", kern_it->first.c_str(), kern_it->second);
307  output_string.append(buf);
308  }
309  output_string.append("\n");
310  }
311  File::WriteStringToFileOrDie(output_string, output_base + ".fontinfo");
312 }
313 
314 static bool MakeIndividualGlyphs(Pix* pix, const std::vector<BoxChar*>& vbox,
315  const int input_tiff_page) {
316  // If checks fail, return false without exiting text2image
317  if (!pix) {
318  tprintf("ERROR: MakeIndividualGlyphs(): Input Pix* is nullptr\n");
319  return false;
320  } else if (FLAGS_glyph_resized_size <= 0) {
321  tprintf("ERROR: --glyph_resized_size must be positive\n");
322  return false;
323  } else if (FLAGS_glyph_num_border_pixels_to_pad < 0) {
324  tprintf("ERROR: --glyph_num_border_pixels_to_pad must be 0 or positive\n");
325  return false;
326  }
327 
328  const int n_boxes = vbox.size();
329  int n_boxes_saved = 0;
330  int current_tiff_page = 0;
331  int y_previous = 0;
332  static int glyph_count = 0;
333  for (int i = 0; i < n_boxes; i++) {
334  // Get one bounding box
335  Box* b = vbox[i]->mutable_box();
336  if (!b) continue;
337  const int x = b->x;
338  const int y = b->y;
339  const int w = b->w;
340  const int h = b->h;
341  // Check present tiff page (for multipage tiff)
342  if (y < y_previous-pixGetHeight(pix)/10) {
343  tprintf("ERROR: Wrap-around encountered, at i=%d\n", i);
344  current_tiff_page++;
345  }
346  if (current_tiff_page < input_tiff_page) continue;
347  else if (current_tiff_page > input_tiff_page) break;
348  // Check box validity
349  if (x < 0 || y < 0 ||
350  (x+w-1) >= pixGetWidth(pix) ||
351  (y+h-1) >= pixGetHeight(pix)) {
352  tprintf("ERROR: MakeIndividualGlyphs(): Index out of range, at i=%d"
353  " (x=%d, y=%d, w=%d, h=%d\n)", i, x, y, w, h);
354  continue;
355  } else if (w < FLAGS_glyph_num_border_pixels_to_pad &&
356  h < FLAGS_glyph_num_border_pixels_to_pad) {
357  tprintf("ERROR: Input image too small to be a character, at i=%d\n", i);
358  continue;
359  }
360  // Crop the boxed character
361  Pix* pix_glyph = pixClipRectangle(pix, b, nullptr);
362  if (!pix_glyph) {
363  tprintf("ERROR: MakeIndividualGlyphs(): Failed to clip, at i=%d\n", i);
364  continue;
365  }
366  // Resize to square
367  Pix* pix_glyph_sq = pixScaleToSize(pix_glyph,
368  FLAGS_glyph_resized_size,
369  FLAGS_glyph_resized_size);
370  if (!pix_glyph_sq) {
371  tprintf("ERROR: MakeIndividualGlyphs(): Failed to resize, at i=%d\n", i);
372  continue;
373  }
374  // Zero-pad
375  Pix* pix_glyph_sq_pad = pixAddBorder(pix_glyph_sq,
376  FLAGS_glyph_num_border_pixels_to_pad,
377  0);
378  if (!pix_glyph_sq_pad) {
379  tprintf("ERROR: MakeIndividualGlyphs(): Failed to zero-pad, at i=%d\n",
380  i);
381  continue;
382  }
383  // Write out
384  Pix* pix_glyph_sq_pad_8 = pixConvertTo8(pix_glyph_sq_pad, false);
385  char filename[1024];
386  snprintf(filename, 1024, "%s_%d.jpg", FLAGS_outputbase.c_str(),
387  glyph_count++);
388  if (pixWriteJpeg(filename, pix_glyph_sq_pad_8, 100, 0)) {
389  tprintf("ERROR: MakeIndividualGlyphs(): Failed to write JPEG to %s,"
390  " at i=%d\n", filename, i);
391  continue;
392  }
393 
394  pixDestroy(&pix_glyph);
395  pixDestroy(&pix_glyph_sq);
396  pixDestroy(&pix_glyph_sq_pad);
397  pixDestroy(&pix_glyph_sq_pad_8);
398  n_boxes_saved++;
399  y_previous = y;
400  }
401  if (n_boxes_saved == 0) {
402  return false;
403  } else {
404  tprintf("Total number of characters saved = %d\n", n_boxes_saved);
405  return true;
406  }
407 }
408 } // namespace tesseract
409 
411 using tesseract::ExtractFontProperties;
412 using tesseract::File;
417 
418 static int Main() {
419  if (FLAGS_list_available_fonts) {
420  const std::vector<std::string>& all_fonts = FontUtils::ListAvailableFonts();
421  for (unsigned int i = 0; i < all_fonts.size(); ++i) {
422  // Remove trailing comma: pango-font-description-to-string adds a comma
423  // to some fonts.
424  // See https://github.com/tesseract-ocr/tesseract/issues/408
425  std::string font_name(all_fonts[i].c_str());
426  if (font_name.back() == ',')
427  font_name.pop_back();
428  printf("%3u: %s\n", i, font_name.c_str());
429  ASSERT_HOST_MSG(FontUtils::IsAvailableFont(all_fonts[i].c_str()),
430  "Font %s is unrecognized.\n", all_fonts[i].c_str());
431  }
432  return EXIT_SUCCESS;
433  }
434 
435  // Check validity of input flags.
436  if (FLAGS_text.empty()) {
437  tprintf("'--text' option is missing!\n");
438  exit(1);
439  }
440  if (FLAGS_outputbase.empty()) {
441  tprintf("'--outputbase' option is missing!\n");
442  exit(1);
443  }
444  if (!FLAGS_unicharset_file.empty() && FLAGS_render_ngrams) {
445  tprintf("Use '--unicharset_file' only if '--render_ngrams' is set.\n");
446  exit(1);
447  }
448 
449  std::string font_name = FLAGS_font.c_str();
450  if (!FLAGS_find_fonts && !FontUtils::IsAvailableFont(font_name.c_str())) {
451  font_name += ',';
452  std::string pango_name;
453  if (!FontUtils::IsAvailableFont(font_name.c_str(), &pango_name)) {
454  tprintf("Could not find font named '%s'.\n", FLAGS_font.c_str());
455  if (!pango_name.empty()) {
456  tprintf("Pango suggested font '%s'.\n", pango_name.c_str());
457  }
458  tprintf("Please correct --font arg.\n");
459  exit(1);
460  }
461  }
462 
463  if (FLAGS_render_ngrams)
464  FLAGS_output_word_boxes = true;
465 
466  char font_desc_name[1024];
467  snprintf(font_desc_name, 1024, "%s %d", font_name.c_str(),
468  static_cast<int>(FLAGS_ptsize));
469 
470  StringRenderer render(font_desc_name, FLAGS_xsize, FLAGS_ysize);
471  render.set_add_ligatures(FLAGS_ligatures);
472  render.set_leading(FLAGS_leading);
473  render.set_resolution(FLAGS_resolution);
474  render.set_char_spacing(FLAGS_char_spacing * FLAGS_ptsize);
475  render.set_h_margin(FLAGS_margin);
476  render.set_v_margin(FLAGS_margin);
477  render.set_output_word_boxes(FLAGS_output_word_boxes);
478  render.set_box_padding(FLAGS_box_padding);
479  render.set_strip_unrenderable_words(FLAGS_strip_unrenderable_words);
480  render.set_underline_start_prob(FLAGS_underline_start_prob);
481  render.set_underline_continuation_prob(FLAGS_underline_continuation_prob);
482 
483  // Set text rendering orientation and their forms.
484  if (FLAGS_writing_mode == "horizontal") {
485  // Render regular horizontal text (default).
486  render.set_vertical_text(false);
487  render.set_gravity_hint_strong(false);
488  render.set_render_fullwidth_latin(false);
489  } else if (FLAGS_writing_mode == "vertical") {
490  // Render vertical text. Glyph orientation is selected by Pango.
491  render.set_vertical_text(true);
492  render.set_gravity_hint_strong(false);
493  render.set_render_fullwidth_latin(false);
494  } else if (FLAGS_writing_mode == "vertical-upright") {
495  // Render vertical text. Glyph orientation is set to be upright.
496  // Also Basic Latin characters are converted to their fullwidth forms
497  // on rendering, since fullwidth Latin characters are well designed to fit
498  // vertical text lines, while .box files store halfwidth Basic Latin
499  // unichars.
500  render.set_vertical_text(true);
501  render.set_gravity_hint_strong(true);
502  render.set_render_fullwidth_latin(true);
503  } else {
504  tprintf("Invalid writing mode: %s\n", FLAGS_writing_mode.c_str());
505  exit(1);
506  }
507 
508  std::string src_utf8;
509  // This c_str is NOT redundant!
510  if (!File::ReadFileToString(FLAGS_text.c_str(), &src_utf8)) {
511  tprintf("Failed to read file: %s\n", FLAGS_text.c_str());
512  exit(1);
513  }
514 
515  // Remove the unicode mark if present.
516  if (strncmp(src_utf8.c_str(), "\xef\xbb\xbf", 3) == 0) {
517  src_utf8.erase(0, 3);
518  }
519  tlog(1, "Render string of size %d\n", src_utf8.length());
520 
521  if (FLAGS_render_ngrams || FLAGS_only_extract_font_properties) {
522  // Try to preserve behavior of old text2image by expanding inter-word
523  // spaces by a factor of 4.
524  const std::string kSeparator = FLAGS_render_ngrams ? " " : " ";
525  // Also restrict the number of characters per line to try and avoid
526  // line-breaking in the middle of words like "-A", "R$" etc. which are
527  // otherwise allowed by the standard unicode line-breaking rules.
528  const unsigned int kCharsPerLine = (FLAGS_ptsize > 20) ? 50 : 100;
529  std::string rand_utf8;
530  UNICHARSET unicharset;
531  if (FLAGS_render_ngrams && !FLAGS_unicharset_file.empty() &&
532  !unicharset.load_from_file(FLAGS_unicharset_file.c_str())) {
533  tprintf("Failed to load unicharset from file %s\n",
534  FLAGS_unicharset_file.c_str());
535  exit(1);
536  }
537 
538  // If we are rendering ngrams that will be OCRed later, shuffle them so that
539  // tesseract does not have difficulties finding correct baseline, word
540  // spaces, etc.
541  const char *str8 = src_utf8.c_str();
542  int len = src_utf8.length();
543  int step;
544  std::vector<std::pair<int, int> > offsets;
545  int offset = SpanUTF8Whitespace(str8);
546  while (offset < len) {
547  step = SpanUTF8NotWhitespace(str8 + offset);
548  offsets.push_back(std::make_pair(offset, step));
549  offset += step;
550  offset += SpanUTF8Whitespace(str8 + offset);
551  }
552  if (FLAGS_render_ngrams)
553  std::random_shuffle(offsets.begin(), offsets.end());
554 
555  for (size_t i = 0, line = 1; i < offsets.size(); ++i) {
556  const char *curr_pos = str8 + offsets[i].first;
557  int ngram_len = offsets[i].second;
558  // Skip words that contain characters not in found in unicharset.
559  std::string cleaned = UNICHARSET::CleanupString(curr_pos, ngram_len);
560  if (!FLAGS_unicharset_file.empty() &&
561  !unicharset.encodable_string(cleaned.c_str(), nullptr)) {
562  continue;
563  }
564  rand_utf8.append(curr_pos, ngram_len);
565  if (rand_utf8.length() > line * kCharsPerLine) {
566  rand_utf8.append(" \n");
567  ++line;
568  if (line & 0x1) rand_utf8.append(kSeparator);
569  } else {
570  rand_utf8.append(kSeparator);
571  }
572  }
573  tlog(1, "Rendered ngram string of size %d\n", rand_utf8.length());
574  src_utf8.swap(rand_utf8);
575  }
576  if (FLAGS_only_extract_font_properties) {
577  tprintf("Extracting font properties only\n");
578  ExtractFontProperties(src_utf8, &render, FLAGS_outputbase.c_str());
579  tprintf("Done!\n");
580  return 0;
581  }
582 
583  int im = 0;
584  std::vector<float> page_rotation;
585  const char* to_render_utf8 = src_utf8.c_str();
586 
587  tesseract::TRand randomizer;
588  randomizer.set_seed(kRandomSeed);
589  std::vector<std::string> font_names;
590  // We use a two pass mechanism to rotate images in both direction.
591  // The first pass(0) will rotate the images in random directions and
592  // the second pass(1) will mirror those rotations.
593  int num_pass = FLAGS_bidirectional_rotation ? 2 : 1;
594  for (int pass = 0; pass < num_pass; ++pass) {
595  int page_num = 0;
596  std::string font_used;
597  for (size_t offset = 0;
598  offset < strlen(to_render_utf8) &&
599  (FLAGS_max_pages == 0 || page_num < FLAGS_max_pages);
600  ++im, ++page_num) {
601  tlog(1, "Starting page %d\n", im);
602  Pix* pix = nullptr;
603  if (FLAGS_find_fonts) {
604  offset += render.RenderAllFontsToImage(FLAGS_min_coverage,
605  to_render_utf8 + offset,
606  strlen(to_render_utf8 + offset),
607  &font_used, &pix);
608  } else {
609  offset += render.RenderToImage(to_render_utf8 + offset,
610  strlen(to_render_utf8 + offset), &pix);
611  }
612  if (pix != nullptr) {
613  float rotation = 0;
614  if (pass == 1) {
615  // Pass 2, do mirror rotation.
616  rotation = -1 * page_rotation[page_num];
617  }
618  if (FLAGS_degrade_image) {
619  pix = DegradeImage(pix, FLAGS_exposure, &randomizer,
620  FLAGS_rotate_image ? &rotation : nullptr);
621  }
622  render.RotatePageBoxes(rotation);
623 
624  if (pass == 0) {
625  // Pass 1, rotate randomly and store the rotation..
626  page_rotation.push_back(rotation);
627  }
628 
629  Pix* gray_pix = pixConvertTo8(pix, false);
630  pixDestroy(&pix);
631  Pix* binary = pixThresholdToBinary(gray_pix, 128);
632  pixDestroy(&gray_pix);
633  char tiff_name[1024];
634  if (FLAGS_find_fonts) {
635  if (FLAGS_render_per_font) {
636  std::string fontname_for_file = tesseract::StringReplace(
637  font_used, " ", "_");
638  snprintf(tiff_name, 1024, "%s.%s.tif", FLAGS_outputbase.c_str(),
639  fontname_for_file.c_str());
640  pixWriteTiff(tiff_name, binary, IFF_TIFF_G4, "w");
641  tprintf("Rendered page %d to file %s\n", im, tiff_name);
642  } else {
643  font_names.push_back(font_used);
644  }
645  } else {
646  snprintf(tiff_name, 1024, "%s.tif", FLAGS_outputbase.c_str());
647  pixWriteTiff(tiff_name, binary, IFF_TIFF_G4, im == 0 ? "w" : "a");
648  tprintf("Rendered page %d to file %s\n", im, tiff_name);
649  }
650  // Make individual glyphs
651  if (FLAGS_output_individual_glyph_images) {
652  if (!MakeIndividualGlyphs(binary, render.GetBoxes(), im)) {
653  tprintf("ERROR: Individual glyphs not saved\n");
654  }
655  }
656  pixDestroy(&binary);
657  }
658  if (FLAGS_find_fonts && offset != 0) {
659  // We just want a list of names, or some sample images so we don't need
660  // to render more than the first page of the text.
661  break;
662  }
663  }
664  }
665  if (!FLAGS_find_fonts) {
666  std::string box_name = FLAGS_outputbase.c_str();
667  box_name += ".box";
668  render.WriteAllBoxes(box_name);
669  } else if (!FLAGS_render_per_font && !font_names.empty()) {
670  std::string filename = FLAGS_outputbase.c_str();
671  filename += ".fontlist.txt";
672  FILE* fp = fopen(filename.c_str(), "wb");
673  if (fp == nullptr) {
674  tprintf("Failed to create output font list %s\n", filename.c_str());
675  } else {
676  for (size_t i = 0; i < font_names.size(); ++i) {
677  fprintf(fp, "%s\n", font_names[i].c_str());
678  }
679  fclose(fp);
680  }
681  }
682 
683  return 0;
684 }
685 
686 int main(int argc, char** argv) {
687  // Respect enviroment variable. could be:
688  // fc (fontconfig), win32, and coretext
689  // If not set force fontconfig for Mac OS.
690  // See https://github.com/tesseract-ocr/tesseract/issues/736
691  char* backend;
692  backend = getenv("PANGOCAIRO_BACKEND");
693  if (backend == NULL) {
694  putenv("PANGOCAIRO_BACKEND=fc");
695  } else {
696  printf("Using '%s' as pango cairo backend based on enviroment "
697  "variable.\n", backend);
698  }
699  tesseract::CheckSharedLibraryVersion();
700  if (argc > 1) {
701  if ((strcmp(argv[1], "-v") == 0) ||
702  (strcmp(argv[1], "--version") == 0)) {
703  FontUtils::PangoFontTypeInfo();
704  }
705  }
706  tesseract::ParseCommandLineFlags(argv[0], &argc, &argv, true);
707  return Main();
708 }
int RenderAllFontsToImage(double min_coverage, const char *text, int text_length, std::string *font_used, Pix **pix)
INT_PARAM_FLAG(exposure, 0, "Exposure level in photocopier")
void set_underline_continuation_prob(const double frac)
const std::string & ch() const
Definition: boxchar.h:43
void set_vertical_text(bool vertical_text)
void set_gravity_hint_strong(bool gravity_hint_strong)
const std::vector< BoxChar * > & GetBoxes() const
void RotatePageBoxes(float rotation)
std::map< std::string, int > kerned_x_gaps
Definition: text2image.cpp:183
void set_render_fullwidth_latin(bool render_fullwidth_latin)
void set_output_word_boxes(bool val)
unsigned int SpanUTF8Whitespace(const char *text)
Definition: normstrngs.cpp:233
void set_add_ligatures(bool add_ligatures)
const Box * box() const
Definition: boxchar.h:44
void set_resolution(const int resolution)
Pix * DegradeImage(Pix *input, int exposure, TRand *randomizer, float *rotation)
void ParseCommandLineFlags(const char *usage, int *argc, char ***argv, const bool remove_flags)
#define tlog(level,...)
Definition: tlog.h:33
bool GetSpacingProperties(const std::string &utf8_char, int *x_bearing, int *x_advance) const
bool encodable_string(const char *str, int *first_bad_position) const
Definition: unicharset.cpp:243
static void WriteStringToFileOrDie(const std::string &str, const std::string &filename)
Definition: fileio.cpp:53
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
void set_strip_unrenderable_words(bool val)
void WriteAllBoxes(const std::string &filename)
void set_v_margin(const int v_margin)
const int kRandomSeed
Definition: text2image.cpp:58
static std::string CleanupString(const char *utf8_str)
Definition: unicharset.h:241
#define ASSERT_HOST_MSG(x,...)
Definition: errcode.h:90
void set_leading(int leading)
void set_underline_start_prob(const double frac)
unsigned int SpanUTF8NotWhitespace(const char *text)
Definition: normstrngs.cpp:243
DOUBLE_PARAM_FLAG(char_spacing, 0, "Inter-character space in ems")
STRING_PARAM_FLAG(text, "", "File name of text input to process")
int main(int argc, char **argv)
Definition: text2image.cpp:686
bool load_from_file(const char *const filename, bool skip_fragments)
Definition: unicharset.h:383
void set_char_spacing(int char_spacing)
void set_h_margin(const int h_margin)
BOOL_PARAM_FLAG(degrade_image, true, "Degrade rendered image with speckle noise, dilation/erosion " "and rotation")
const PangoFontInfo & font() const
int RenderToImage(const char *text, int text_length, Pix **pix)
void set_seed(uint64_t seed)
Definition: helpers.h:46