All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
text2image.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: text2image.cpp
3  * Description: Program to generate OCR training pages. Given a text file it
4  * outputs an image with a given font and degradation.
5  *
6  * Note that since the results depend on the fonts available on
7  * your system, running the code on a different machine, or
8  * different OS, or even at a different time on the same machine,
9  * may produce different fonts even if --font is given explicitly.
10  * To see names of available fonts, use --list_available_fonts with
11  * the appropriate --fonts_dir path.
12  * Specifying --use_only_legacy_fonts will restrict the available
13  * fonts to those listed in legacy_fonts.h
14  *
15  * Authors: Ranjith Unnikrishnan, Ray Smith
16  * Created: Tue Nov 19 2013
17  *
18  * (C) Copyright 2013, Google Inc.
19  * Licensed under the Apache License, Version 2.0 (the "License");
20  * you may not use this file except in compliance with the License.
21  * You may obtain a copy of the License at
22  * http://www.apache.org/licenses/LICENSE-2.0
23  * Unless required by applicable law or agreed to in writing, software
24  * distributed under the License is distributed on an "AS IS" BASIS,
25  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
26  * See the License for the specific language governing permissions and
27  * limitations under the License.
28  *
29  **********************************************************************/
30 
31 #include <stdlib.h>
32 #include <string.h>
33 #include <algorithm>
34 #include <iostream>
35 #include <map>
36 #include <string>
37 #include <utility>
38 #include <vector>
39 
40 #include "allheaders.h" // from leptonica
41 #include "boxchar.h"
42 #include "commandlineflags.h"
43 #include "degradeimage.h"
44 #include "errcode.h"
45 #include "fileio.h"
46 #include "helpers.h"
47 #include "normstrngs.h"
48 #include "stringrenderer.h"
49 #include "tlog.h"
50 #include "unicharset.h"
51 #include "util.h"
52 
53 #ifdef USE_STD_NAMESPACE
54 using std::make_pair;
55 using std::map;
56 using std::pair;
57 #endif
58 
59 // A number with which to initialize the random number generator.
60 const int kRandomSeed = 0x18273645;
61 
62 // The text input file.
63 STRING_PARAM_FLAG(text, "", "File name of text input to process");
64 
65 // Basename shared by the output image and box files.
66 STRING_PARAM_FLAG(outputbase, "", "Basename for output image/box file");
67 
68 // Degrade the rendered image to mimic scanner quality.
69 BOOL_PARAM_FLAG(degrade_image, true,
70  "Degrade rendered image with speckle noise, dilation/erosion "
71  "and rotation");
72 
73 // Degradation to apply to the image.
74 INT_PARAM_FLAG(exposure, 0, "Exposure level in photocopier");
75 
76 // Output image resolution.
77 INT_PARAM_FLAG(resolution, 300, "Pixels per inch");
78 
79 // Width of output image (in pixels).
80 INT_PARAM_FLAG(xsize, 3600, "Width of output image");
81 
82 // Max height of output image (in pixels).
83 INT_PARAM_FLAG(ysize, 4800, "Height of output image");
84 
85 // Margin around text (in pixels).
86 INT_PARAM_FLAG(margin, 100, "Margin round edges of image");
87 
88 // Size of text (in points).
89 INT_PARAM_FLAG(ptsize, 12, "Size of printed text");
90 
91 // Inter-character space (in ems).
92 DOUBLE_PARAM_FLAG(char_spacing, 0, "Inter-character space in ems");
93 
94 // Sets the probability (value in [0, 1]) of starting to render a word with an
95 // underline. Words are assumed to be space-delimited.
96 DOUBLE_PARAM_FLAG(underline_start_prob, 0,
97  "Fraction of words to underline (value in [0,1])");
98 // Set the probability (value in [0, 1]) of continuing a started underline to
99 // the next word.
100 DOUBLE_PARAM_FLAG(underline_continuation_prob, 0,
101  "Fraction of words to underline (value in [0,1])");
102 
103 // Inter-line space (in pixels).
104 INT_PARAM_FLAG(leading, 12, "Inter-line space (in pixels)");
105 
106 // Layout and glyph orientation on rendering.
107 STRING_PARAM_FLAG(writing_mode, "horizontal",
108  "Specify one of the following writing"
109  " modes.\n"
110  "'horizontal' : Render regular horizontal text. (default)\n"
111  "'vertical' : Render vertical text. Glyph orientation is"
112  " selected by Pango.\n"
113  "'vertical-upright' : Render vertical text. Glyph "
114  " orientation is set to be upright.");
115 
116 INT_PARAM_FLAG(box_padding, 0, "Padding around produced bounding boxes");
117 
118 BOOL_PARAM_FLAG(strip_unrenderable_words, true,
119  "Remove unrenderable words from source text");
120 
121 // Font name.
122 STRING_PARAM_FLAG(font, "Arial", "Font description name to use");
123 
124 BOOL_PARAM_FLAG(ligatures, false,
125  "Rebuild and render ligatures");
126 
127 BOOL_PARAM_FLAG(find_fonts, false,
128  "Search for all fonts that can render the text");
129 BOOL_PARAM_FLAG(render_per_font, true,
130  "If find_fonts==true, render each font to its own image. "
131  "Image filenames are of the form output_name.font_name.tif");
132 DOUBLE_PARAM_FLAG(min_coverage, 1.0,
133  "If find_fonts==true, the minimum coverage the font has of "
134  "the characters in the text file to include it, between "
135  "0 and 1.");
136 
137 BOOL_PARAM_FLAG(list_available_fonts, false, "List available fonts and quit.");
138 
139 BOOL_PARAM_FLAG(render_ngrams, false, "Put each space-separated entity from the"
140  " input file into one bounding box. The ngrams in the input"
141  " file will be randomly permuted before rendering (so that"
142  " there is sufficient variety of characters on each line).");
143 
144 BOOL_PARAM_FLAG(output_word_boxes, false,
145  "Output word bounding boxes instead of character boxes. "
146  "This is used for Cube training, and implied by "
147  "--render_ngrams.");
148 
149 STRING_PARAM_FLAG(unicharset_file, "",
150  "File with characters in the unicharset. If --render_ngrams"
151  " is true and --unicharset_file is specified, ngrams with"
152  " characters that are not in unicharset will be omitted");
153 
154 BOOL_PARAM_FLAG(bidirectional_rotation, false,
155  "Rotate the generated characters both ways.");
156 
157 BOOL_PARAM_FLAG(only_extract_font_properties, false,
158  "Assumes that the input file contains a list of ngrams. Renders"
159  " each ngram, extracts spacing properties and records them in"
160  " output_base/[font_name].fontinfo file.");
161 
162 // Use these flags to output zero-padded, square individual character images
163 BOOL_PARAM_FLAG(output_individual_glyph_images, false,
164  "If true also outputs individual character images");
165 INT_PARAM_FLAG(glyph_resized_size, 0,
166  "Each glyph is square with this side length in pixels");
167 INT_PARAM_FLAG(glyph_num_border_pixels_to_pad, 0,
168  "Final_size=glyph_resized_size+2*glyph_num_border_pixels_to_pad");
169 
170 namespace tesseract {
171 
174  SpacingProperties(int b, int a) : x_gap_before(b), x_gap_after(a) {}
175  // These values are obtained from FT_Glyph_Metrics struct
176  // used by the FreeType font engine.
177  int x_gap_before; // horizontal x bearing
178  int x_gap_after; // horizontal advance - x_gap_before - width
179  map<string, int> kerned_x_gaps;
180 };
181 
182 static bool IsWhitespaceBox(const BoxChar* boxchar) {
183  return (boxchar->box() == NULL ||
184  SpanUTF8Whitespace(boxchar->ch().c_str()));
185 }
186 
// Returns a copy of |in| in which every non-overlapping occurrence of |oldsub|,
// found scanning left to right, is replaced by |newsub|.
// An empty |oldsub| matches nothing: |in| is returned unchanged.
static std::string StringReplace(const std::string& in,
                                 const std::string& oldsub,
                                 const std::string& newsub) {
  // find("") succeeds at every position without consuming any input, so
  // without this guard the loop below would never advance and would keep
  // appending |newsub| until memory runs out.
  if (oldsub.empty()) {
    return in;
  }
  std::string out;
  // Positions are kept as size_type so the comparison with npos is exact and
  // inputs longer than INT_MAX are not truncated.
  std::string::size_type start_pos = 0;
  for (;;) {
    const std::string::size_type pos = in.find(oldsub, start_pos);
    if (pos == std::string::npos) break;
    out.append(in, start_pos, pos - start_pos);  // Text before the match.
    out.append(newsub);
    start_pos = pos + oldsub.length();
  }
  out.append(in, start_pos, std::string::npos);  // Tail after the last match.
  return out;
}
201 
202 // Assumes that each word (whitespace-separated entity) in text is a bigram.
203 // Renders the bigrams and calls FontInfo::GetSpacingProperties() to
204 // obtain spacing information. Produces the output .fontinfo file with a line
205 // per unichar of the form:
206 // unichar space_before space_after kerned1 kerned_space1 kerned2 ...
207 // Fox example, if unichar "A" has spacing of 0 pixels before and -1 pixels
208 // after, is kerned with "V" resulting in spacing of "AV" to be -7 and kerned
209 // with "T", such that "AT" has spacing of -5, the entry/line for unichar "A"
210 // in .fontinfo file will be:
211 // A 0 -1 T -5 V -7
212 void ExtractFontProperties(const string &utf8_text,
213  StringRenderer *render,
214  const string &output_base) {
215  map<string, SpacingProperties> spacing_map;
216  map<string, SpacingProperties>::iterator spacing_map_it0;
217  map<string, SpacingProperties>::iterator spacing_map_it1;
218  int x_bearing, x_advance;
219  int len = utf8_text.length();
220  int offset = 0;
221  const char* text = utf8_text.c_str();
222  while (offset < len) {
223  offset += render->RenderToImage(text + offset, strlen(text + offset), NULL);
224  const vector<BoxChar*> &boxes = render->GetBoxes();
225 
226  // If the page break split a bigram, correct the offset so we try the bigram
227  // on the next iteration.
228  if (boxes.size() > 2 && !IsWhitespaceBox(boxes[boxes.size() - 1]) &&
229  IsWhitespaceBox(boxes[boxes.size() - 2])) {
230  if (boxes.size() > 3) {
231  tprintf("WARNING: Adjusting to bad page break after '%s%s'\n",
232  boxes[boxes.size() - 4]->ch().c_str(),
233  boxes[boxes.size() - 3]->ch().c_str());
234  }
235  offset -= boxes[boxes.size() - 1]->ch().size();
236  }
237 
238  for (int b = 0; b < boxes.size(); b += 2) {
239  while (b < boxes.size() && IsWhitespaceBox(boxes[b])) ++b;
240  if (b + 1 >= boxes.size()) break;
241  const string &ch0 = boxes[b]->ch();
242  // We encountered a ligature. This happens in at least two scenarios:
243  // One is when the rendered bigram forms a grapheme cluster (eg. the
244  // second character in the bigram is a combining vowel), in which case we
245  // correctly output only one bounding box.
246  // A second far less frequent case is when caused some fonts like 'DejaVu
247  // Sans Ultra-Light' force Pango to render a ligatured character even if
248  // the input consists of the separated characters. NOTE(ranjith): As per
249  // behdad@ this is not currently controllable at the level of the Pango
250  // API.
251  // Safeguard against these cases here by just skipping the bigram.
252  if (IsWhitespaceBox(boxes[b+1])) {
253  continue;
254  }
255  int xgap = (boxes[b+1]->box()->x -
256  (boxes[b]->box()->x + boxes[b]->box()->w));
257  spacing_map_it0 = spacing_map.find(ch0);
258  int ok_count = 0;
259  if (spacing_map_it0 == spacing_map.end() &&
260  render->font().GetSpacingProperties(ch0, &x_bearing, &x_advance)) {
261  spacing_map[ch0] = SpacingProperties(
262  x_bearing, x_advance - x_bearing - boxes[b]->box()->w);
263  spacing_map_it0 = spacing_map.find(ch0);
264  ++ok_count;
265  }
266  const string &ch1 = boxes[b+1]->ch();
267  tlog(3, "%s%s\n", ch0.c_str(), ch1.c_str());
268  spacing_map_it1 = spacing_map.find(ch1);
269  if (spacing_map_it1 == spacing_map.end() &&
270  render->font().GetSpacingProperties(ch1, &x_bearing, &x_advance)) {
271  spacing_map[ch1] = SpacingProperties(
272  x_bearing, x_advance - x_bearing - boxes[b+1]->box()->w);
273  spacing_map_it1 = spacing_map.find(ch1);
274  ++ok_count;
275  }
276  if (ok_count == 2 && xgap != (spacing_map_it0->second.x_gap_after +
277  spacing_map_it1->second.x_gap_before)) {
278  spacing_map_it0->second.kerned_x_gaps[ch1] = xgap;
279  }
280  }
281  render->ClearBoxes();
282  }
283  string output_string;
284  const int kBufSize = 1024;
285  char buf[kBufSize];
286  snprintf(buf, kBufSize, "%d\n", static_cast<int>(spacing_map.size()));
287  output_string.append(buf);
288  map<string, SpacingProperties>::const_iterator spacing_map_it;
289  for (spacing_map_it = spacing_map.begin();
290  spacing_map_it != spacing_map.end(); ++spacing_map_it) {
291  snprintf(buf, kBufSize,
292  "%s %d %d %d", spacing_map_it->first.c_str(),
293  spacing_map_it->second.x_gap_before,
294  spacing_map_it->second.x_gap_after,
295  static_cast<int>(spacing_map_it->second.kerned_x_gaps.size()));
296  output_string.append(buf);
297  map<string, int>::const_iterator kern_it;
298  for (kern_it = spacing_map_it->second.kerned_x_gaps.begin();
299  kern_it != spacing_map_it->second.kerned_x_gaps.end(); ++kern_it) {
300  snprintf(buf, kBufSize,
301  " %s %d", kern_it->first.c_str(), kern_it->second);
302  output_string.append(buf);
303  }
304  output_string.append("\n");
305  }
306  File::WriteStringToFileOrDie(output_string, output_base + ".fontinfo");
307 }
308 
309 bool MakeIndividualGlyphs(Pix* pix,
310  const vector<BoxChar*>& vbox,
311  const int input_tiff_page) {
312  // If checks fail, return false without exiting text2image
313  if (!pix) {
314  tprintf("ERROR: MakeIndividualGlyphs(): Input Pix* is NULL\n");
315  return false;
316  } else if (FLAGS_glyph_resized_size <= 0) {
317  tprintf("ERROR: --glyph_resized_size must be positive\n");
318  return false;
319  } else if (FLAGS_glyph_num_border_pixels_to_pad < 0) {
320  tprintf("ERROR: --glyph_num_border_pixels_to_pad must be 0 or positive\n");
321  return false;
322  }
323 
324  const int n_boxes = vbox.size();
325  int n_boxes_saved = 0;
326  int current_tiff_page = 0;
327  int y_previous = 0;
328  static int glyph_count = 0;
329  for (int i = 0; i < n_boxes; i++) {
330  // Get one bounding box
331  Box* b = vbox[i]->mutable_box();
332  if (!b) continue;
333  const int x = b->x;
334  const int y = b->y;
335  const int w = b->w;
336  const int h = b->h;
337  // Check present tiff page (for multipage tiff)
338  if (y < y_previous-pixGetHeight(pix)/10) {
339  tprintf("ERROR: Wrap-around encountered, at i=%d\n", i);
340  current_tiff_page++;
341  }
342  if (current_tiff_page < input_tiff_page) continue;
343  else if (current_tiff_page > input_tiff_page) break;
344  // Check box validity
345  if (x < 0 || y < 0 ||
346  (x+w-1) >= pixGetWidth(pix) ||
347  (y+h-1) >= pixGetHeight(pix)) {
348  tprintf("ERROR: MakeIndividualGlyphs(): Index out of range, at i=%d"
349  " (x=%d, y=%d, w=%d, h=%d\n)", i, x, y, w, h);
350  continue;
351  } else if (w < FLAGS_glyph_num_border_pixels_to_pad &&
352  h < FLAGS_glyph_num_border_pixels_to_pad) {
353  tprintf("ERROR: Input image too small to be a character, at i=%d\n", i);
354  continue;
355  }
356  // Crop the boxed character
357  Pix* pix_glyph = pixClipRectangle(pix, b, NULL);
358  if (!pix_glyph) {
359  tprintf("ERROR: MakeIndividualGlyphs(): Failed to clip, at i=%d\n", i);
360  continue;
361  }
362  // Resize to square
363  Pix* pix_glyph_sq = pixScaleToSize(pix_glyph,
364  FLAGS_glyph_resized_size,
365  FLAGS_glyph_resized_size);
366  if (!pix_glyph_sq) {
367  tprintf("ERROR: MakeIndividualGlyphs(): Failed to resize, at i=%d\n", i);
368  continue;
369  }
370  // Zero-pad
371  Pix* pix_glyph_sq_pad = pixAddBorder(pix_glyph_sq,
372  FLAGS_glyph_num_border_pixels_to_pad,
373  0);
374  if (!pix_glyph_sq_pad) {
375  tprintf("ERROR: MakeIndividualGlyphs(): Failed to zero-pad, at i=%d\n",
376  i);
377  continue;
378  }
379  // Write out
380  Pix* pix_glyph_sq_pad_8 = pixConvertTo8(pix_glyph_sq_pad, false);
381  char filename[1024];
382  snprintf(filename, 1024, "%s_%d.jpg", FLAGS_outputbase.c_str(),
383  glyph_count++);
384  if (pixWriteJpeg(filename, pix_glyph_sq_pad_8, 100, 0)) {
385  tprintf("ERROR: MakeIndividualGlyphs(): Failed to write JPEG to %s,"
386  " at i=%d\n", filename, i);
387  continue;
388  }
389 
390  pixDestroy(&pix_glyph);
391  pixDestroy(&pix_glyph_sq);
392  pixDestroy(&pix_glyph_sq_pad);
393  pixDestroy(&pix_glyph_sq_pad_8);
394  n_boxes_saved++;
395  y_previous = y;
396  }
397  if (n_boxes_saved == 0) {
398  return false;
399  } else {
400  tprintf("Total number of characters saved = %d\n", n_boxes_saved);
401  return true;
402  }
403 }
404 } // namespace tesseract
405 
408 using tesseract::File;
413 
414 int main(int argc, char** argv) {
415  tesseract::ParseCommandLineFlags(argv[0], &argc, &argv, true);
416 
417  if (FLAGS_list_available_fonts) {
418  const vector<string>& all_fonts = FontUtils::ListAvailableFonts();
419  for (int i = 0; i < all_fonts.size(); ++i) {
420  tprintf("%3d: %s\n", i, all_fonts[i].c_str());
421  ASSERT_HOST_MSG(FontUtils::IsAvailableFont(all_fonts[i].c_str()),
422  "Font %s is unrecognized.\n", all_fonts[i].c_str());
423  }
424  return EXIT_SUCCESS;
425  }
426  // Check validity of input flags.
427  ASSERT_HOST_MSG(!FLAGS_text.empty(), "Text file missing!\n");
428  ASSERT_HOST_MSG(!FLAGS_outputbase.empty(), "Output file missing!\n");
429  ASSERT_HOST_MSG(FLAGS_render_ngrams || FLAGS_unicharset_file.empty(),
430  "Use --unicharset_file only if --render_ngrams is set.\n");
431 
432  if (!FLAGS_find_fonts && !FontUtils::IsAvailableFont(FLAGS_font.c_str())) {
433  string pango_name;
434  if (!FontUtils::IsAvailableFont(FLAGS_font.c_str(), &pango_name)) {
435  tprintf("Could not find font named %s. Pango suggested font %s\n",
436  FLAGS_font.c_str(), pango_name.c_str());
437  TLOG_FATAL("Please correct --font arg.");
438  }
439  }
440 
441  if (FLAGS_render_ngrams)
442  FLAGS_output_word_boxes = true;
443 
444  char font_desc_name[1024];
445  snprintf(font_desc_name, 1024, "%s %d", FLAGS_font.c_str(),
446  static_cast<int>(FLAGS_ptsize));
447  StringRenderer render(font_desc_name, FLAGS_xsize, FLAGS_ysize);
448  render.set_add_ligatures(FLAGS_ligatures);
449  render.set_leading(FLAGS_leading);
450  render.set_resolution(FLAGS_resolution);
451  render.set_char_spacing(FLAGS_char_spacing * FLAGS_ptsize);
452  render.set_h_margin(FLAGS_margin);
453  render.set_v_margin(FLAGS_margin);
454  render.set_output_word_boxes(FLAGS_output_word_boxes);
455  render.set_box_padding(FLAGS_box_padding);
456  render.set_strip_unrenderable_words(FLAGS_strip_unrenderable_words);
457  render.set_underline_start_prob(FLAGS_underline_start_prob);
458  render.set_underline_continuation_prob(FLAGS_underline_continuation_prob);
459 
460  // Set text rendering orientation and their forms.
461  if (FLAGS_writing_mode == "horizontal") {
462  // Render regular horizontal text (default).
463  render.set_vertical_text(false);
464  render.set_gravity_hint_strong(false);
465  render.set_render_fullwidth_latin(false);
466  } else if (FLAGS_writing_mode == "vertical") {
467  // Render vertical text. Glyph orientation is selected by Pango.
468  render.set_vertical_text(true);
469  render.set_gravity_hint_strong(false);
470  render.set_render_fullwidth_latin(false);
471  } else if (FLAGS_writing_mode == "vertical-upright") {
472  // Render vertical text. Glyph orientation is set to be upright.
473  // Also Basic Latin characters are converted to their fullwidth forms
474  // on rendering, since fullwidth Latin characters are well designed to fit
475  // vertical text lines, while .box files store halfwidth Basic Latin
476  // unichars.
477  render.set_vertical_text(true);
478  render.set_gravity_hint_strong(true);
479  render.set_render_fullwidth_latin(true);
480  } else {
481  TLOG_FATAL("Invalid writing mode : %s\n", FLAGS_writing_mode.c_str());
482  }
483 
484  string src_utf8;
485  // This c_str is NOT redundant!
486  File::ReadFileToStringOrDie(FLAGS_text.c_str(), &src_utf8);
487 
488  // Remove the unicode mark if present.
489  if (strncmp(src_utf8.c_str(), "\xef\xbb\xbf", 3) == 0) {
490  src_utf8.erase(0, 3);
491  }
492  tlog(1, "Render string of size %d\n", src_utf8.length());
493 
494  if (FLAGS_render_ngrams || FLAGS_only_extract_font_properties) {
495  // Try to preserve behavior of old text2image by expanding inter-word
496  // spaces by a factor of 4.
497  const string kSeparator = FLAGS_render_ngrams ? " " : " ";
498  // Also restrict the number of charactes per line to try and avoid
499  // line-breaking in the middle of words like "-A", "R$" etc. which are
500  // otherwise allowed by the standard unicode line-breaking rules.
501  const int kCharsPerLine = (FLAGS_ptsize > 20) ? 50 : 100;
502  string rand_utf8;
503  UNICHARSET unicharset;
504  if (FLAGS_render_ngrams && !FLAGS_unicharset_file.empty() &&
505  !unicharset.load_from_file(FLAGS_unicharset_file.c_str())) {
506  TLOG_FATAL("Failed to load unicharset from file %s\n",
507  FLAGS_unicharset_file.c_str());
508  }
509 
510  // If we are rendering ngrams that will be OCRed later, shuffle them so that
511  // tesseract does not have difficulties finding correct baseline, word
512  // spaces, etc.
513  const char *str8 = src_utf8.c_str();
514  int len = src_utf8.length();
515  int step;
516  vector<pair<int, int> > offsets;
517  int offset = SpanUTF8Whitespace(str8);
518  while (offset < len) {
519  step = SpanUTF8NotWhitespace(str8 + offset);
520  offsets.push_back(make_pair(offset, step));
521  offset += step;
522  offset += SpanUTF8Whitespace(str8 + offset);
523  }
524  if (FLAGS_render_ngrams)
525  std::random_shuffle(offsets.begin(), offsets.end());
526 
527  for (int i = 0, line = 1; i < offsets.size(); ++i) {
528  const char *curr_pos = str8 + offsets[i].first;
529  int ngram_len = offsets[i].second;
530  // Skip words that contain characters not in found in unicharset.
531  if (!FLAGS_unicharset_file.empty() &&
532  !unicharset.encodable_string(curr_pos, NULL)) {
533  continue;
534  }
535  rand_utf8.append(curr_pos, ngram_len);
536  if (rand_utf8.length() > line * kCharsPerLine) {
537  rand_utf8.append(" \n");
538  ++line;
539  if (line & 0x1) rand_utf8.append(kSeparator);
540  } else {
541  rand_utf8.append(kSeparator);
542  }
543  }
544  tlog(1, "Rendered ngram string of size %d\n", rand_utf8.length());
545  src_utf8.swap(rand_utf8);
546  }
547  if (FLAGS_only_extract_font_properties) {
548  tprintf("Extracting font properties only\n");
549  ExtractFontProperties(src_utf8, &render, FLAGS_outputbase.c_str());
550  tprintf("Done!\n");
551  return 0;
552  }
553 
554  int im = 0;
555  vector<float> page_rotation;
556  const char* to_render_utf8 = src_utf8.c_str();
557 
558  tesseract::TRand randomizer;
559  randomizer.set_seed(kRandomSeed);
560  vector<string> font_names;
561  // We use a two pass mechanism to rotate images in both direction.
562  // The first pass(0) will rotate the images in random directions and
563  // the second pass(1) will mirror those rotations.
564  int num_pass = FLAGS_bidirectional_rotation ? 2 : 1;
565  for (int pass = 0; pass < num_pass; ++pass) {
566  int page_num = 0;
567  string font_used;
568  for (int offset = 0; offset < strlen(to_render_utf8); ++im, ++page_num) {
569  tlog(1, "Starting page %d\n", im);
570  Pix* pix = NULL;
571  if (FLAGS_find_fonts) {
572  offset += render.RenderAllFontsToImage(FLAGS_min_coverage,
573  to_render_utf8 + offset,
574  strlen(to_render_utf8 + offset),
575  &font_used, &pix);
576  } else {
577  offset += render.RenderToImage(to_render_utf8 + offset,
578  strlen(to_render_utf8 + offset), &pix);
579  }
580  if (pix != NULL) {
581  float rotation = 0;
582  if (pass == 1) {
583  // Pass 2, do mirror rotation.
584  rotation = -1 * page_rotation[page_num];
585  }
586  if (FLAGS_degrade_image) {
587  pix = DegradeImage(pix, FLAGS_exposure, &randomizer, &rotation);
588  }
589  render.RotatePageBoxes(rotation);
590 
591  if (pass == 0) {
592  // Pass 1, rotate randomly and store the rotation..
593  page_rotation.push_back(rotation);
594  }
595 
596  Pix* gray_pix = pixConvertTo8(pix, false);
597  pixDestroy(&pix);
598  Pix* binary = pixThresholdToBinary(gray_pix, 128);
599  pixDestroy(&gray_pix);
600  char tiff_name[1024];
601  if (FLAGS_find_fonts) {
602  if (FLAGS_render_per_font) {
603  string fontname_for_file = tesseract::StringReplace(
604  font_used, " ", "_");
605  snprintf(tiff_name, 1024, "%s.%s.tif", FLAGS_outputbase.c_str(),
606  fontname_for_file.c_str());
607  pixWriteTiff(tiff_name, binary, IFF_TIFF_G4, "w");
608  tprintf("Rendered page %d to file %s\n", im, tiff_name);
609  } else {
610  font_names.push_back(font_used);
611  }
612  } else {
613  snprintf(tiff_name, 1024, "%s.tif", FLAGS_outputbase.c_str());
614  pixWriteTiff(tiff_name, binary, IFF_TIFF_G4, im == 0 ? "w" : "a");
615  tprintf("Rendered page %d to file %s\n", im, tiff_name);
616  }
617  // Make individual glyphs
618  if (FLAGS_output_individual_glyph_images) {
619  if (!MakeIndividualGlyphs(binary, render.GetBoxes(), im)) {
620  tprintf("ERROR: Individual glyphs not saved\n");
621  }
622  }
623  pixDestroy(&binary);
624  }
625  if (FLAGS_find_fonts && offset != 0) {
626  // We just want a list of names, or some sample images so we don't need
627  // to render more than the first page of the text.
628  break;
629  }
630  }
631  }
632  if (!FLAGS_find_fonts) {
633  string box_name = FLAGS_outputbase.c_str();
634  box_name += ".box";
635  render.WriteAllBoxes(box_name);
636  } else if (!FLAGS_render_per_font && !font_names.empty()) {
637  string filename = FLAGS_outputbase.c_str();
638  filename += ".fontlist.txt";
639  FILE* fp = fopen(filename.c_str(), "wb");
640  if (fp == NULL) {
641  tprintf("Failed to create output font list %s\n", filename.c_str());
642  } else {
643  for (int i = 0; i < font_names.size(); ++i) {
644  fprintf(fp, "%s\n", font_names[i].c_str());
645  }
646  fclose(fp);
647  }
648  }
649 
650  return 0;
651 }
STRING_PARAM_FLAG(text,"","File name of text input to process")
void set_gravity_hint_strong(bool gravity_hint_strong)
void set_render_fullwidth_latin(bool render_fullwidth_latin)
void WriteAllBoxes(const string &filename)
const PangoFontInfo & font() const
bool GetSpacingProperties(const string &utf8_char, int *x_bearing, int *x_advance) const
bool MakeIndividualGlyphs(Pix *pix, const vector< BoxChar * > &vbox, const int input_tiff_page)
Definition: text2image.cpp:309
void set_strip_unrenderable_words(bool val)
#define tprintf(...)
Definition: tprintf.h:31
BOOL_PARAM_FLAG(degrade_image, true,"Degrade rendered image with speckle noise, dilation/erosion ""and rotation")
void set_resolution(const int resolution)
void set_leading(int leading)
void ParseCommandLineFlags(const char *usage, int *argc, char ***argv, const bool remove_flags)
void set_underline_start_prob(const double frac)
void set_vertical_text(bool vertical_text)
bool load_from_file(const char *const filename, bool skip_fragments)
Definition: unicharset.h:346
void RotatePageBoxes(float rotation)
const string & ch() const
Definition: boxchar.h:47
static void WriteStringToFileOrDie(const string &str, const string &filename)
Definition: fileio.cpp:53
map< string, int > kerned_x_gaps
Definition: text2image.cpp:179
int SpanUTF8Whitespace(const char *text)
Definition: normstrngs.cpp:186
#define ASSERT_HOST_MSG(x, msg...)
Definition: errcode.h:98
void set_box_padding(int val)
int SpanUTF8NotWhitespace(const char *text)
Definition: normstrngs.cpp:197
void set_underline_continuation_prob(const double frac)
const Box * box() const
Definition: boxchar.h:48
void set_char_spacing(double char_spacing)
void set_seed(uinT64 seed)
Definition: helpers.h:43
const vector< BoxChar * > & GetBoxes() const
DOUBLE_PARAM_FLAG(char_spacing, 0,"Inter-character space in ems")
#define TLOG_FATAL(msg...)
Definition: tlog.h:41
void set_add_ligatures(bool add_ligatures)
void set_h_margin(const int h_margin)
void set_v_margin(const int v_margin)
bool encodable_string(const char *str, int *first_bad_position) const
Definition: unicharset.cpp:222
Pix * DegradeImage(Pix *input, int exposure, TRand *randomizer, float *rotation)
INT_PARAM_FLAG(exposure, 0,"Exposure level in photocopier")
const int kRandomSeed
Definition: text2image.cpp:60
#define tlog(level,...)
Definition: tlog.h:33
void ExtractFontProperties(const string &utf8_text, StringRenderer *render, const string &output_base)
Definition: text2image.cpp:212
int RenderAllFontsToImage(double min_coverage, const char *text, int text_length, string *font_used, Pix **pix)
#define NULL
Definition: host.h:144
void set_output_word_boxes(bool val)
int RenderToImage(const char *text, int text_length, Pix **pix)
int main(int argc, char **argv)
Definition: text2image.cpp:414