39 #include "allheaders.h"
53 # define putenv(s) _putenv(s)
67 "Degrade rendered image with speckle noise, dilation/erosion "
// Boolean flag `rotate_image`, enabled by default. In the page-rendering loop
// its value selects whether the address of the local rotation value or nullptr
// is handed to the degradation call that runs under FLAGS_degrade_image.
71 static BOOL_PARAM_FLAG(rotate_image,
true,
"Rotate the image in a random way.");
// Integer flag `exposure`, default 0. The code that consumes it is not
// visible in this excerpt; the help string below is the only description.
74 static INT_PARAM_FLAG(exposure, 0,
"Exposure level in photocopier");
78 "Degrade rendered image with noise, blur, invert.");
// Boolean flag `perspective`, disabled by default. Its consumer is not
// visible in this excerpt.
95 static BOOL_PARAM_FLAG(perspective,
false,
"Generate Perspective Distortion");
// Integer flag `box_reduction`, default 0. Its consumer is not visible in
// this excerpt.
98 static INT_PARAM_FLAG(box_reduction, 0,
"Integer reduction factor box_scale");
// Integer flag `max_pages`, default 0. The per-page rendering loop continues
// only while this is 0 or the running page counter is still below it, so 0
// disables the limit.
112 static INT_PARAM_FLAG(max_pages, 0,
"Maximum number of pages to output (0=unlimited)");
// Integer flag `margin`, default 100. Its consumer is not visible in this
// excerpt.
115 static INT_PARAM_FLAG(margin, 100,
"Margin round edges of image");
126 "Fraction of words to underline (value in [0,1])");
130 "Fraction of words to underline (value in [0,1])");
// Integer flag `leading`, default 12 (pixels, per the help string). Its
// consumer is not visible in this excerpt.
133 static INT_PARAM_FLAG(leading, 12,
"Inter-line space (in pixels)");
137 "Specify one of the following writing"
139 "'horizontal' : Render regular horizontal text. (default)\n"
140 "'vertical' : Render vertical text. Glyph orientation is"
141 " selected by Pango.\n"
142 "'vertical-upright' : Render vertical text. Glyph "
143 " orientation is set to be upright.");
// Integer flag `box_padding`, default 0. Its consumer is not visible in this
// excerpt.
145 static INT_PARAM_FLAG(box_padding, 0,
"Padding around produced bounding boxes");
148 "Remove unrenderable words from source text");
154 "Rebuild and render ligatures");
157 "Search for all fonts that can render the text");
159 "If find_fonts==true, render each font to its own image. "
160 "Image filenames are of the form output_name.font_name.tif");
162 "If find_fonts==true, the minimum coverage the font has of "
163 "the characters in the text file to include it, between "
// Boolean flag `list_available_fonts`, disabled by default. When set, the
// driver walks the names returned by FontUtils::ListAvailableFonts() and
// prints each one with its index.
166 static BOOL_PARAM_FLAG(list_available_fonts,
false,
"List available fonts and quit.");
// Boolean flag `render_ngrams`, disabled by default. When set, the driver
// forces FLAGS_output_word_boxes to true and shuffles the collected ngram
// offsets before rebuilding the text that gets rendered.
168 static BOOL_PARAM_FLAG(render_ngrams,
false,
"Put each space-separated entity from the"
169 " input file into one bounding box. The ngrams in the input"
170 " file will be randomly permuted before rendering (so that"
171 " there is sufficient variety of characters on each line).");
174 "Output word bounding boxes instead of character boxes. "
175 "This is used for Cube training, and implied by "
179 "File with characters in the unicharset. If --render_ngrams"
180 " is true and --unicharset_file is specified, ngrams with"
181 " characters that are not in unicharset will be omitted");
184 "Rotate the generated characters both ways.");
187 "Assumes that the input file contains a list of ngrams. Renders"
188 " each ngram, extracts spacing properties and records them in"
189 " output_base/[font_name].fontinfo file.");
193 "If true also outputs individual character images");
195 "Each glyph is square with this side length in pixels");
197 "Final_size=glyph_resized_size+2*glyph_num_border_pixels_to_pad");
211 static bool IsWhitespaceBox(
const BoxChar* boxchar) {
212 return (boxchar->
box() ==
nullptr ||
219 size_t start_pos = 0, pos;
220 while ((pos = in.find(oldsub, start_pos)) != std::string::npos) {
221 out.append(in.data() + start_pos, pos - start_pos);
222 out.append(newsub.data(), newsub.length());
223 start_pos = pos + oldsub.length();
225 out.append(in.data() + start_pos, in.length() - start_pos);
239 static void ExtractFontProperties(
const std::string &utf8_text,
242 std::map<std::string, SpacingProperties> spacing_map;
243 std::map<std::string, SpacingProperties>::iterator spacing_map_it0;
244 std::map<std::string, SpacingProperties>::iterator spacing_map_it1;
245 int x_bearing, x_advance;
246 int len = utf8_text.length();
248 const char* text = utf8_text.c_str();
249 while (offset < len) {
251 render->
RenderToImage(text + offset, strlen(text + offset),
nullptr);
252 const std::vector<BoxChar*> &boxes = render->
GetBoxes();
256 if (boxes.size() > 2 && !IsWhitespaceBox(boxes[boxes.size() - 1]) &&
257 IsWhitespaceBox(boxes[boxes.size() - 2])) {
258 if (boxes.size() > 3) {
259 tprintf(
"WARNING: Adjusting to bad page break after '%s%s'\n",
260 boxes[boxes.size() - 4]->ch().c_str(),
261 boxes[boxes.size() - 3]->ch().c_str());
263 offset -= boxes[boxes.size() - 1]->ch().size();
266 for (
size_t b = 0; b < boxes.size(); b += 2) {
267 while (b < boxes.size() && IsWhitespaceBox(boxes[b])) ++b;
268 if (b + 1 >= boxes.size())
break;
282 if (IsWhitespaceBox(boxes[b+1])) {
285 int xgap = (boxes[b+1]->box()->x -
286 (boxes[b]->box()->x + boxes[b]->box()->w));
287 spacing_map_it0 = spacing_map.find(ch0);
289 if (spacing_map_it0 == spacing_map.end() &&
291 spacing_map[ch0] = SpacingProperties(
292 x_bearing, x_advance - x_bearing - boxes[b]->box()->w);
293 spacing_map_it0 = spacing_map.find(ch0);
297 tlog(3,
"%s%s\n", ch0.c_str(), ch1.c_str());
298 spacing_map_it1 = spacing_map.find(ch1);
299 if (spacing_map_it1 == spacing_map.end() &&
301 spacing_map[ch1] = SpacingProperties(
302 x_bearing, x_advance - x_bearing - boxes[b+1]->box()->w);
303 spacing_map_it1 = spacing_map.find(ch1);
306 if (ok_count == 2 && xgap != (spacing_map_it0->second.x_gap_after +
307 spacing_map_it1->second.x_gap_before)) {
308 spacing_map_it0->second.kerned_x_gaps[ch1] = xgap;
314 const int kBufSize = 1024;
316 snprintf(buf, kBufSize,
"%d\n", static_cast<int>(spacing_map.size()));
317 output_string.append(buf);
318 std::map<std::string, SpacingProperties>::const_iterator spacing_map_it;
319 for (spacing_map_it = spacing_map.begin();
320 spacing_map_it != spacing_map.end(); ++spacing_map_it) {
321 snprintf(buf, kBufSize,
322 "%s %d %d %d", spacing_map_it->first.c_str(),
323 spacing_map_it->second.x_gap_before,
324 spacing_map_it->second.x_gap_after,
325 static_cast<int>(spacing_map_it->second.kerned_x_gaps.size()));
326 output_string.append(buf);
327 std::map<std::string, int>::const_iterator kern_it;
328 for (kern_it = spacing_map_it->second.kerned_x_gaps.begin();
329 kern_it != spacing_map_it->second.kerned_x_gaps.end(); ++kern_it) {
330 snprintf(buf, kBufSize,
331 " %s %d", kern_it->first.c_str(), kern_it->second);
332 output_string.append(buf);
334 output_string.append(
"\n");
339 static bool MakeIndividualGlyphs(Pix* pix,
const std::vector<BoxChar*>& vbox,
340 const int input_tiff_page) {
343 tprintf(
"ERROR: MakeIndividualGlyphs(): Input Pix* is nullptr\n");
345 }
else if (FLAGS_glyph_resized_size <= 0) {
346 tprintf(
"ERROR: --glyph_resized_size must be positive\n");
348 }
else if (FLAGS_glyph_num_border_pixels_to_pad < 0) {
349 tprintf(
"ERROR: --glyph_num_border_pixels_to_pad must be 0 or positive\n");
353 const int n_boxes = vbox.size();
354 int n_boxes_saved = 0;
355 int current_tiff_page = 0;
357 static int glyph_count = 0;
358 for (
int i = 0; i < n_boxes; i++) {
360 Box* b = vbox[i]->mutable_box();
367 if (y < y_previous-pixGetHeight(pix)/10) {
368 tprintf(
"ERROR: Wrap-around encountered, at i=%d\n", i);
371 if (current_tiff_page < input_tiff_page)
continue;
372 else if (current_tiff_page > input_tiff_page)
break;
374 if (x < 0 || y < 0 ||
375 (x+w-1) >= pixGetWidth(pix) ||
376 (y+h-1) >= pixGetHeight(pix)) {
377 tprintf(
"ERROR: MakeIndividualGlyphs(): Index out of range, at i=%d"
378 " (x=%d, y=%d, w=%d, h=%d\n)", i, x, y, w, h);
380 }
else if (w < FLAGS_glyph_num_border_pixels_to_pad &&
381 h < FLAGS_glyph_num_border_pixels_to_pad) {
382 tprintf(
"ERROR: Input image too small to be a character, at i=%d\n", i);
386 Pix* pix_glyph = pixClipRectangle(pix, b,
nullptr);
388 tprintf(
"ERROR: MakeIndividualGlyphs(): Failed to clip, at i=%d\n", i);
392 Pix* pix_glyph_sq = pixScaleToSize(pix_glyph,
393 FLAGS_glyph_resized_size,
394 FLAGS_glyph_resized_size);
396 tprintf(
"ERROR: MakeIndividualGlyphs(): Failed to resize, at i=%d\n", i);
400 Pix* pix_glyph_sq_pad = pixAddBorder(pix_glyph_sq,
401 FLAGS_glyph_num_border_pixels_to_pad,
403 if (!pix_glyph_sq_pad) {
404 tprintf(
"ERROR: MakeIndividualGlyphs(): Failed to zero-pad, at i=%d\n",
409 Pix* pix_glyph_sq_pad_8 = pixConvertTo8(pix_glyph_sq_pad,
false);
411 snprintf(filename, 1024,
"%s_%d.jpg", FLAGS_outputbase.c_str(),
413 if (pixWriteJpeg(filename, pix_glyph_sq_pad_8, 100, 0)) {
414 tprintf(
"ERROR: MakeIndividualGlyphs(): Failed to write JPEG to %s,"
415 " at i=%d\n", filename, i);
419 pixDestroy(&pix_glyph);
420 pixDestroy(&pix_glyph_sq);
421 pixDestroy(&pix_glyph_sq_pad);
422 pixDestroy(&pix_glyph_sq_pad_8);
426 if (n_boxes_saved == 0) {
429 tprintf(
"Total number of characters saved = %d\n", n_boxes_saved);
436 using tesseract::ExtractFontProperties;
444 if (FLAGS_list_available_fonts) {
445 const std::vector<std::string>& all_fonts = FontUtils::ListAvailableFonts();
446 for (
unsigned int i = 0; i < all_fonts.size(); ++i) {
451 if (font_name.back() ==
',')
452 font_name.pop_back();
453 printf(
"%3u: %s\n", i, font_name.c_str());
455 "Font %s is unrecognized.\n", all_fonts[i].c_str());
461 if (FLAGS_text.empty()) {
462 tprintf(
"'--text' option is missing!\n");
465 if (FLAGS_outputbase.empty()) {
466 tprintf(
"'--outputbase' option is missing!\n");
469 if (!FLAGS_unicharset_file.empty() && FLAGS_render_ngrams) {
470 tprintf(
"Use '--unicharset_file' only if '--render_ngrams' is set.\n");
475 if (!FLAGS_find_fonts && !FontUtils::IsAvailableFont(font_name.c_str())) {
478 if (!FontUtils::IsAvailableFont(font_name.c_str(), &pango_name)) {
479 tprintf(
"Could not find font named '%s'.\n", FLAGS_font.c_str());
480 if (!pango_name.empty()) {
481 tprintf(
"Pango suggested font '%s'.\n", pango_name.c_str());
483 tprintf(
"Please correct --font arg.\n");
488 if (FLAGS_render_ngrams)
489 FLAGS_output_word_boxes =
true;
491 char font_desc_name[1024];
492 snprintf(font_desc_name, 1024,
"%s %d", font_name.c_str(),
493 static_cast<int>(FLAGS_ptsize));
509 if (FLAGS_writing_mode ==
"horizontal") {
514 }
else if (FLAGS_writing_mode ==
"vertical") {
519 }
else if (FLAGS_writing_mode ==
"vertical-upright") {
529 tprintf(
"Invalid writing mode: %s\n", FLAGS_writing_mode.c_str());
535 if (!File::ReadFileToString(FLAGS_text.c_str(), &src_utf8)) {
536 tprintf(
"Failed to read file: %s\n", FLAGS_text.c_str());
541 if (strncmp(src_utf8.c_str(),
"\xef\xbb\xbf", 3) == 0) {
542 src_utf8.erase(0, 3);
544 tlog(1,
"Render string of size %zu\n", src_utf8.length());
546 if (FLAGS_render_ngrams || FLAGS_only_extract_font_properties) {
549 const std::string kSeparator = FLAGS_render_ngrams ?
" " :
" ";
553 const unsigned int kCharsPerLine = (FLAGS_ptsize > 20) ? 50 : 100;
556 if (FLAGS_render_ngrams && !FLAGS_unicharset_file.empty() &&
558 tprintf(
"Failed to load unicharset from file %s\n",
559 FLAGS_unicharset_file.c_str());
566 const char *str8 = src_utf8.c_str();
567 int len = src_utf8.length();
569 std::vector<std::pair<int, int> > offsets;
571 while (offset < len) {
573 offsets.push_back(std::make_pair(offset, step));
577 if (FLAGS_render_ngrams) {
579 std::mt19937 random_gen(seed);
580 std::shuffle(offsets.begin(), offsets.end(), random_gen);
583 for (
size_t i = 0, line = 1; i < offsets.size(); ++i) {
584 const char *curr_pos = str8 + offsets[i].first;
585 int ngram_len = offsets[i].second;
588 if (!FLAGS_unicharset_file.empty() &&
592 rand_utf8.append(curr_pos, ngram_len);
593 if (rand_utf8.length() > line * kCharsPerLine) {
594 rand_utf8.append(
" \n");
596 if (line & 0x1) rand_utf8.append(kSeparator);
598 rand_utf8.append(kSeparator);
601 tlog(1,
"Rendered ngram string of size %zu\n", rand_utf8.length());
602 src_utf8.swap(rand_utf8);
604 if (FLAGS_only_extract_font_properties) {
605 tprintf(
"Extracting font properties only\n");
606 ExtractFontProperties(src_utf8, &render, FLAGS_outputbase.c_str());
612 std::vector<float> page_rotation;
613 const char* to_render_utf8 = src_utf8.c_str();
617 std::vector<std::string> font_names;
621 int num_pass = FLAGS_bidirectional_rotation ? 2 : 1;
622 for (
int pass = 0; pass < num_pass; ++pass) {
625 for (
size_t offset = 0;
626 offset < strlen(to_render_utf8) &&
627 (FLAGS_max_pages == 0 || page_num < FLAGS_max_pages);
629 tlog(1,
"Starting page %d\n", im);
631 if (FLAGS_find_fonts) {
633 to_render_utf8 + offset,
634 strlen(to_render_utf8 + offset),
638 strlen(to_render_utf8 + offset), &pix);
640 if (pix !=
nullptr) {
644 rotation = -1 * page_rotation[page_num];
646 if (FLAGS_degrade_image) {
648 FLAGS_rotate_image ? &rotation :
nullptr);
650 if (FLAGS_distort_image) {
653 FLAGS_white_noise, FLAGS_smooth_noise, FLAGS_blur,
654 1, &randomizer,
nullptr);
660 page_rotation.push_back(rotation);
663 Pix* gray_pix = pixConvertTo8(pix,
false);
665 Pix* binary = pixThresholdToBinary(gray_pix, 128);
666 pixDestroy(&gray_pix);
667 char tiff_name[1024];
668 if (FLAGS_find_fonts) {
669 if (FLAGS_render_per_font) {
670 std::string fontname_for_file = tesseract::StringReplace(
671 font_used,
" ",
"_");
672 snprintf(tiff_name, 1024,
"%s.%s.tif", FLAGS_outputbase.c_str(),
673 fontname_for_file.c_str());
674 pixWriteTiff(tiff_name, binary, IFF_TIFF_G4,
"w");
675 tprintf(
"Rendered page %d to file %s\n", im, tiff_name);
677 font_names.push_back(font_used);
680 snprintf(tiff_name, 1024,
"%s.tif", FLAGS_outputbase.c_str());
681 pixWriteTiff(tiff_name, binary, IFF_TIFF_G4, im == 0 ?
"w" :
"a");
682 tprintf(
"Rendered page %d to file %s\n", im, tiff_name);
685 if (FLAGS_output_individual_glyph_images) {
686 if (!MakeIndividualGlyphs(binary, render.
GetBoxes(), im)) {
687 tprintf(
"ERROR: Individual glyphs not saved\n");
692 if (FLAGS_find_fonts && offset != 0) {
699 if (!FLAGS_find_fonts) {
703 }
else if (!FLAGS_render_per_font && !font_names.empty()) {
705 filename +=
".fontlist.txt";
706 FILE* fp = fopen(filename.c_str(),
"wb");
708 tprintf(
"Failed to create output font list %s\n", filename.c_str());
710 for (
size_t i = 0; i < font_names.size(); ++i) {
711 fprintf(fp,
"%s\n", font_names[i].c_str());
720 int main(
int argc,
char** argv) {
726 backend = getenv(
"PANGOCAIRO_BACKEND");
727 if (backend ==
nullptr) {
728 static char envstring[] =
"PANGOCAIRO_BACKEND=fc";
731 printf(
"Using '%s' as pango cairo backend based on environment "
732 "variable.\n", backend);
734 tesseract::CheckSharedLibraryVersion();
736 if ((strcmp(argv[1],
"-v") == 0) ||
737 (strcmp(argv[1],
"--version") == 0)) {
738 FontUtils::PangoFontTypeInfo();
739 printf(
"Pango version: %s\n", pango_version_string());