tesseract
4.0.0-1-g2a2b
|
#include <cstdlib>
#include <cstring>
#include <algorithm>
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>
#include "allheaders.h"
#include "boxchar.h"
#include "commandlineflags.h"
#include "commontraining.h"
#include "degradeimage.h"
#include "errcode.h"
#include "fileio.h"
#include "helpers.h"
#include "normstrngs.h"
#include "stringrenderer.h"
#include "tlog.h"
#include "unicharset.h"
#include "util.h"
Go to the source code of this file.
Classes | |
struct | tesseract::SpacingProperties |
Namespaces | |
tesseract | |
Functions | |
STRING_PARAM_FLAG (text, "", "File name of text input to process") | |
STRING_PARAM_FLAG (outputbase, "", "Basename for output image/box file") | |
BOOL_PARAM_FLAG (degrade_image, true, "Degrade rendered image with speckle noise, dilation/erosion " "and rotation") | |
BOOL_PARAM_FLAG (rotate_image, true, "Rotate the image in a random way.") | |
INT_PARAM_FLAG (exposure, 0, "Exposure level in photocopier") | |
INT_PARAM_FLAG (resolution, 300, "Pixels per inch") | |
INT_PARAM_FLAG (xsize, 3600, "Width of output image") | |
INT_PARAM_FLAG (ysize, 4800, "Height of output image") | |
INT_PARAM_FLAG (max_pages, 0, "Maximum number of pages to output (0=unlimited)") | |
INT_PARAM_FLAG (margin, 100, "Margin round edges of image") | |
INT_PARAM_FLAG (ptsize, 12, "Size of printed text") | |
DOUBLE_PARAM_FLAG (char_spacing, 0, "Inter-character space in ems") | |
DOUBLE_PARAM_FLAG (underline_start_prob, 0, "Fraction of words to underline (value in [0,1])") | |
DOUBLE_PARAM_FLAG (underline_continuation_prob, 0, "Fraction of words to underline (value in [0,1])") | |
INT_PARAM_FLAG (leading, 12, "Inter-line space (in pixels)") | |
STRING_PARAM_FLAG (writing_mode, "horizontal", "Specify one of the following writing" " modes.\n" "'horizontal' :Render regular horizontal text.(default)\n" "'vertical' :Render vertical text. Glyph orientation is" " selected by Pango.\n" "'vertical-upright' :Render vertical text. Glyph " " orientation is set to be upright.") | |
INT_PARAM_FLAG (box_padding, 0, "Padding around produced bounding boxes") | |
BOOL_PARAM_FLAG (strip_unrenderable_words, true, "Remove unrenderable words from source text") | |
STRING_PARAM_FLAG (font, "Arial", "Font description name to use") | |
BOOL_PARAM_FLAG (ligatures, false, "Rebuild and render ligatures") | |
BOOL_PARAM_FLAG (find_fonts, false, "Search for all fonts that can render the text") | |
BOOL_PARAM_FLAG (render_per_font, true, "If find_fonts==true, render each font to its own image. " "Image filenames are of the form output_name.font_name.tif") | |
DOUBLE_PARAM_FLAG (min_coverage, 1.0, "If find_fonts==true, the minimum coverage the font has of " "the characters in the text file to include it, between " "0 and 1.") | |
BOOL_PARAM_FLAG (list_available_fonts, false, "List available fonts and quit.") | |
BOOL_PARAM_FLAG (render_ngrams, false, "Put each space-separated entity from the" " input file into one bounding box. The ngrams in the input" " file will be randomly permuted before rendering (so that" " there is sufficient variety of characters on each line).") | |
BOOL_PARAM_FLAG (output_word_boxes, false, "Output word bounding boxes instead of character boxes. " "This is used for Cube training, and implied by " "--render_ngrams.") | |
STRING_PARAM_FLAG (unicharset_file, "", "File with characters in the unicharset. If --render_ngrams" " is true and --unicharset_file is specified, ngrams with" " characters that are not in unicharset will be omitted") | |
BOOL_PARAM_FLAG (bidirectional_rotation, false, "Rotate the generated characters both ways.") | |
BOOL_PARAM_FLAG (only_extract_font_properties, false, "Assumes that the input file contains a list of ngrams. Renders" " each ngram, extracts spacing properties and records them in" " output_base/[font_name].fontinfo file.") | |
BOOL_PARAM_FLAG (output_individual_glyph_images, false, "If true also outputs individual character images") | |
INT_PARAM_FLAG (glyph_resized_size, 0, "Each glyph is square with this side length in pixels") | |
INT_PARAM_FLAG (glyph_num_border_pixels_to_pad, 0, "Final_size=glyph_resized_size+2*glyph_num_border_pixels_to_pad") | |
int | main (int argc, char **argv) |
Variables | |
const int | kRandomSeed = 0x18273645 |
BOOL_PARAM_FLAG | ( | degrade_image | , |
true | , | ||
"Degrade rendered image with speckle noise, dilation/erosion " "and rotation" | |||
) |
BOOL_PARAM_FLAG | ( | rotate_image | , |
true | , | ||
"Rotate the image in a random way." | |||
) |
BOOL_PARAM_FLAG | ( | strip_unrenderable_words | , |
true | , | ||
"Remove unrenderable words from source text" | |||
) |
BOOL_PARAM_FLAG | ( | ligatures | , |
false | , | ||
"Rebuild and render ligatures" | |||
) |
BOOL_PARAM_FLAG | ( | find_fonts | , |
false | , | ||
"Search for all fonts that can render the text" | |||
) |
BOOL_PARAM_FLAG | ( | render_per_font | , |
true | , | ||
"If find_fonts==true, render each font to its own image. " "Image filenames are of the form output_name.font_name.tif" | |||
) |
BOOL_PARAM_FLAG | ( | list_available_fonts | , |
false | , | ||
"List available fonts and quit." | |||
) |
BOOL_PARAM_FLAG | ( | render_ngrams | , |
false | , | ||
"Put each space-separated entity from the" " input file into one bounding box. The ngrams in the input" " file will be randomly permuted before rendering (so that" " there is sufficient variety of characters on each line)." | |||
) |
BOOL_PARAM_FLAG | ( | output_word_boxes | , |
false | , | ||
"Output word bounding boxes instead of character boxes. " "This is used for Cube training, and implied by " "--render_ngrams." | |||
) |
BOOL_PARAM_FLAG | ( | bidirectional_rotation | , |
false | , | ||
"Rotate the generated characters both ways." | |||
) |
BOOL_PARAM_FLAG | ( | only_extract_font_properties | , |
false | , | ||
"Assumes that the input file contains a list of ngrams. Renders" " each ngram, extracts spacing properties and records them in" " output_base/[font_name].fontinfo file." | |||
) |
BOOL_PARAM_FLAG | ( | output_individual_glyph_images | , |
false | , | ||
"If true also outputs individual character images" | |||
) |
DOUBLE_PARAM_FLAG | ( | char_spacing | , |
0 | , | ||
"Inter-character space in ems" | |||
) |
DOUBLE_PARAM_FLAG | ( | underline_start_prob | , |
0 | , | ||
"Fraction of words to underline (value in [0,1])" | |||
) |
DOUBLE_PARAM_FLAG | ( | underline_continuation_prob | , |
0 | , | ||
"Fraction of words to underline (value in [0,1])" | |||
) |
DOUBLE_PARAM_FLAG | ( | min_coverage | , |
1.0 | , | ||
"If find_fonts==true, the minimum coverage the font has of " "the characters in the text file to include it, between " "0 and 1." | |||
) |
INT_PARAM_FLAG | ( | exposure | , |
0 | , | ||
"Exposure level in photocopier" | |||
) |
INT_PARAM_FLAG | ( | resolution | , |
300 | , | ||
"Pixels per inch" | |||
) |
INT_PARAM_FLAG | ( | xsize | , |
3600 | , | ||
"Width of output image" | |||
) |
INT_PARAM_FLAG | ( | ysize | , |
4800 | , | ||
"Height of output image" | |||
) |
INT_PARAM_FLAG | ( | max_pages | , |
0 | , | ||
"Maximum number of pages to output (0=unlimited)" | |||
) |
INT_PARAM_FLAG | ( | margin | , |
100 | , | ||
"Margin round edges of image" | |||
) |
INT_PARAM_FLAG | ( | ptsize | , |
12 | , | ||
"Size of printed text" | |||
) |
INT_PARAM_FLAG | ( | leading | , |
12 | , | ||
"Inter-line space (in pixels)" | |||
) |
INT_PARAM_FLAG | ( | box_padding | , |
0 | , | ||
"Padding around produced bounding boxes" | |||
) |
INT_PARAM_FLAG | ( | glyph_resized_size | , |
0 | , | ||
"Each glyph is square with this side length in pixels" | |||
) |
INT_PARAM_FLAG | ( | glyph_num_border_pixels_to_pad | , |
0 | , | ||
"Final_size=glyph_resized_size+2*glyph_num_border_pixels_to_pad" | |||
) |
int main | ( | int | argc, |
char ** | argv | ||
) |
Definition at line 686 of file text2image.cpp.
STRING_PARAM_FLAG | ( | text | , |
"" | , | ||
"File name of text input to process" | |||
) |
STRING_PARAM_FLAG | ( | outputbase | , |
"" | , | ||
"Basename for output image/box file" | |||
) |
STRING_PARAM_FLAG | ( | writing_mode | , |
"horizontal" | , | ||
"Specify one of the following writing" " modes.\n" "'horizontal' :Render regular horizontal text.(default)\n" "'vertical' :Render vertical text. Glyph orientation is" " selected by Pango.\n" "'vertical-upright' :Render vertical text. Glyph " " orientation is set to be upright." | |||
) |
STRING_PARAM_FLAG | ( | font | , |
"Arial" | , | ||
"Font description name to use" | |||
) |
STRING_PARAM_FLAG | ( | unicharset_file | , |
"" | , | ||
"File with characters in the unicharset. If --render_ngrams" " is true and --unicharset_file is specified, ngrams with" " characters that are not in unicharset will be omitted" | |||
) |
const int kRandomSeed = 0x18273645 |
Definition at line 58 of file text2image.cpp.