|
tesseract
4.0.0-1-g2a2b
|
#include <cstdlib>
#include <cstring>
#include <algorithm>
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>
#include "allheaders.h"
#include "boxchar.h"
#include "commandlineflags.h"
#include "commontraining.h"
#include "degradeimage.h"
#include "errcode.h"
#include "fileio.h"
#include "helpers.h"
#include "normstrngs.h"
#include "stringrenderer.h"
#include "tlog.h"
#include "unicharset.h"
#include "util.h"
Go to the source code of this file.
Classes | |
| struct | tesseract::SpacingProperties |
Namespaces | |
| tesseract | |
Functions | |
| STRING_PARAM_FLAG (text, "", "File name of text input to process") | |
| STRING_PARAM_FLAG (outputbase, "", "Basename for output image/box file") | |
| BOOL_PARAM_FLAG (degrade_image, true, "Degrade rendered image with speckle noise, dilation/erosion " "and rotation") | |
| BOOL_PARAM_FLAG (rotate_image, true, "Rotate the image in a random way.") | |
| INT_PARAM_FLAG (exposure, 0, "Exposure level in photocopier") | |
| INT_PARAM_FLAG (resolution, 300, "Pixels per inch") | |
| INT_PARAM_FLAG (xsize, 3600, "Width of output image") | |
| INT_PARAM_FLAG (ysize, 4800, "Height of output image") | |
| INT_PARAM_FLAG (max_pages, 0, "Maximum number of pages to output (0=unlimited)") | |
| INT_PARAM_FLAG (margin, 100, "Margin round edges of image") | |
| INT_PARAM_FLAG (ptsize, 12, "Size of printed text") | |
| DOUBLE_PARAM_FLAG (char_spacing, 0, "Inter-character space in ems") | |
| DOUBLE_PARAM_FLAG (underline_start_prob, 0, "Fraction of words to underline (value in [0,1])") | |
| DOUBLE_PARAM_FLAG (underline_continuation_prob, 0, "Fraction of words to underline (value in [0,1])") | |
| INT_PARAM_FLAG (leading, 12, "Inter-line space (in pixels)") | |
| STRING_PARAM_FLAG (writing_mode, "horizontal", "Specify one of the following writing" " modes.\n" "'horizontal' : Render regular horizontal text. (default)\n" "'vertical' : Render vertical text. Glyph orientation is" " selected by Pango.\n" "'vertical-upright' : Render vertical text. Glyph " " orientation is set to be upright.") | |
| INT_PARAM_FLAG (box_padding, 0, "Padding around produced bounding boxes") | |
| BOOL_PARAM_FLAG (strip_unrenderable_words, true, "Remove unrenderable words from source text") | |
| STRING_PARAM_FLAG (font, "Arial", "Font description name to use") | |
| BOOL_PARAM_FLAG (ligatures, false, "Rebuild and render ligatures") | |
| BOOL_PARAM_FLAG (find_fonts, false, "Search for all fonts that can render the text") | |
| BOOL_PARAM_FLAG (render_per_font, true, "If find_fonts==true, render each font to its own image. " "Image filenames are of the form output_name.font_name.tif") | |
| DOUBLE_PARAM_FLAG (min_coverage, 1.0, "If find_fonts==true, the minimum coverage the font has of " "the characters in the text file to include it, between " "0 and 1.") | |
| BOOL_PARAM_FLAG (list_available_fonts, false, "List available fonts and quit.") | |
| BOOL_PARAM_FLAG (render_ngrams, false, "Put each space-separated entity from the" " input file into one bounding box. The ngrams in the input" " file will be randomly permuted before rendering (so that" " there is sufficient variety of characters on each line).") | |
| BOOL_PARAM_FLAG (output_word_boxes, false, "Output word bounding boxes instead of character boxes. " "This is used for Cube training, and implied by " "--render_ngrams.") | |
| STRING_PARAM_FLAG (unicharset_file, "", "File with characters in the unicharset. If --render_ngrams" " is true and --unicharset_file is specified, ngrams with" " characters that are not in unicharset will be omitted") | |
| BOOL_PARAM_FLAG (bidirectional_rotation, false, "Rotate the generated characters both ways.") | |
| BOOL_PARAM_FLAG (only_extract_font_properties, false, "Assumes that the input file contains a list of ngrams. Renders" " each ngram, extracts spacing properties and records them in" " output_base/[font_name].fontinfo file.") | |
| BOOL_PARAM_FLAG (output_individual_glyph_images, false, "If true also outputs individual character images") | |
| INT_PARAM_FLAG (glyph_resized_size, 0, "Each glyph is square with this side length in pixels") | |
| INT_PARAM_FLAG (glyph_num_border_pixels_to_pad, 0, "Final_size=glyph_resized_size+2*glyph_num_border_pixels_to_pad") | |
| int | main (int argc, char **argv) |
Variables | |
| const int | kRandomSeed = 0x18273645 |
| BOOL_PARAM_FLAG | ( | degrade_image | , |
| true | , | ||
| "Degrade rendered image with speckle noise, dilation/erosion " "and rotation" | |||
| ) |
| BOOL_PARAM_FLAG | ( | rotate_image | , |
| true | , | ||
| "Rotate the image in a random way." | |||
| ) |
| BOOL_PARAM_FLAG | ( | strip_unrenderable_words | , |
| true | , | ||
| "Remove unrenderable words from source text" | |||
| ) |
| BOOL_PARAM_FLAG | ( | ligatures | , |
| false | , | ||
| "Rebuild and render ligatures" | |||
| ) |
| BOOL_PARAM_FLAG | ( | find_fonts | , |
| false | , | ||
| "Search for all fonts that can render the text" | |||
| ) |
| BOOL_PARAM_FLAG | ( | render_per_font | , |
| true | , | ||
| "If find_fonts==true, render each font to its own image. " "Image filenames are of the form output_name.font_name.tif" | |||
| ) |
| BOOL_PARAM_FLAG | ( | list_available_fonts | , |
| false | , | ||
| "List available fonts and quit." | |||
| ) |
| BOOL_PARAM_FLAG | ( | render_ngrams | , |
| false | , | ||
| "Put each space-separated entity from the" " input file into one bounding box. The ngrams in the input" " file will be randomly permuted before rendering (so that" " there is sufficient variety of characters on each line)." | |||
| ) |
| BOOL_PARAM_FLAG | ( | output_word_boxes | , |
| false | , | ||
| "Output word bounding boxes instead of character boxes. " "This is used for Cube training, and implied by " "--render_ngrams." | |||
| ) |
| BOOL_PARAM_FLAG | ( | bidirectional_rotation | , |
| false | , | ||
| "Rotate the generated characters both ways." | |||
| ) |
| BOOL_PARAM_FLAG | ( | only_extract_font_properties | , |
| false | , | ||
| "Assumes that the input file contains a list of ngrams. Renders" " each ngram, extracts spacing properties and records them in" " output_base/[font_name].fontinfo file." | |||
| ) |
| BOOL_PARAM_FLAG | ( | output_individual_glyph_images | , |
| false | , | ||
| "If true also outputs individual character images" | |||
| ) |
| DOUBLE_PARAM_FLAG | ( | char_spacing | , |
| 0 | , | ||
| "Inter-character space in ems" | |||
| ) |
| DOUBLE_PARAM_FLAG | ( | underline_start_prob | , |
| 0 | , | ||
| "Fraction of words to underline (value in [0,1])" | |||
| ) |
| DOUBLE_PARAM_FLAG | ( | underline_continuation_prob | , |
| 0 | , | ||
| "Fraction of words to underline (value in [0,1])" | |||
| ) |
| DOUBLE_PARAM_FLAG | ( | min_coverage | , |
| 1.0 | , | ||
| "If find_fonts==true, the minimum coverage the font has of " "the characters in the text file to include it, between " "0 and 1." | |||
| ) |
| INT_PARAM_FLAG | ( | exposure | , |
| 0 | , | ||
| "Exposure level in photocopier" | |||
| ) |
| INT_PARAM_FLAG | ( | resolution | , |
| 300 | , | ||
| "Pixels per inch" | |||
| ) |
| INT_PARAM_FLAG | ( | xsize | , |
| 3600 | , | ||
| "Width of output image" | |||
| ) |
| INT_PARAM_FLAG | ( | ysize | , |
| 4800 | , | ||
| "Height of output image" | |||
| ) |
| INT_PARAM_FLAG | ( | max_pages | , |
| 0 | , | ||
| "Maximum number of pages to output (0=unlimited)" | |||
| ) |
| INT_PARAM_FLAG | ( | margin | , |
| 100 | , | ||
| "Margin round edges of image" | |||
| ) |
| INT_PARAM_FLAG | ( | ptsize | , |
| 12 | , | ||
| "Size of printed text" | |||
| ) |
| INT_PARAM_FLAG | ( | leading | , |
| 12 | , | ||
| "Inter-line space (in pixels)" | |||
| ) |
| INT_PARAM_FLAG | ( | box_padding | , |
| 0 | , | ||
| "Padding around produced bounding boxes" | |||
| ) |
| INT_PARAM_FLAG | ( | glyph_resized_size | , |
| 0 | , | ||
| "Each glyph is square with this side length in pixels" | |||
| ) |
| INT_PARAM_FLAG | ( | glyph_num_border_pixels_to_pad | , |
| 0 | , | ||
| "Final_size=glyph_resized_size+2*glyph_num_border_pixels_to_pad" | |||
| ) |
| int main | ( | int | argc, |
| char ** | argv | ||
| ) |
Definition at line 686 of file text2image.cpp.
| STRING_PARAM_FLAG | ( | text | , |
| "" | , | ||
| "File name of text input to process" | |||
| ) |
| STRING_PARAM_FLAG | ( | outputbase | , |
| "" | , | ||
| "Basename for output image/box file" | |||
| ) |
| STRING_PARAM_FLAG | ( | writing_mode | , |
| "horizontal" | , | ||
| "Specify one of the following writing" " modes.\n" "'horizontal' : Render regular horizontal text. (default)\n" "'vertical' : Render vertical text. Glyph orientation is" " selected by Pango.\n" "'vertical-upright' : Render vertical text. Glyph " " orientation is set to be upright." | |||
| ) |
| STRING_PARAM_FLAG | ( | font | , |
| "Arial" | , | ||
| "Font description name to use" | |||
| ) |
| STRING_PARAM_FLAG | ( | unicharset_file | , |
| "" | , | ||
| "File with characters in the unicharset. If --render_ngrams" " is true and --unicharset_file is specified, ngrams with" " characters that are not in unicharset will be omitted" | |||
| ) |
| const int kRandomSeed = 0x18273645 |
Definition at line 58 of file text2image.cpp.