All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
tesseractmain.cpp
Go to the documentation of this file.
1 /**********************************************************************
2 * File: tessedit.cpp (Formerly tessedit.c)
3 * Description: Main program for merge of tess and editor.
4 * Author: Ray Smith
5 * Created: Tue Jan 07 15:21:46 GMT 1992
6 *
7 * (C) Copyright 1992, Hewlett-Packard Ltd.
8 ** Licensed under the Apache License, Version 2.0 (the "License");
9 ** you may not use this file except in compliance with the License.
10 ** You may obtain a copy of the License at
11 ** http://www.apache.org/licenses/LICENSE-2.0
12 ** Unless required by applicable law or agreed to in writing, software
13 ** distributed under the License is distributed on an "AS IS" BASIS,
14 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 ** See the License for the specific language governing permissions and
16 ** limitations under the License.
17 *
18 **********************************************************************/
19 
20 // Include automatically generated configuration file if running autoconf
21 #ifdef HAVE_CONFIG_H
22 #include "config_auto.h"
23 #endif
24 
25 #include <iostream>
26 
27 #include "allheaders.h"
28 #include "baseapi.h"
29 #include "basedir.h"
30 #include "renderer.h"
31 #include "strngs.h"
32 #include "tprintf.h"
33 #include "openclwrapper.h"
34 #include "osdetect.h"
35 
36 /**********************************************************************
37  * main()
38  *
39  **********************************************************************/
40 
41 int main(int argc, char **argv) {
42  if ((argc == 2 && strcmp(argv[1], "-v") == 0) ||
43  (argc == 2 && strcmp(argv[1], "--version") == 0)) {
44  char *versionStrP;
45 
46  fprintf(stderr, "tesseract %s\n", tesseract::TessBaseAPI::Version());
47 
48  versionStrP = getLeptonicaVersion();
49  fprintf(stderr, " %s\n", versionStrP);
50  lept_free(versionStrP);
51 
52  versionStrP = getImagelibVersions();
53  fprintf(stderr, " %s\n", versionStrP);
54  lept_free(versionStrP);
55 
56 #ifdef USE_OPENCL
57  cl_platform_id platform;
58  cl_uint num_platforms;
59  cl_device_id devices[2];
60  cl_uint num_devices;
61  char info[256];
62  int i;
63 
64  fprintf(stderr, " OpenCL info:\n");
65  clGetPlatformIDs(1, &platform, &num_platforms);
66  fprintf(stderr, " Found %d platforms.\n", num_platforms);
67  clGetPlatformInfo(platform, CL_PLATFORM_NAME, 256, info, 0);
68  fprintf(stderr, " Platform name: %s.\n", info);
69  clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 256, info, 0);
70  fprintf(stderr, " Version: %s.\n", info);
71  clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 2, devices, &num_devices);
72  fprintf(stderr, " Found %d devices.\n", num_devices);
73  for (i = 0; i < num_devices; ++i) {
74  clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 256, info, 0);
75  fprintf(stderr, " Device %d name: %s.\n", i+1, info);
76  }
77 #endif
78  exit(0);
79  }
80 
81  // Make the order of args a bit more forgiving than it used to be.
82  const char* lang = "eng";
83  const char* image = NULL;
84  const char* outputbase = NULL;
85  const char* datapath = NULL;
86  bool noocr = false;
87  bool list_langs = false;
88  bool print_parameters = false;
89  GenericVector<STRING> vars_vec, vars_values;
90 
92  int arg = 1;
93  while (arg < argc && (outputbase == NULL || argv[arg][0] == '-')) {
94  if (strcmp(argv[arg], "-l") == 0 && arg + 1 < argc) {
95  lang = argv[arg + 1];
96  ++arg;
97  } else if (strcmp(argv[arg], "--tessdata-dir") == 0 && arg + 1 < argc) {
98  datapath = argv[arg + 1];
99  ++arg;
100  } else if (strcmp(argv[arg], "--user-words") == 0 && arg + 1 < argc) {
101  vars_vec.push_back("user_words_file");
102  vars_values.push_back(argv[arg + 1]);
103  ++arg;
104  } else if (strcmp(argv[arg], "--user-patterns") == 0 && arg + 1 < argc) {
105  vars_vec.push_back("user_patterns_file");
106  vars_values.push_back(argv[arg + 1]);
107  ++arg;
108  } else if (strcmp(argv[arg], "--list-langs") == 0) {
109  noocr = true;
110  list_langs = true;
111  } else if (strcmp(argv[arg], "-psm") == 0 && arg + 1 < argc) {
112  pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[arg + 1]));
113  ++arg;
114  } else if (strcmp(argv[arg], "--print-parameters") == 0) {
115  noocr = true;
116  print_parameters = true;
117  } else if (strcmp(argv[arg], "-c") == 0 && arg + 1 < argc) {
118  // handled properly after api init
119  ++arg;
120  } else if (image == NULL) {
121  image = argv[arg];
122  } else if (outputbase == NULL) {
123  outputbase = argv[arg];
124  }
125  ++arg;
126  }
127 
128  if (argc == 2 && strcmp(argv[1], "--list-langs") == 0) {
129  list_langs = true;
130  noocr = true;
131  }
132 
133  if (outputbase == NULL && noocr == false) {
134  fprintf(stderr, "Usage:\n %s imagename|stdin outputbase|stdout "
135  "[options...] [configfile...]\n\n", argv[0]);
136 
137  fprintf(stderr, "OCR options:\n");
138  fprintf(stderr, " --tessdata-dir /path\tspecify the location of tessdata"
139  " path\n");
140  fprintf(stderr, " --user-words /path/to/file\tspecify the location of user"
141  " words file\n");
142  fprintf(stderr, " --user-patterns /path/to/file\tspecify the location of"
143  " user patterns file\n");
144  fprintf(stderr, " -l lang[+lang]\tspecify language(s) used for OCR\n");
145  fprintf(stderr, " -c configvar=value\tset value for control parameter.\n"
146  "\t\t\tMultiple -c arguments are allowed.\n");
147  fprintf(stderr, " -psm pagesegmode\tspecify page segmentation mode.\n");
148  fprintf(stderr, "These options must occur before any configfile.\n\n");
149  fprintf(stderr,
150  "pagesegmode values are:\n"
151  " 0 = Orientation and script detection (OSD) only.\n"
152  " 1 = Automatic page segmentation with OSD.\n"
153  " 2 = Automatic page segmentation, but no OSD, or OCR\n"
154  " 3 = Fully automatic page segmentation, but no OSD. (Default)\n"
155  " 4 = Assume a single column of text of variable sizes.\n"
156  " 5 = Assume a single uniform block of vertically aligned text.\n"
157  " 6 = Assume a single uniform block of text.\n"
158  " 7 = Treat the image as a single text line.\n"
159  " 8 = Treat the image as a single word.\n"
160  " 9 = Treat the image as a single word in a circle.\n"
161  " 10 = Treat the image as a single character.\n\n");
162  fprintf(stderr, "Single options:\n");
163  fprintf(stderr, " -v --version: version info\n");
164  fprintf(stderr, " --list-langs: list available languages for tesseract "
165  "engine. Can be used with --tessdata-dir.\n");
166  fprintf(stderr, " --print-parameters: print tesseract parameters to the "
167  "stdout.\n");
168  exit(1);
169  }
170 
171  if (outputbase != NULL && strcmp(outputbase, "-") &&
172  strcmp(outputbase, "stdout")) {
173  tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n",
175  }
176  PERF_COUNT_START("Tesseract:main")
178 
179  api.SetOutputName(outputbase);
180  int rc = api.Init(datapath, lang, tesseract::OEM_DEFAULT,
181  &(argv[arg]), argc - arg, &vars_vec, &vars_values, false);
182 
183  if (rc) {
184  fprintf(stderr, "Could not initialize tesseract.\n");
185  exit(1);
186  }
187 
188  char opt1[255], opt2[255];
189  for (arg = 0; arg < argc; arg++) {
190  if (strcmp(argv[arg], "-c") == 0 && arg + 1 < argc) {
191  strncpy(opt1, argv[arg + 1], 255);
192  char *p = strchr(opt1, '=');
193  if (!p) {
194  fprintf(stderr, "Missing = in configvar assignment\n");
195  exit(1);
196  }
197  *p = 0;
198  strncpy(opt2, strchr(argv[arg + 1], '=') + 1, 255);
199  opt2[254] = 0;
200  ++arg;
201 
202  if (!api.SetVariable(opt1, opt2)) {
203  fprintf(stderr, "Could not set option: %s=%s\n", opt1, opt2);
204  }
205  }
206  }
207 
208  if (list_langs) {
209  GenericVector<STRING> languages;
210  api.GetAvailableLanguagesAsVector(&languages);
211  fprintf(stderr, "List of available languages (%d):\n",
212  languages.size());
213  for (int index = 0; index < languages.size(); ++index) {
214  STRING& string = languages[index];
215  fprintf(stderr, "%s\n", string.string());
216  }
217  api.End();
218  exit(0);
219  }
220 
221  if (print_parameters) {
222  FILE* fout = stdout;
223  fprintf(stdout, "Tesseract parameters:\n");
224  api.PrintVariables(fout);
225  api.End();
226  exit(0);
227  }
228 
229  // We have 2 possible sources of pagesegmode: a config file and
230  // the command line. For backwards compatability reasons, the
231  // default in tesseract is tesseract::PSM_SINGLE_BLOCK, but the
232  // default for this program is tesseract::PSM_AUTO. We will let
233  // the config file take priority, so the command-line default
234  // can take priority over the tesseract default, so we use the
235  // value from the command line only if the retrieved mode
236  // is still tesseract::PSM_SINGLE_BLOCK, indicating no change
237  // in any config file. Therefore the only way to force
238  // tesseract::PSM_SINGLE_BLOCK is from the command line.
239  // It would be simpler if we could set the value before Init,
240  // but that doesn't work.
242  api.SetPageSegMode(pagesegmode);
243 
244  if (pagesegmode == tesseract::PSM_AUTO_ONLY ||
245  pagesegmode == tesseract::PSM_OSD_ONLY) {
246  int ret_val = 0;
247 
248  Pix* pixs = pixRead(image);
249  if (!pixs) {
250  fprintf(stderr, "Cannot open input file: %s\n", image);
251  exit(2);
252  }
253  api.SetImage(pixs);
254 
255  if (pagesegmode == tesseract::PSM_OSD_ONLY) {
256  OSResults osr;
257  if (api.DetectOS(&osr)) {
258  int orient = osr.best_result.orientation_id;
259  int script_id = osr.get_best_script(orient);
260  float orient_oco = osr.best_result.oconfidence;
261  float orient_sco = osr.best_result.sconfidence;
262  tprintf("Orientation: %d\nOrientation in degrees: %d\n" \
263  "Orientation confidence: %.2f\n" \
264  "Script: %d\nScript confidence: %.2f\n",
265  orient, OrientationIdToValue(orient), orient_oco,
266  script_id, orient_sco);
267  } else {
268  ret_val = 1;
269  }
270  } else {
271  tesseract::Orientation orientation;
274  float deskew_angle;
276  if (it) {
277  it->Orientation(&orientation, &direction, &order, &deskew_angle);
278  tprintf("Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n" \
279  "Deskew angle: %.4f\n",
280  orientation, direction, order, deskew_angle);
281  } else {
282  ret_val = 1;
283  }
284  delete it;
285  }
286  pixDestroy(&pixs);
287  exit(ret_val);
288  }
289 
290  bool b;
292  api.GetBoolVariable("tessedit_create_hocr", &b);
293  if (b) {
294  bool font_info;
295  api.GetBoolVariable("hocr_font_info", &font_info);
296  renderers.push_back(new tesseract::TessHOcrRenderer(outputbase, font_info));
297  }
298  api.GetBoolVariable("tessedit_create_pdf", &b);
299  if (b) {
300  renderers.push_back(new tesseract::TessPDFRenderer(outputbase,
301  api.GetDatapath()));
302  }
303  api.GetBoolVariable("tessedit_write_unlv", &b);
304  if (b) renderers.push_back(new tesseract::TessUnlvRenderer(outputbase));
305  api.GetBoolVariable("tessedit_create_boxfile", &b);
306  if (b) renderers.push_back(new tesseract::TessBoxTextRenderer(outputbase));
307  api.GetBoolVariable("tessedit_create_txt", &b);
308  if (b) renderers.push_back(new tesseract::TessTextRenderer(outputbase));
309  if (!renderers.empty()) {
310  // Since the PointerVector auto-deletes, null-out the renderers that are
311  // added to the root, and leave the root in the vector.
312  for (int r = 1; r < renderers.size(); ++r) {
313  renderers[0]->insert(renderers[r]);
314  renderers[r] = NULL;
315  }
316  if (!api.ProcessPages(image, NULL, 0, renderers[0])) {
317  fprintf(stderr, "Error during processing.\n");
318  exit(1);
319  }
320  }
321 
323  return 0; // Normal exit
324 }
static const char * Version()
Definition: baseapi.cpp:142
int size() const
Definition: genericvector.h:72
bool DetectOS(OSResults *)
Definition: baseapi.cpp:2184
void GetAvailableLanguagesAsVector(GenericVector< STRING > *langs) const
Definition: baseapi.cpp:370
int push_back(T object)
PageIterator * AnalyseLayout()
Definition: baseapi.h:498
#define tprintf(...)
Definition: tprintf.h:31
int direction(EDGEPT *point)
Definition: vecfuncs.cpp:43
bool SetVariable(const char *name, const char *value)
Definition: baseapi.cpp:216
float sconfidence
Definition: osdetect.h:43
const char * GetDatapath()
Definition: baseapi.cpp:954
PageSegMode GetPageSegMode() const
Definition: baseapi.cpp:467
int orientation_id
Definition: osdetect.h:41
bool GetBoolVariable(const char *name, bool *value) const
Definition: baseapi.cpp:236
float oconfidence
Definition: osdetect.h:44
void insert(T *t, int index)
Orientation and script detection only.
Definition: publictypes.h:152
const int OrientationIdToValue(const int &id)
Definition: osdetect.cpp:563
Automatic page segmentation, but no OSD, or OCR.
Definition: publictypes.h:155
bool ProcessPages(const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
Definition: baseapi.cpp:1070
#define PERF_COUNT_START(FUNCT_NAME)
int Init(const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_non_debug_params)
Definition: baseapi.cpp:276
Assume a single uniform block of text. (Default.)
Definition: publictypes.h:160
void SetOutputName(const char *name)
Definition: baseapi.cpp:209
bool empty() const
Definition: genericvector.h:84
#define PERF_COUNT_END
TESS_API int get_best_script(int orientation_id) const
Definition: osdetect.cpp:117
Fully automatic page segmentation, but no OSD.
Definition: publictypes.h:156
void Orientation(tesseract::Orientation *orientation, tesseract::WritingDirection *writing_direction, tesseract::TextlineOrder *textline_order, float *deskew_angle) const
Definition: strngs.h:44
#define NULL
Definition: host.h:144
void PrintVariables(FILE *fp) const
Definition: baseapi.cpp:264
void SetPageSegMode(PageSegMode mode)
Definition: baseapi.cpp:460
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: baseapi.cpp:525
OSBestResult best_result
Definition: osdetect.h:79
int main(int argc, char **argv)