tesseract  4.0.0-1-g2a2b
baseapi.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: baseapi.cpp
3  * Description: Simple API for calling tesseract.
4  * Author: Ray Smith
5  *
6  * (C) Copyright 2006, Google Inc.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  *
17  **********************************************************************/
18 
19 // Include automatically generated configuration file if running autoconf.
20 #ifdef HAVE_CONFIG_H
21 #include "config_auto.h"
22 #endif
23 
24 #include "baseapi.h"
25 #ifdef __linux__
26 #include <csignal> // for sigaction, SA_RESETHAND, SIGBUS, SIGFPE
27 #endif
28 
29 #if defined(_WIN32)
30 #if defined(__MINGW32__)
31 // workaround for stdlib.h with -std=c++11 for _splitpath and _MAX_FNAME
32 #undef __STRICT_ANSI__
33 #endif // __MINGW32__
34 #include <fcntl.h>
35 #include <io.h>
36 #else
37 #include <dirent.h> // for closedir, opendir, readdir, DIR, dirent
38 #include <libgen.h>
39 #include <sys/types.h>
40 #include <sys/stat.h> // for stat, S_IFDIR
41 #include <unistd.h>
42 #endif // _WIN32
43 
44 #include <clocale> // for LC_ALL, LC_CTYPE, LC_NUMERIC
45 #include <cmath> // for round, M_PI
46 #include <cstdint> // for int32_t
47 #include <cstring> // for strcmp, strcpy
48 #include <fstream> // for size_t
49 #include <iostream> // for std::cin
50 #include <memory> // for std::unique_ptr
51 #include <set> // for std::pair
52 #include <vector> // for std::vector
53 #include "allheaders.h" // for pixDestroy, boxCreate, boxaAddBox, box...
54 #include "blobclass.h" // for ExtractFontName
55 #include "boxword.h" // for BoxWord
56 #include "config_auto.h" // for PACKAGE_VERSION
57 #include "coutln.h" // for C_OUTLINE_IT, C_OUTLINE_LIST
58 #include "dawg_cache.h" // for DawgCache
59 #include "dict.h" // for Dict
60 #include "edgblob.h" // for extract_edges
61 #include "elst.h" // for ELIST_ITERATOR, ELISTIZE, ELISTIZEH
62 #include "environ.h" // for l_uint8, FALSE, TRUE
63 #include "equationdetect.h" // for EquationDetect
64 #include "errcode.h" // for ASSERT_HOST
65 #include "globaloc.h" // for SavePixForCrash, signal_exit
66 #include "helpers.h" // for IntCastRounded, chomp_string
67 #include "host.h" // for BOOL8
68 #include "imageio.h" // for IFF_TIFF_G4, IFF_TIFF, IFF_TIFF_G3
69 #include "intfx.h" // for INT_FX_RESULT_STRUCT
70 #include "mutableiterator.h" // for MutableIterator
71 #include "normalis.h" // for kBlnBaselineOffset, kBlnXHeight
72 #include "ocrclass.h" // for ETEXT_DESC
73 #include "openclwrapper.h" // for PERF_COUNT_END, PERF_COUNT_START, PERF...
74 #include "osdetect.h" // for OSResults, OSBestResult, OrientationId...
75 #include "pageres.h" // for PAGE_RES_IT, WERD_RES, PAGE_RES, CR_DE...
76 #include "paragraphs.h" // for DetectParagraphs
77 #include "params.h" // for BoolParam, IntParam, DoubleParam, Stri...
78 #include "pdblock.h" // for PDBLK
79 #include "points.h" // for FCOORD
80 #include "polyblk.h" // for POLY_BLOCK
81 #include "rect.h" // for TBOX
82 #include "renderer.h" // for TessResultRenderer
83 #include "resultiterator.h" // for ResultIterator
84 #include "stepblob.h" // for C_BLOB_IT, C_BLOB, C_BLOB_LIST
85 #include "strngs.h" // for STRING
86 #include "tessdatamanager.h" // for TessdataManager, kTrainedDataSuffix
87 #include "tesseractclass.h" // for Tesseract
88 #include "thresholder.h" // for ImageThresholder
89 #include "tprintf.h" // for tprintf
90 #include "werd.h" // for WERD, WERD_IT, W_FUZZY_NON, W_FUZZY_SP
91 
92 BOOL_VAR(stream_filelist, FALSE, "Stream a filelist from stdin");
93 
94 namespace tesseract {
95 
// Smallest rectangle side that TesseractRect will process: a request whose
// width or height is below this value returns nullptr without doing any work.
97 const int kMinRectSize = 10;
// NOTE(review): not referenced in the visible part of this file; judging by
// the name, presumably the character output for unrecognized text - confirm.
99 const char kTesseractReject = '~';
// NOTE(review): not referenced in the visible part of this file; presumably
// the reject marker used for UNLV-format output - confirm.
101 const char kUNLVReject = '~';
// NOTE(review): not referenced in the visible part of this file; presumably
// the suspect marker used for UNLV-format output - confirm.
103 const char kUNLVSuspect = '^';
// NOTE(review): not referenced in the visible part of this file; presumably a
// fallback input file name used when none has been set - confirm.
108 const char* kInputFile = "noname.tif";
// File that ProcessPage writes the current variables to (via PrintVariables)
// before it switches to the retry configuration.
112 const char* kOldVarsFile = "failed_vars.txt";
// Size of the character buffer used by ProcessPagesMultipageTiff to format a
// page number as a decimal string.
114 const int kMaxIntSize = 22;
115 
116 /* Add all available languages recursively.
117 */
118 static void addAvailableLanguages(const STRING &datadir, const STRING &base,
119  GenericVector<STRING>* langs)
120 {
121  const STRING base2 = (base.string()[0] == '\0') ? base : base + "/";
122  const size_t extlen = sizeof(kTrainedDataSuffix);
123 #ifdef _WIN32
124  WIN32_FIND_DATA data;
125  HANDLE handle = FindFirstFile((datadir + base2 + "*").string(), &data);
126  if (handle != INVALID_HANDLE_VALUE) {
127  BOOL result = TRUE;
128  for (; result;) {
129  char *name = data.cFileName;
130  // Skip '.', '..', and hidden files
131  if (name[0] != '.') {
132  if ((data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) ==
133  FILE_ATTRIBUTE_DIRECTORY) {
134  addAvailableLanguages(datadir, base2 + name, langs);
135  } else {
136  size_t len = strlen(name);
137  if (len > extlen && name[len - extlen] == '.' &&
138  strcmp(&name[len - extlen + 1], kTrainedDataSuffix) == 0) {
139  name[len - extlen] = '\0';
140  langs->push_back(base2 + name);
141  }
142  }
143  }
144  result = FindNextFile(handle, &data);
145  }
146  FindClose(handle);
147  }
148 #else // _WIN32
149  DIR* dir = opendir((datadir + base).string());
150  if (dir != nullptr) {
151  dirent *de;
152  while ((de = readdir(dir))) {
153  char *name = de->d_name;
154  // Skip '.', '..', and hidden files
155  if (name[0] != '.') {
156  struct stat st;
157  if (stat((datadir + base2 + name).string(), &st) == 0 &&
158  (st.st_mode & S_IFDIR) == S_IFDIR) {
159  addAvailableLanguages(datadir, base2 + name, langs);
160  } else {
161  size_t len = strlen(name);
162  if (len > extlen && name[len - extlen] == '.' &&
163  strcmp(&name[len - extlen + 1], kTrainedDataSuffix) == 0) {
164  name[len - extlen] = '\0';
165  langs->push_back(base2 + name);
166  }
167  }
168  }
169  }
170  closedir(dir);
171  }
172 #endif
173 }
174 
175 // Compare two STRING values (used for sorting).
176 static int CompareSTRING(const void* p1, const void* p2) {
177  const STRING* s1 = static_cast<const STRING*>(p1);
178  const STRING* s2 = static_cast<const STRING*>(p2);
179  return strcmp(s1->c_str(), s2->c_str());
180 }
181 
183  : tesseract_(nullptr),
184  osd_tesseract_(nullptr),
185  equ_detect_(nullptr),
186  reader_(nullptr),
187  // Thresholder is initialized to nullptr here, but will be set before use by:
188  // A constructor of a derived API, SetThresholder(), or
189  // created implicitly when used in InternalSetImage.
190  thresholder_(nullptr),
191  paragraph_models_(nullptr),
192  block_list_(nullptr),
193  page_res_(nullptr),
194  input_file_(nullptr),
195  output_file_(nullptr),
196  datapath_(nullptr),
197  language_(nullptr),
198  last_oem_requested_(OEM_DEFAULT),
199  recognition_done_(false),
200  truth_cb_(nullptr),
201  rect_left_(0),
202  rect_top_(0),
203  rect_width_(0),
204  rect_height_(0),
205  image_width_(0),
206  image_height_(0) {
207  const char *locale;
208  locale = std::setlocale(LC_ALL, nullptr);
209  ASSERT_HOST(!strcmp(locale, "C"));
210  locale = std::setlocale(LC_CTYPE, nullptr);
211  ASSERT_HOST(!strcmp(locale, "C"));
212  locale = std::setlocale(LC_NUMERIC, nullptr);
213  ASSERT_HOST(!strcmp(locale, "C"));
214 }
215 
217  End();
218 }
219 
// Returns the library version string, which is whatever the PACKAGE_VERSION
// macro expands to in this build.
223 const char* TessBaseAPI::Version() {
224  return PACKAGE_VERSION;
225 }
226 
234 #ifdef USE_OPENCL
235 #ifdef USE_DEVICE_SELECTION
236 #include "opencl_device_selection.h"
237 #endif
238 #endif
239 size_t TessBaseAPI::getOpenCLDevice(void **data) {
240 #ifdef USE_OPENCL
241 #ifdef USE_DEVICE_SELECTION
242  ds_device device = OpenclDevice::getDeviceSelection();
243  if (device.type == DS_DEVICE_OPENCL_DEVICE) {
244  *data = new cl_device_id;
245  memcpy(*data, &device.oclDeviceID, sizeof(cl_device_id));
246  return sizeof(cl_device_id);
247  }
248 #endif
249 #endif
250 
251  *data = nullptr;
252  return 0;
253 }
254 
260 #ifdef __linux__
261  struct sigaction action;
262  memset(&action, 0, sizeof(action));
263  action.sa_handler = &signal_exit;
264  action.sa_flags = SA_RESETHAND;
265  sigaction(SIGSEGV, &action, nullptr);
266  sigaction(SIGFPE, &action, nullptr);
267  sigaction(SIGBUS, &action, nullptr);
268 #else
269  // Warn API users that an implementation is needed.
270  tprintf("CatchSignals has no non-linux implementation!\n");
271 #endif
272 }
273 
278 void TessBaseAPI::SetInputName(const char* name) {
279  if (input_file_ == nullptr)
280  input_file_ = new STRING(name);
281  else
282  *input_file_ = name;
283 }
284 
286 void TessBaseAPI::SetOutputName(const char* name) {
287  if (output_file_ == nullptr)
288  output_file_ = new STRING(name);
289  else
290  *output_file_ = name;
291 }
292 
293 bool TessBaseAPI::SetVariable(const char* name, const char* value) {
294  if (tesseract_ == nullptr) tesseract_ = new Tesseract;
296  tesseract_->params());
297 }
298 
299 bool TessBaseAPI::SetDebugVariable(const char* name, const char* value) {
300  if (tesseract_ == nullptr) tesseract_ = new Tesseract;
302  tesseract_->params());
303 }
304 
305 bool TessBaseAPI::GetIntVariable(const char *name, int *value) const {
306  IntParam *p = ParamUtils::FindParam<IntParam>(
308  if (p == nullptr) return false;
309  *value = (int32_t)(*p);
310  return true;
311 }
312 
313 bool TessBaseAPI::GetBoolVariable(const char *name, bool *value) const {
314  BoolParam *p = ParamUtils::FindParam<BoolParam>(
316  if (p == nullptr) return false;
317  *value = (BOOL8)(*p);
318  return true;
319 }
320 
321 const char *TessBaseAPI::GetStringVariable(const char *name) const {
322  StringParam *p = ParamUtils::FindParam<StringParam>(
324  return (p != nullptr) ? p->string() : nullptr;
325 }
326 
327 bool TessBaseAPI::GetDoubleVariable(const char *name, double *value) const {
328  DoubleParam *p = ParamUtils::FindParam<DoubleParam>(
330  if (p == nullptr) return false;
331  *value = (double)(*p);
332  return true;
333 }
334 
336 bool TessBaseAPI::GetVariableAsString(const char *name, STRING *val) {
337  return ParamUtils::GetParamAsString(name, tesseract_->params(), val);
338 }
339 
341 void TessBaseAPI::PrintVariables(FILE *fp) const {
343 }
344 
353 int TessBaseAPI::Init(const char* datapath, const char* language,
354  OcrEngineMode oem, char **configs, int configs_size,
355  const GenericVector<STRING> *vars_vec,
356  const GenericVector<STRING> *vars_values,
357  bool set_only_non_debug_params) {
358  return Init(datapath, 0, language, oem, configs, configs_size, vars_vec,
359  vars_values, set_only_non_debug_params, nullptr);
360 }
361 
362 // In-memory version reads the traineddata file directly from the given
363 // data[data_size] array. Also implements the version with a datapath in data,
364 // flagged by data_size = 0.
365 int TessBaseAPI::Init(const char* data, int data_size, const char* language,
366  OcrEngineMode oem, char** configs, int configs_size,
367  const GenericVector<STRING>* vars_vec,
368  const GenericVector<STRING>* vars_values,
369  bool set_only_non_debug_params, FileReader reader) {
370  PERF_COUNT_START("TessBaseAPI::Init")
371  // Default language is "eng".
372  if (language == nullptr) language = "eng";
373  STRING datapath = data_size == 0 ? data : language;
374  // If the datapath, OcrEngineMode or the language have changed - start again.
375  // Note that the language_ field stores the last requested language that was
376  // initialized successfully, while tesseract_->lang stores the language
377  // actually used. They differ only if the requested language was nullptr, in
378  // which case tesseract_->lang is set to the Tesseract default ("eng").
379  if (tesseract_ != nullptr &&
380  (datapath_ == nullptr || language_ == nullptr || *datapath_ != datapath ||
382  (*language_ != language && tesseract_->lang != language))) {
383  delete tesseract_;
384  tesseract_ = nullptr;
385  }
386  // PERF_COUNT_SUB("delete tesseract_")
387 #ifdef USE_OPENCL
388  OpenclDevice od;
389  od.InitEnv();
390 #endif
391  PERF_COUNT_SUB("OD::InitEnv()")
392  bool reset_classifier = true;
393  if (tesseract_ == nullptr) {
394  reset_classifier = false;
395  tesseract_ = new Tesseract;
396  if (reader != nullptr) reader_ = reader;
398  if (data_size != 0) {
399  mgr.LoadMemBuffer(language, data, data_size);
400  }
402  datapath.string(),
403  output_file_ != nullptr ? output_file_->string() : nullptr,
404  language, oem, configs, configs_size, vars_vec, vars_values,
405  set_only_non_debug_params, &mgr) != 0) {
406  return -1;
407  }
408  }
409 
410  PERF_COUNT_SUB("update tesseract_")
411  // Update datapath and language requested for the last valid initialization.
412  if (datapath_ == nullptr)
413  datapath_ = new STRING(datapath);
414  else
415  *datapath_ = datapath;
416  if ((strcmp(datapath_->string(), "") == 0) &&
417  (strcmp(tesseract_->datadir.string(), "") != 0))
419 
420  if (language_ == nullptr)
421  language_ = new STRING(language);
422  else
423  *language_ = language;
425 
426 #ifndef DISABLED_LEGACY_ENGINE
427  // PERF_COUNT_SUB("update last_oem_requested_")
428  // For same language and datapath, just reset the adaptive classifier.
429  if (reset_classifier) {
431  PERF_COUNT_SUB("tesseract_->ResetAdaptiveClassifier()")
432  }
433 #endif // ndef DISABLED_LEGACY_ENGINE
435  return 0;
436 }
437 
447  return (language_ == nullptr || language_->string() == nullptr) ?
448  "" : language_->string();
449 }
450 
457  GenericVector<STRING>* langs) const {
458  langs->clear();
459  if (tesseract_ != nullptr) {
460  langs->push_back(tesseract_->lang);
461  int num_subs = tesseract_->num_sub_langs();
462  for (int i = 0; i < num_subs; ++i)
463  langs->push_back(tesseract_->get_sub_lang(i)->lang);
464  }
465 }
466 
471  GenericVector<STRING>* langs) const {
472  langs->clear();
473  if (tesseract_ != nullptr) {
474  addAvailableLanguages(tesseract_->datadir, "", langs);
475  langs->sort(CompareSTRING);
476  }
477 }
478 
479 //TODO(amit): Adapt to lstm
480 #ifndef DISABLED_LEGACY_ENGINE
481 
487 int TessBaseAPI::InitLangMod(const char* datapath, const char* language) {
488  if (tesseract_ == nullptr)
489  tesseract_ = new Tesseract;
490  else
492  TessdataManager mgr;
493  return tesseract_->init_tesseract_lm(datapath, nullptr, language, &mgr);
494 }
495 #endif // ndef DISABLED_LEGACY_ENGINE
496 
502  if (tesseract_ == nullptr) {
503  tesseract_ = new Tesseract;
504  #ifndef DISABLED_LEGACY_ENGINE
506  #endif
507  }
508 }
509 
515 void TessBaseAPI::ReadConfigFile(const char* filename) {
517 }
518 
520 void TessBaseAPI::ReadDebugConfigFile(const char* filename) {
522 }
523 
530  if (tesseract_ == nullptr)
531  tesseract_ = new Tesseract;
532  tesseract_->tessedit_pageseg_mode.set_value(mode);
533 }
534 
537  if (tesseract_ == nullptr)
538  return PSM_SINGLE_BLOCK;
539  return static_cast<PageSegMode>(
540  static_cast<int>(tesseract_->tessedit_pageseg_mode));
541 }
542 
556 char* TessBaseAPI::TesseractRect(const unsigned char* imagedata,
557  int bytes_per_pixel,
558  int bytes_per_line,
559  int left, int top,
560  int width, int height) {
561  if (tesseract_ == nullptr || width < kMinRectSize || height < kMinRectSize)
562  return nullptr; // Nothing worth doing.
563 
564  // Since this original api didn't give the exact size of the image,
565  // we have to invent a reasonable value.
566  int bits_per_pixel = bytes_per_pixel == 0 ? 1 : bytes_per_pixel * 8;
567  SetImage(imagedata, bytes_per_line * 8 / bits_per_pixel, height + top,
568  bytes_per_pixel, bytes_per_line);
569  SetRectangle(left, top, width, height);
570 
571  return GetUTF8Text();
572 }
573 
574 #ifndef DISABLED_LEGACY_ENGINE
575 
580  if (tesseract_ == nullptr)
581  return;
584 }
585 #endif // ndef DISABLED_LEGACY_ENGINE
586 
594 void TessBaseAPI::SetImage(const unsigned char* imagedata,
595  int width, int height,
596  int bytes_per_pixel, int bytes_per_line) {
597  if (InternalSetImage()) {
598  thresholder_->SetImage(imagedata, width, height,
599  bytes_per_pixel, bytes_per_line);
601  }
602 }
603 
605  if (thresholder_)
607  else
608  tprintf("Please call SetImage before SetSourceResolution.\n");
609 }
610 
619 void TessBaseAPI::SetImage(Pix* pix) {
620  if (InternalSetImage()) {
621  if (pixGetSpp(pix) == 4 && pixGetInputFormat(pix) == IFF_PNG) {
622  // remove alpha channel from png
623  PIX* p1 = pixRemoveAlpha(pix);
624  pixSetSpp(p1, 3);
625  pix = pixCopy(nullptr, p1);
626  pixDestroy(&p1);
627  }
628  thresholder_->SetImage(pix);
630  }
631 }
632 
638 void TessBaseAPI::SetRectangle(int left, int top, int width, int height) {
639  if (thresholder_ == nullptr)
640  return;
641  thresholder_->SetRectangle(left, top, width, height);
642  ClearResults();
643 }
644 
650  if (tesseract_ == nullptr || thresholder_ == nullptr) return nullptr;
651  if (tesseract_->pix_binary() == nullptr &&
653  return nullptr;
654  }
655  return pixClone(tesseract_->pix_binary());
656 }
657 
663 Boxa* TessBaseAPI::GetRegions(Pixa** pixa) {
664  return GetComponentImages(RIL_BLOCK, false, pixa, nullptr);
665 }
666 
675 Boxa* TessBaseAPI::GetTextlines(const bool raw_image, const int raw_padding,
676  Pixa** pixa, int** blockids, int** paraids) {
677  return GetComponentImages(RIL_TEXTLINE, true, raw_image, raw_padding,
678  pixa, blockids, paraids);
679 }
680 
689 Boxa* TessBaseAPI::GetStrips(Pixa** pixa, int** blockids) {
690  return GetComponentImages(RIL_TEXTLINE, false, pixa, blockids);
691 }
692 
698 Boxa* TessBaseAPI::GetWords(Pixa** pixa) {
699  return GetComponentImages(RIL_WORD, true, pixa, nullptr);
700 }
701 
709  return GetComponentImages(RIL_SYMBOL, true, pixa, nullptr);
710 }
711 
721  bool text_only, bool raw_image,
722  const int raw_padding,
723  Pixa** pixa, int** blockids,
724  int** paraids) {
725  PageIterator* page_it = GetIterator();
726  if (page_it == nullptr)
727  page_it = AnalyseLayout();
728  if (page_it == nullptr)
729  return nullptr; // Failed.
730 
731  // Count the components to get a size for the arrays.
732  int component_count = 0;
733  int left, top, right, bottom;
734 
735  TessResultCallback<bool>* get_bbox = nullptr;
736  if (raw_image) {
737  // Get bounding box in original raw image with padding.
739  level, raw_padding,
740  &left, &top, &right, &bottom);
741  } else {
742  // Get bounding box from binarized image. Note that this could be
743  // differently scaled from the original image.
744  get_bbox = NewPermanentTessCallback(page_it,
746  level, &left, &top, &right, &bottom);
747  }
748  do {
749  if (get_bbox->Run() &&
750  (!text_only || PTIsTextType(page_it->BlockType())))
751  ++component_count;
752  } while (page_it->Next(level));
753 
754  Boxa* boxa = boxaCreate(component_count);
755  if (pixa != nullptr)
756  *pixa = pixaCreate(component_count);
757  if (blockids != nullptr)
758  *blockids = new int[component_count];
759  if (paraids != nullptr)
760  *paraids = new int[component_count];
761 
762  int blockid = 0;
763  int paraid = 0;
764  int component_index = 0;
765  page_it->Begin();
766  do {
767  if (get_bbox->Run() &&
768  (!text_only || PTIsTextType(page_it->BlockType()))) {
769  Box* lbox = boxCreate(left, top, right - left, bottom - top);
770  boxaAddBox(boxa, lbox, L_INSERT);
771  if (pixa != nullptr) {
772  Pix* pix = nullptr;
773  if (raw_image) {
774  pix = page_it->GetImage(level, raw_padding, GetInputImage(), &left,
775  &top);
776  } else {
777  pix = page_it->GetBinaryImage(level);
778  }
779  pixaAddPix(*pixa, pix, L_INSERT);
780  pixaAddBox(*pixa, lbox, L_CLONE);
781  }
782  if (paraids != nullptr) {
783  (*paraids)[component_index] = paraid;
784  if (page_it->IsAtFinalElement(RIL_PARA, level))
785  ++paraid;
786  }
787  if (blockids != nullptr) {
788  (*blockids)[component_index] = blockid;
789  if (page_it->IsAtFinalElement(RIL_BLOCK, level)) {
790  ++blockid;
791  paraid = 0;
792  }
793  }
794  ++component_index;
795  }
796  } while (page_it->Next(level));
797  delete page_it;
798  delete get_bbox;
799  return boxa;
800 }
801 
803  if (thresholder_ == nullptr) {
804  return 0;
805  }
806  return thresholder_->GetScaleFactor();
807 }
808 
825 
826 PageIterator* TessBaseAPI::AnalyseLayout(bool merge_similar_words) {
827  if (FindLines() == 0) {
828  if (block_list_->empty())
829  return nullptr; // The page was empty.
830  page_res_ = new PAGE_RES(merge_similar_words, block_list_, nullptr);
831  DetectParagraphs(false);
832  return new PageIterator(
836  }
837  return nullptr;
838 }
839 
845  if (tesseract_ == nullptr)
846  return -1;
847  if (FindLines() != 0)
848  return -1;
849  delete page_res_;
850  if (block_list_->empty()) {
851  page_res_ = new PAGE_RES(false, block_list_,
853  return 0; // Empty page.
854  }
855 
857  recognition_done_ = true;
858 #ifndef DISABLED_LEGACY_ENGINE
863  } else
864 #endif // ndef DISABLED_LEGACY_ENGINE
865  {
868  }
869 
870  if (page_res_ == nullptr) {
871  return -1;
872  }
873 
877  return 0;
878  }
879 #ifndef DISABLED_LEGACY_ENGINE
882  return 0;
883  }
884 #endif // ndef DISABLED_LEGACY_ENGINE
885 
886  if (truth_cb_ != nullptr) {
887  tesseract_->wordrec_run_blamer.set_value(true);
888  PageIterator *page_it = new PageIterator(
893  image_height_, page_it, this->tesseract()->pix_grey());
894  delete page_it;
895  }
896 
897  int result = 0;
899  #ifndef GRAPHICS_DISABLED
901  #endif // GRAPHICS_DISABLED
902  // The page_res is invalid after an interactive session, so cleanup
903  // in a way that lets us continue to the next page without crashing.
904  delete page_res_;
905  page_res_ = nullptr;
906  return -1;
907  #ifndef DISABLED_LEGACY_ENGINE
909  STRING fontname;
910  ExtractFontName(*output_file_, &fontname);
912  } else if (tesseract_->tessedit_ambigs_training) {
913  FILE *training_output_file = tesseract_->init_recog_training(*input_file_);
914  // OCR the page segmented into words by tesseract.
916  *input_file_, page_res_, monitor, training_output_file);
917  fclose(training_output_file);
918  #endif // ndef DISABLED_LEGACY_ENGINE
919  } else {
920  // Now run the main recognition.
921  bool wait_for_text = true;
922  GetBoolVariable("paragraph_text_based", &wait_for_text);
923  if (!wait_for_text) DetectParagraphs(false);
924  if (tesseract_->recog_all_words(page_res_, monitor, nullptr, nullptr, 0)) {
925  if (wait_for_text) DetectParagraphs(true);
926  } else {
927  result = -1;
928  }
929  }
930  return result;
931 }
932 
933 #ifndef DISABLED_LEGACY_ENGINE
934 
936  if (tesseract_ == nullptr)
937  return -1;
938  if (thresholder_ == nullptr || thresholder_->IsEmpty()) {
939  tprintf("Please call SetImage before attempting recognition.\n");
940  return -1;
941  }
942  if (page_res_ != nullptr)
943  ClearResults();
944  if (FindLines() != 0)
945  return -1;
946  // Additional conditions under which chopper test cannot be run
947  if (tesseract_->interactive_display_mode) return -1;
948 
949  recognition_done_ = true;
950 
951  page_res_ = new PAGE_RES(false, block_list_,
953 
954  PAGE_RES_IT page_res_it(page_res_);
955 
956  while (page_res_it.word() != nullptr) {
957  WERD_RES *word_res = page_res_it.word();
958  GenericVector<TBOX> boxes;
959  tesseract_->MaximallyChopWord(boxes, page_res_it.block()->block,
960  page_res_it.row()->row, word_res);
961  page_res_it.forward();
962  }
963  return 0;
964 }
965 #endif // ndef DISABLED_LEGACY_ENGINE
966 
967 // Takes ownership of the input pix.
969 
971 
973  if (input_file_)
974  return input_file_->c_str();
975  return nullptr;
976 }
977 
978 const char * TessBaseAPI::GetDatapath() {
979  return tesseract_->datadir.c_str();
980 }
981 
984 }
985 
986 // If flist exists, get data from there. Otherwise get data from buf.
987 // Seems convoluted, but is the easiest way I know of to meet multiple
988 // goals. Support streaming from stdin, and also work on platforms
989 // lacking fmemopen.
990 bool TessBaseAPI::ProcessPagesFileList(FILE *flist,
991  STRING *buf,
992  const char* retry_config,
993  int timeout_millisec,
994  TessResultRenderer* renderer,
995  int tessedit_page_number) {
996  if (!flist && !buf) return false;
997  int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
998  char pagename[MAX_PATH];
999 
1000  GenericVector<STRING> lines;
1001  if (!flist) {
1002  buf->split('\n', &lines);
1003  if (lines.empty()) return false;
1004  }
1005 
1006  // Skip to the requested page number.
1007  for (int i = 0; i < page; i++) {
1008  if (flist) {
1009  if (fgets(pagename, sizeof(pagename), flist) == nullptr) break;
1010  }
1011  }
1012 
1013  // Begin producing output
1014  if (renderer && !renderer->BeginDocument(unknown_title_)) {
1015  return false;
1016  }
1017 
1018  // Loop over all pages - or just the requested one
1019  while (true) {
1020  if (flist) {
1021  if (fgets(pagename, sizeof(pagename), flist) == nullptr) break;
1022  } else {
1023  if (page >= lines.size()) break;
1024  snprintf(pagename, sizeof(pagename), "%s", lines[page].c_str());
1025  }
1026  chomp_string(pagename);
1027  Pix *pix = pixRead(pagename);
1028  if (pix == nullptr) {
1029  tprintf("Image file %s cannot be read!\n", pagename);
1030  return false;
1031  }
1032  tprintf("Page %d : %s\n", page, pagename);
1033  bool r = ProcessPage(pix, page, pagename, retry_config,
1034  timeout_millisec, renderer);
1035  pixDestroy(&pix);
1036  if (!r) return false;
1037  if (tessedit_page_number >= 0) break;
1038  ++page;
1039  }
1040 
1041  // Finish producing output
1042  if (renderer && !renderer->EndDocument()) {
1043  return false;
1044  }
1045  return true;
1046 }
1047 
1048 bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data,
1049  size_t size,
1050  const char* filename,
1051  const char* retry_config,
1052  int timeout_millisec,
1053  TessResultRenderer* renderer,
1054  int tessedit_page_number) {
1055 #ifndef ANDROID_BUILD
1056  Pix *pix = nullptr;
1057  int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
1058  size_t offset = 0;
1059  for (; ; ++page) {
1060  if (tessedit_page_number >= 0)
1061  page = tessedit_page_number;
1062  pix = (data) ? pixReadMemFromMultipageTiff(data, size, &offset)
1063  : pixReadFromMultipageTiff(filename, &offset);
1064  if (pix == nullptr) break;
1065  tprintf("Page %d\n", page + 1);
1066  char page_str[kMaxIntSize];
1067  snprintf(page_str, kMaxIntSize - 1, "%d", page);
1068  SetVariable("applybox_page", page_str);
1069  bool r = ProcessPage(pix, page, filename, retry_config,
1070  timeout_millisec, renderer);
1071  pixDestroy(&pix);
1072  if (!r) return false;
1073  if (tessedit_page_number >= 0) break;
1074  if (!offset) break;
1075  }
1076  return true;
1077 #else
1078  return false;
1079 #endif
1080 }
1081 
1082 // Master ProcessPages calls ProcessPagesInternal and then does any post-
1083 // processing required due to being in a training mode.
1084 bool TessBaseAPI::ProcessPages(const char* filename, const char* retry_config,
1085  int timeout_millisec,
1086  TessResultRenderer* renderer) {
1087  bool result =
1088  ProcessPagesInternal(filename, retry_config, timeout_millisec, renderer);
1089  #ifndef DISABLED_LEGACY_ENGINE
1090  if (result) {
1093  tprintf("Write of TR file failed: %s\n", output_file_->string());
1094  return false;
1095  }
1096  }
1097  #endif // ndef DISABLED_LEGACY_ENGINE
1098  return result;
1099 }
1100 
1101 // In the ideal scenario, Tesseract will start working on data as soon
1102 // as it can. For example, if you stream a filelist through stdin, we
1103 // should start the OCR process as soon as the first filename is
1104 // available. This is particularly useful when hooking Tesseract up to
1105 // slow hardware such as a book scanning machine.
1106 //
1107 // Unfortunately there are tradeoffs. You can't seek on stdin. That
1108 // makes automatic detection of datatype (TIFF? filelist? PNG?)
1109 // impractical. So we support a command line flag to explicitly
1110 // identify the scenario that really matters: filelists on
1111 // stdin. We'll still do our best if the user likes pipes.
1112 bool TessBaseAPI::ProcessPagesInternal(const char* filename,
1113  const char* retry_config,
1114  int timeout_millisec,
1115  TessResultRenderer* renderer) {
1116  PERF_COUNT_START("ProcessPages")
1117  bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "-");
1118  if (stdInput) {
1119 #ifdef WIN32
1120  if (_setmode(_fileno(stdin), _O_BINARY) == -1)
1121  tprintf("ERROR: cin to binary: %s", strerror(errno));
1122 #endif // WIN32
1123  }
1124 
1125  if (stream_filelist) {
1126  return ProcessPagesFileList(stdin, nullptr, retry_config,
1127  timeout_millisec, renderer,
1129  }
1130 
1131  // At this point we are officially in autodetection territory.
1132  // That means any data in stdin must be buffered, to make it
1133  // seekable.
1134  std::string buf;
1135  const l_uint8 *data = nullptr;
1136  if (stdInput) {
1137  buf.assign((std::istreambuf_iterator<char>(std::cin)),
1138  (std::istreambuf_iterator<char>()));
1139  data = reinterpret_cast<const l_uint8 *>(buf.data());
1140  } else {
1141  // Check whether the input file can be read.
1142  if (FILE* file = fopen(filename, "rb")) {
1143  fclose(file);
1144  } else {
1145  fprintf(stderr, "Error, cannot read input file %s: %s\n",
1146  filename, strerror(errno));
1147  return false;
1148  }
1149  }
1150 
1151  // Here is our autodetection
1152  int format;
1153  int r = (stdInput) ?
1154  findFileFormatBuffer(data, &format) :
1155  findFileFormat(filename, &format);
1156 
1157  // Maybe we have a filelist
1158  if (r != 0 || format == IFF_UNKNOWN) {
1159  STRING s;
1160  if (stdInput) {
1161  s = buf.c_str();
1162  } else {
1163  std::ifstream t(filename);
1164  std::string u((std::istreambuf_iterator<char>(t)),
1165  std::istreambuf_iterator<char>());
1166  s = u.c_str();
1167  }
1168  return ProcessPagesFileList(nullptr, &s, retry_config,
1169  timeout_millisec, renderer,
1171  }
1172 
1173  // Maybe we have a TIFF which is potentially multipage
1174  bool tiff = (format == IFF_TIFF || format == IFF_TIFF_PACKBITS ||
1175  format == IFF_TIFF_RLE || format == IFF_TIFF_G3 ||
1176  format == IFF_TIFF_G4 || format == IFF_TIFF_LZW ||
1177  format == IFF_TIFF_ZIP);
1178 
1179  // Fail early if we can, before producing any output
1180  Pix *pix = nullptr;
1181  if (!tiff) {
1182  pix = (stdInput) ? pixReadMem(data, buf.size()) : pixRead(filename);
1183  if (pix == nullptr) {
1184  return false;
1185  }
1186  }
1187 
1188  // Begin the output
1189  if (renderer && !renderer->BeginDocument(unknown_title_)) {
1190  pixDestroy(&pix);
1191  return false;
1192  }
1193 
1194  // Produce output
1195  r = (tiff) ?
1196  ProcessPagesMultipageTiff(data, buf.size(), filename, retry_config,
1197  timeout_millisec, renderer,
1199  ProcessPage(pix, 0, filename, retry_config,
1200  timeout_millisec, renderer);
1201 
1202  // Clean up memory as needed
1203  pixDestroy(&pix);
1204 
1205  // End the output
1206  if (!r || (renderer && !renderer->EndDocument())) {
1207  return false;
1208  }
1210  return true;
1211 }
1212 
1213 bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename,
1214  const char* retry_config, int timeout_millisec,
1215  TessResultRenderer* renderer) {
1216  PERF_COUNT_START("ProcessPage")
1217  SetInputName(filename);
1218  SetImage(pix);
1219  bool failed = false;
1220 
1222  // Disabled character recognition
1223  PageIterator* it = AnalyseLayout();
1224 
1225  if (it == nullptr) {
1226  failed = true;
1227  } else {
1228  delete it;
1229  }
1231  failed = FindLines() != 0;
1232  } else if (timeout_millisec > 0) {
1233  // Running with a timeout.
1234  ETEXT_DESC monitor;
1235  monitor.cancel = nullptr;
1236  monitor.cancel_this = nullptr;
1237  monitor.set_deadline_msecs(timeout_millisec);
1238 
1239  // Now run the main recognition.
1240  failed = Recognize(&monitor) < 0;
1241  } else {
1242  // Normal layout and character recognition with no timeout.
1243  failed = Recognize(nullptr) < 0;
1244  }
1245 
1247 #ifndef ANDROID_BUILD
1248  Pix* page_pix = GetThresholdedImage();
1249  pixWrite("tessinput.tif", page_pix, IFF_TIFF_G4);
1250 #endif // ANDROID_BUILD
1251  }
1252 
1253  if (failed && retry_config != nullptr && retry_config[0] != '\0') {
1254  // Save current config variables before switching modes.
1255  FILE* fp = fopen(kOldVarsFile, "wb");
1256  if (fp == nullptr) {
1257  tprintf("Error, failed to open file \"%s\"\n", kOldVarsFile);
1258  } else {
1259  PrintVariables(fp);
1260  fclose(fp);
1261  }
1262  // Switch to alternate mode for retry.
1263  ReadConfigFile(retry_config);
1264  SetImage(pix);
1265  Recognize(nullptr);
1266  // Restore saved config variables.
1268  }
1269 
1270  if (renderer && !failed) {
1271  failed = !renderer->AddImage(this);
1272  }
1273 
1275  return !failed;
1276 }
1277 
1283  if (tesseract_ == nullptr || page_res_ == nullptr)
1284  return nullptr;
1285  return new LTRResultIterator(
1289 }
1290 
1300  if (tesseract_ == nullptr || page_res_ == nullptr)
1301  return nullptr;
1306 }
1307 
1317  if (tesseract_ == nullptr || page_res_ == nullptr)
1318  return nullptr;
1319  return new MutableIterator(page_res_, tesseract_,
1323 }
1324 
1327  if (tesseract_ == nullptr ||
1328  (!recognition_done_ && Recognize(nullptr) < 0))
1329  return nullptr;
1330  STRING text("");
1331  ResultIterator *it = GetIterator();
1332  do {
1333  if (it->Empty(RIL_PARA)) continue;
1334  const std::unique_ptr<const char[]> para_text(it->GetUTF8Text(RIL_PARA));
1335  text += para_text.get();
1336  } while (it->Next(RIL_PARA));
1337  char* result = new char[text.length() + 1];
1338  strncpy(result, text.string(), text.length() + 1);
1339  delete it;
1340  return result;
1341 }
1342 
1346 static tesseract::Orientation GetBlockTextOrientation(const PageIterator *it) {
1347  tesseract::Orientation orientation;
1348  tesseract::WritingDirection writing_direction;
1349  tesseract::TextlineOrder textline_order;
1350  float deskew_angle;
1351  it->Orientation(&orientation, &writing_direction, &textline_order,
1352  &deskew_angle);
1353  return orientation;
1354 }
1355 
1364 static void AddBaselineCoordsTohOCR(const PageIterator *it,
1365  PageIteratorLevel level,
1366  STRING* hocr_str) {
1367  tesseract::Orientation orientation = GetBlockTextOrientation(it);
1368  if (orientation != ORIENTATION_PAGE_UP) {
1369  hocr_str->add_str_int("; textangle ", 360 - orientation * 90);
1370  return;
1371  }
1372 
1373  int left, top, right, bottom;
1374  it->BoundingBox(level, &left, &top, &right, &bottom);
1375 
1376  // Try to get the baseline coordinates at this level.
1377  int x1, y1, x2, y2;
1378  if (!it->Baseline(level, &x1, &y1, &x2, &y2))
1379  return;
1380  // Following the description of this field of the hOCR spec, we convert the
1381  // baseline coordinates so that "the bottom left of the bounding box is the
1382  // origin".
1383  x1 -= left;
1384  x2 -= left;
1385  y1 -= bottom;
1386  y2 -= bottom;
1387 
1388  // Now fit a line through the points so we can extract coefficients for the
1389  // equation: y = p1 x + p0
1390  double p1 = 0;
1391  double p0 = 0;
1392  if (x1 == x2) {
1393  // Problem computing the polynomial coefficients.
1394  return;
1395  }
1396  p1 = (y2 - y1) / static_cast<double>(x2 - x1);
1397  p0 = y1 - static_cast<double>(p1 * x1);
1398 
1399  hocr_str->add_str_double("; baseline ", round(p1 * 1000.0) / 1000.0);
1400  hocr_str->add_str_double(" ", round(p0 * 1000.0) / 1000.0);
1401 }
1402 
1403 static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1,
1404  int num2) {
1405  const size_t BUFSIZE = 64;
1406  char id_buffer[BUFSIZE];
1407  if (num2 >= 0) {
1408  snprintf(id_buffer, BUFSIZE - 1, "%s_%d_%d", base.c_str(), num1, num2);
1409  } else {
1410  snprintf(id_buffer, BUFSIZE - 1, "%s_%d", base.c_str(), num1);
1411  }
1412  id_buffer[BUFSIZE - 1] = '\0';
1413  *hocr_str += " id='";
1414  *hocr_str += id_buffer;
1415  *hocr_str += "'";
1416 }
1417 
1418 static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1,
1419  int num2, int num3) {
1420  const size_t BUFSIZE = 64;
1421  char id_buffer[BUFSIZE];
1422  snprintf(id_buffer, BUFSIZE - 1, "%s_%d_%d_%d", base.c_str(), num1, num2,num3);
1423  id_buffer[BUFSIZE - 1] = '\0';
1424  *hocr_str += " id='";
1425  *hocr_str += id_buffer;
1426  *hocr_str += "'";
1427 }
1428 
1429 static void AddBoxTohOCR(const ResultIterator* it, PageIteratorLevel level,
1430  STRING* hocr_str) {
1431  int left, top, right, bottom;
1432  it->BoundingBox(level, &left, &top, &right, &bottom);
1433  // This is the only place we use double quotes instead of single quotes,
1434  // but it may too late to change for consistency
1435  hocr_str->add_str_int(" title=\"bbox ", left);
1436  hocr_str->add_str_int(" ", top);
1437  hocr_str->add_str_int(" ", right);
1438  hocr_str->add_str_int(" ", bottom);
1439  // Add baseline coordinates & heights for textlines only.
1440  if (level == RIL_TEXTLINE) {
1441  AddBaselineCoordsTohOCR(it, level, hocr_str);
1442  // add custom height measures
1443  float row_height, descenders, ascenders; // row attributes
1444  it->RowAttributes(&row_height, &descenders, &ascenders);
1445  // TODO(rays): Do we want to limit these to a single decimal place?
1446  hocr_str->add_str_double("; x_size ", row_height);
1447  hocr_str->add_str_double("; x_descenders ", descenders * -1);
1448  hocr_str->add_str_double("; x_ascenders ", ascenders);
1449  }
1450  *hocr_str += "\">";
1451 }
1452 
1453 static void AddBoxToTSV(const PageIterator* it, PageIteratorLevel level,
1454  STRING* hocr_str) {
1455  int left, top, right, bottom;
1456  it->BoundingBox(level, &left, &top, &right, &bottom);
1457  hocr_str->add_str_int("\t", left);
1458  hocr_str->add_str_int("\t", top);
1459  hocr_str->add_str_int("\t", right - left);
1460  hocr_str->add_str_int("\t", bottom - top);
1461 }
1462 
1472 char* TessBaseAPI::GetHOCRText(int page_number) {
1473  return GetHOCRText(nullptr, page_number);
1474 }
1475 
1485 char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
1486  if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0))
1487  return nullptr;
1488 
1489  int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1, tcnt = 1, gcnt = 1;
1490  int page_id = page_number + 1; // hOCR uses 1-based page numbers.
1491  bool para_is_ltr = true; // Default direction is LTR
1492  const char* paragraph_lang = nullptr;
1493  bool font_info = false;
1494  GetBoolVariable("hocr_font_info", &font_info);
1495 
1496  STRING hocr_str("");
1497 
1498  if (input_file_ == nullptr)
1499  SetInputName(nullptr);
1500 
1501 #ifdef _WIN32
1502  // convert input name from ANSI encoding to utf-8
1503  int str16_len =
1504  MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, nullptr, 0);
1505  wchar_t *uni16_str = new WCHAR[str16_len];
1506  str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1,
1507  uni16_str, str16_len);
1508  int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr, 0,
1509  nullptr, nullptr);
1510  char *utf8_str = new char[utf8_len];
1511  WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str,
1512  utf8_len, nullptr, nullptr);
1513  *input_file_ = utf8_str;
1514  delete[] uni16_str;
1515  delete[] utf8_str;
1516 #endif
1517 
1518  hocr_str += " <div class='ocr_page'";
1519  AddIdTohOCR(&hocr_str, "page", page_id, -1);
1520  hocr_str += " title='image \"";
1521  if (input_file_) {
1522  hocr_str += HOcrEscape(input_file_->string());
1523  } else {
1524  hocr_str += "unknown";
1525  }
1526  hocr_str.add_str_int("\"; bbox ", rect_left_);
1527  hocr_str.add_str_int(" ", rect_top_);
1528  hocr_str.add_str_int(" ", rect_width_);
1529  hocr_str.add_str_int(" ", rect_height_);
1530  hocr_str.add_str_int("; ppageno ", page_number);
1531  hocr_str += "'>\n";
1532 
1533  ResultIterator *res_it = GetIterator();
1534  while (!res_it->Empty(RIL_BLOCK)) {
1535  if (res_it->Empty(RIL_WORD)) {
1536  res_it->Next(RIL_WORD);
1537  continue;
1538  }
1539 
1540  // Open any new block/paragraph/textline.
1541  if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
1542  para_is_ltr = true; // reset to default direction
1543  hocr_str += " <div class='ocr_carea'";
1544  AddIdTohOCR(&hocr_str, "block", page_id, bcnt);
1545  AddBoxTohOCR(res_it, RIL_BLOCK, &hocr_str);
1546  }
1547  if (res_it->IsAtBeginningOf(RIL_PARA)) {
1548  hocr_str += "\n <p class='ocr_par'";
1549  para_is_ltr = res_it->ParagraphIsLtr();
1550  if (!para_is_ltr) {
1551  hocr_str += " dir='rtl'";
1552  }
1553  AddIdTohOCR(&hocr_str, "par", page_id, pcnt);
1554  paragraph_lang = res_it->WordRecognitionLanguage();
1555  if (paragraph_lang) {
1556  hocr_str += " lang='";
1557  hocr_str += paragraph_lang;
1558  hocr_str += "'";
1559  }
1560  AddBoxTohOCR(res_it, RIL_PARA, &hocr_str);
1561  }
1562  if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
1563  hocr_str += "\n <span class='ocr_line'";
1564  AddIdTohOCR(&hocr_str, "line", page_id, lcnt);
1565  AddBoxTohOCR(res_it, RIL_TEXTLINE, &hocr_str);
1566  }
1567 
1568  // Now, process the word...
1569  std::vector<std::vector<std::pair<const char*, float>>>* confidencemap = nullptr;
1571  confidencemap = res_it->GetBestLSTMSymbolChoices();
1572  }
1573  hocr_str += "\n <span class='ocrx_word'";
1574  AddIdTohOCR(&hocr_str, "word", page_id, wcnt);
1575  int left, top, right, bottom;
1576  bool bold, italic, underlined, monospace, serif, smallcaps;
1577  int pointsize, font_id;
1578  const char *font_name;
1579  res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
1580  font_name = res_it->WordFontAttributes(&bold, &italic, &underlined,
1581  &monospace, &serif, &smallcaps,
1582  &pointsize, &font_id);
1583  hocr_str.add_str_int(" title='bbox ", left);
1584  hocr_str.add_str_int(" ", top);
1585  hocr_str.add_str_int(" ", right);
1586  hocr_str.add_str_int(" ", bottom);
1587  hocr_str.add_str_int("; x_wconf ", res_it->Confidence(RIL_WORD));
1588  if (font_info) {
1589  if (font_name) {
1590  hocr_str += "; x_font ";
1591  hocr_str += HOcrEscape(font_name);
1592  }
1593  hocr_str.add_str_int("; x_fsize ", pointsize);
1594  }
1595  hocr_str += "'";
1596  const char* lang = res_it->WordRecognitionLanguage();
1597  if (lang && (!paragraph_lang || strcmp(lang, paragraph_lang))) {
1598  hocr_str += " lang='";
1599  hocr_str += lang;
1600  hocr_str += "'";
1601  }
1602  switch (res_it->WordDirection()) {
1603  // Only emit direction if different from current paragraph direction
1604  case DIR_LEFT_TO_RIGHT:
1605  if (!para_is_ltr) hocr_str += " dir='ltr'";
1606  break;
1607  case DIR_RIGHT_TO_LEFT:
1608  if (para_is_ltr) hocr_str += " dir='rtl'";
1609  break;
1610  case DIR_MIX:
1611  case DIR_NEUTRAL:
1612  default: // Do nothing.
1613  break;
1614  }
1615  hocr_str += ">";
1616  bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
1617  bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD);
1618  bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
1619  if (bold) hocr_str += "<strong>";
1620  if (italic) hocr_str += "<em>";
1621  do {
1622  const std::unique_ptr<const char[]> grapheme(
1623  res_it->GetUTF8Text(RIL_SYMBOL));
1624  if (grapheme && grapheme[0] != 0) {
1625  hocr_str += HOcrEscape(grapheme.get());
1626  }
1627  res_it->Next(RIL_SYMBOL);
1628  } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
1629  if (italic) hocr_str += "</em>";
1630  if (bold) hocr_str += "</strong>";
1631  // If the lstm choice mode is required it is added here
1632  if (tesseract_->lstm_choice_mode == 1 && confidencemap != nullptr) {
1633  for (size_t i = 0; i < confidencemap->size(); i++) {
1634  hocr_str += "\n <span class='ocrx_cinfo'";
1635  AddIdTohOCR(&hocr_str, "timestep", page_id, wcnt, tcnt);
1636  hocr_str += ">";
1637  std::vector<std::pair<const char*, float>> timestep = (*confidencemap)[i];
1638  for (std::pair<const char*, float> conf : timestep) {
1639  hocr_str += "<span class='ocr_glyph'";
1640  AddIdTohOCR(&hocr_str, "choice", page_id, wcnt, gcnt);
1641  hocr_str.add_str_int(" title='x_confs ", int(conf.second * 100));
1642  hocr_str += "'";
1643  hocr_str += ">";
1644  hocr_str += conf.first;
1645  hocr_str += "</span>";
1646  gcnt++;
1647  }
1648  hocr_str += "</span>";
1649  tcnt++;
1650  }
1651  } else if (tesseract_->lstm_choice_mode == 2 && confidencemap != nullptr) {
1652  for (size_t i = 0; i < confidencemap->size(); i++) {
1653  std::vector<std::pair<const char*, float>> timestep = (*confidencemap)[i];
1654  if (timestep.size() > 0) {
1655  hocr_str += "\n <span class='ocrx_cinfo'";
1656  AddIdTohOCR(&hocr_str, "lstm_choices", page_id, wcnt, tcnt);
1657  hocr_str += " chosen='";
1658  hocr_str += timestep[0].first;
1659  hocr_str += "'>";
1660  for (size_t j = 1; j < timestep.size(); j++) {
1661  hocr_str += "<span class='ocr_glyph'";
1662  AddIdTohOCR(&hocr_str, "choice", page_id, wcnt, gcnt);
1663  hocr_str.add_str_int(" title='x_confs ", int(timestep[j].second * 100));
1664  hocr_str += "'";
1665  hocr_str += ">";
1666  hocr_str += timestep[j].first;
1667  hocr_str += "</span>";
1668  gcnt++;
1669  }
1670  hocr_str += "</span>";
1671  tcnt++;
1672  }
1673  }
1674  }
1675  hocr_str += "</span>";
1676  tcnt = 1;
1677  gcnt = 1;
1678  wcnt++;
1679  // Close any ending block/paragraph/textline.
1680  if (last_word_in_line) {
1681  hocr_str += "\n </span>";
1682  lcnt++;
1683  }
1684  if (last_word_in_para) {
1685  hocr_str += "\n </p>\n";
1686  pcnt++;
1687  para_is_ltr = true; // back to default direction
1688  }
1689  if (last_word_in_block) {
1690  hocr_str += " </div>\n";
1691  bcnt++;
1692  }
1693  }
1694  hocr_str += " </div>\n";
1695 
1696  char *ret = new char[hocr_str.length() + 1];
1697  strcpy(ret, hocr_str.string());
1698  delete res_it;
1699  return ret;
1700 }
1701 
1707 char* TessBaseAPI::GetTSVText(int page_number) {
1708  if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0))
1709  return nullptr;
1710 
1711  int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
1712  int page_id = page_number + 1; // we use 1-based page numbers.
1713 
1714  STRING tsv_str("");
1715 
1716  int page_num = page_id;
1717  int block_num = 0;
1718  int par_num = 0;
1719  int line_num = 0;
1720  int word_num = 0;
1721 
1722  tsv_str.add_str_int("1\t", page_num); // level 1 - page
1723  tsv_str.add_str_int("\t", block_num);
1724  tsv_str.add_str_int("\t", par_num);
1725  tsv_str.add_str_int("\t", line_num);
1726  tsv_str.add_str_int("\t", word_num);
1727  tsv_str.add_str_int("\t", rect_left_);
1728  tsv_str.add_str_int("\t", rect_top_);
1729  tsv_str.add_str_int("\t", rect_width_);
1730  tsv_str.add_str_int("\t", rect_height_);
1731  tsv_str += "\t-1\t\n";
1732 
1733  ResultIterator* res_it = GetIterator();
1734  while (!res_it->Empty(RIL_BLOCK)) {
1735  if (res_it->Empty(RIL_WORD)) {
1736  res_it->Next(RIL_WORD);
1737  continue;
1738  }
1739 
1740  // Add rows for any new block/paragraph/textline.
1741  if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
1742  block_num++;
1743  par_num = 0;
1744  line_num = 0;
1745  word_num = 0;
1746  tsv_str.add_str_int("2\t", page_num); // level 2 - block
1747  tsv_str.add_str_int("\t", block_num);
1748  tsv_str.add_str_int("\t", par_num);
1749  tsv_str.add_str_int("\t", line_num);
1750  tsv_str.add_str_int("\t", word_num);
1751  AddBoxToTSV(res_it, RIL_BLOCK, &tsv_str);
1752  tsv_str += "\t-1\t\n"; // end of row for block
1753  }
1754  if (res_it->IsAtBeginningOf(RIL_PARA)) {
1755  par_num++;
1756  line_num = 0;
1757  word_num = 0;
1758  tsv_str.add_str_int("3\t", page_num); // level 3 - paragraph
1759  tsv_str.add_str_int("\t", block_num);
1760  tsv_str.add_str_int("\t", par_num);
1761  tsv_str.add_str_int("\t", line_num);
1762  tsv_str.add_str_int("\t", word_num);
1763  AddBoxToTSV(res_it, RIL_PARA, &tsv_str);
1764  tsv_str += "\t-1\t\n"; // end of row for para
1765  }
1766  if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
1767  line_num++;
1768  word_num = 0;
1769  tsv_str.add_str_int("4\t", page_num); // level 4 - line
1770  tsv_str.add_str_int("\t", block_num);
1771  tsv_str.add_str_int("\t", par_num);
1772  tsv_str.add_str_int("\t", line_num);
1773  tsv_str.add_str_int("\t", word_num);
1774  AddBoxToTSV(res_it, RIL_TEXTLINE, &tsv_str);
1775  tsv_str += "\t-1\t\n"; // end of row for line
1776  }
1777 
1778  // Now, process the word...
1779  int left, top, right, bottom;
1780  res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
1781  word_num++;
1782  tsv_str.add_str_int("5\t", page_num); // level 5 - word
1783  tsv_str.add_str_int("\t", block_num);
1784  tsv_str.add_str_int("\t", par_num);
1785  tsv_str.add_str_int("\t", line_num);
1786  tsv_str.add_str_int("\t", word_num);
1787  tsv_str.add_str_int("\t", left);
1788  tsv_str.add_str_int("\t", top);
1789  tsv_str.add_str_int("\t", right - left);
1790  tsv_str.add_str_int("\t", bottom - top);
1791  tsv_str.add_str_int("\t", res_it->Confidence(RIL_WORD));
1792  tsv_str += "\t";
1793 
1794  // Increment counts if at end of block/paragraph/textline.
1795  if (res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD)) lcnt++;
1796  if (res_it->IsAtFinalElement(RIL_PARA, RIL_WORD)) pcnt++;
1797  if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD)) bcnt++;
1798 
1799  do {
1800  tsv_str +=
1801  std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_SYMBOL)).get();
1802  res_it->Next(RIL_SYMBOL);
1803  } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
1804  tsv_str += "\n"; // end of row
1805  wcnt++;
1806  }
1807 
1808  char* ret = new char[tsv_str.length() + 1];
1809  strcpy(ret, tsv_str.string());
1810  delete res_it;
1811  return ret;
1812 }
1813 
1815 const int kNumbersPerBlob = 5;
1820 const int kBytesPerNumber = 5;
1828 const int kBytesPer64BitNumber = 20;
1836  UNICHAR_LEN;
1837 
1844 char* TessBaseAPI::GetBoxText(int page_number) {
1845  if (tesseract_ == nullptr ||
1846  (!recognition_done_ && Recognize(nullptr) < 0))
1847  return nullptr;
1848  int blob_count;
1849  int utf8_length = TextLength(&blob_count);
1850  int total_length = blob_count * kBytesPerBoxFileLine + utf8_length +
1852  char* result = new char[total_length];
1853  result[0] = '\0';
1854  int output_length = 0;
1856  do {
1857  int left, top, right, bottom;
1858  if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) {
1859  const std::unique_ptr</*non-const*/ char[]> text(
1860  it->GetUTF8Text(RIL_SYMBOL));
1861  // Tesseract uses space for recognition failure. Fix to a reject
1862  // character, kTesseractReject so we don't create illegal box files.
1863  for (int i = 0; text[i] != '\0'; ++i) {
1864  if (text[i] == ' ')
1865  text[i] = kTesseractReject;
1866  }
1867  snprintf(result + output_length, total_length - output_length,
1868  "%s %d %d %d %d %d\n", text.get(), left, image_height_ - bottom,
1869  right, image_height_ - top, page_number);
1870  output_length += strlen(result + output_length);
1871  // Just in case...
1872  if (output_length + kMaxBytesPerLine > total_length)
1873  break;
1874  }
1875  } while (it->Next(RIL_SYMBOL));
1876  delete it;
1877  return result;
1878 }
1879 
/**
 * Unicode code points that get replaced by an 8-bit stand-in.
 * The list is terminated by 0 and is index-aligned with kLatinChs.
 */
const int kUniChs[] = {
  0x20ac,  // EURO SIGN
  0x201c,  // LEFT DOUBLE QUOTATION MARK
  0x201d,  // RIGHT DOUBLE QUOTATION MARK
  0x2018,  // LEFT SINGLE QUOTATION MARK
  0x2019,  // RIGHT SINGLE QUOTATION MARK
  0x2022,  // BULLET
  0x2014,  // EM DASH
  0        // terminator
};
/**
 * Replacement code points (all <= 0xff) for the kUniChs entry at the same
 * index. The list is terminated by 0.
 */
const int kLatinChs[] = {
  0x00a2,  // CENT SIGN
  0x0022,  // QUOTATION MARK
  0x0022,  // QUOTATION MARK
  0x0027,  // APOSTROPHE
  0x0027,  // APOSTROPHE
  0x00b7,  // MIDDLE DOT
  0x002d,  // HYPHEN-MINUS
  0        // terminator
};
1892 
1899  if (tesseract_ == nullptr ||
1900  (!recognition_done_ && Recognize(nullptr) < 0))
1901  return nullptr;
1902  bool tilde_crunch_written = false;
1903  bool last_char_was_newline = true;
1904  bool last_char_was_tilde = false;
1905 
1906  int total_length = TextLength(nullptr);
1907  PAGE_RES_IT page_res_it(page_res_);
1908  char* result = new char[total_length];
1909  char* ptr = result;
1910  for (page_res_it.restart_page(); page_res_it.word () != nullptr;
1911  page_res_it.forward()) {
1912  WERD_RES *word = page_res_it.word();
1913  // Process the current word.
1914  if (word->unlv_crunch_mode != CR_NONE) {
1915  if (word->unlv_crunch_mode != CR_DELETE &&
1916  (!tilde_crunch_written ||
1917  (word->unlv_crunch_mode == CR_KEEP_SPACE &&
1918  word->word->space() > 0 &&
1919  !word->word->flag(W_FUZZY_NON) &&
1920  !word->word->flag(W_FUZZY_SP)))) {
1921  if (!word->word->flag(W_BOL) &&
1922  word->word->space() > 0 &&
1923  !word->word->flag(W_FUZZY_NON) &&
1924  !word->word->flag(W_FUZZY_SP)) {
1925  /* Write a space to separate from preceding good text */
1926  *ptr++ = ' ';
1927  last_char_was_tilde = false;
1928  }
1929  if (!last_char_was_tilde) {
1930  // Write a reject char.
1931  last_char_was_tilde = true;
1932  *ptr++ = kUNLVReject;
1933  tilde_crunch_written = true;
1934  last_char_was_newline = false;
1935  }
1936  }
1937  } else {
1938  // NORMAL PROCESSING of non tilde crunched words.
1939  tilde_crunch_written = false;
1941  const char* wordstr = word->best_choice->unichar_string().string();
1942  const STRING& lengths = word->best_choice->unichar_lengths();
1943  int length = lengths.length();
1944  int i = 0;
1945  int offset = 0;
1946 
1947  if (last_char_was_tilde &&
1948  word->word->space() == 0 && wordstr[offset] == ' ') {
1949  // Prevent adjacent tilde across words - we know that adjacent tildes
1950  // within words have been removed.
1951  // Skip the first character.
1952  offset = lengths[i++];
1953  }
1954  if (i < length && wordstr[offset] != 0) {
1955  if (!last_char_was_newline)
1956  *ptr++ = ' ';
1957  else
1958  last_char_was_newline = false;
1959  for (; i < length; offset += lengths[i++]) {
1960  if (wordstr[offset] == ' ' ||
1961  wordstr[offset] == kTesseractReject) {
1962  *ptr++ = kUNLVReject;
1963  last_char_was_tilde = true;
1964  } else {
1965  if (word->reject_map[i].rejected())
1966  *ptr++ = kUNLVSuspect;
1967  UNICHAR ch(wordstr + offset, lengths[i]);
1968  int uni_ch = ch.first_uni();
1969  for (int j = 0; kUniChs[j] != 0; ++j) {
1970  if (kUniChs[j] == uni_ch) {
1971  uni_ch = kLatinChs[j];
1972  break;
1973  }
1974  }
1975  if (uni_ch <= 0xff) {
1976  *ptr++ = static_cast<char>(uni_ch);
1977  last_char_was_tilde = false;
1978  } else {
1979  *ptr++ = kUNLVReject;
1980  last_char_was_tilde = true;
1981  }
1982  }
1983  }
1984  }
1985  }
1986  if (word->word->flag(W_EOL) && !last_char_was_newline) {
1987  /* Add a new line output */
1988  *ptr++ = '\n';
1989  tilde_crunch_written = false;
1990  last_char_was_newline = true;
1991  last_char_was_tilde = false;
1992  }
1993  }
1994  *ptr++ = '\n';
1995  *ptr = '\0';
1996  return result;
1997 }
1998 
1999 #ifndef DISABLED_LEGACY_ENGINE
2000 
2010 bool TessBaseAPI::DetectOrientationScript(int* orient_deg, float* orient_conf,
2011  const char** script_name,
2012  float* script_conf) {
2013  OSResults osr;
2014 
2015  bool osd = DetectOS(&osr);
2016  if (!osd) {
2017  return false;
2018  }
2019 
2020  int orient_id = osr.best_result.orientation_id;
2021  int script_id = osr.get_best_script(orient_id);
2022  if (orient_conf) *orient_conf = osr.best_result.oconfidence;
2023  if (orient_deg) *orient_deg = orient_id * 90; // convert quadrant to degrees
2024 
2025  if (script_name) {
2026  const char* script = osr.unicharset->get_script_from_script_id(script_id);
2027 
2028  *script_name = script;
2029  }
2030 
2031  if (script_conf) *script_conf = osr.best_result.sconfidence;
2032 
2033  return true;
2034 }
2035 
2041 char* TessBaseAPI::GetOsdText(int page_number) {
2042  int orient_deg;
2043  float orient_conf;
2044  const char* script_name;
2045  float script_conf;
2046 
2047  if (!DetectOrientationScript(&orient_deg, &orient_conf, &script_name,
2048  &script_conf))
2049  return nullptr;
2050 
2051  // clockwise rotation needed to make the page upright
2052  int rotate = OrientationIdToValue(orient_deg / 90);
2053 
2054  const int kOsdBufsize = 255;
2055  char* osd_buf = new char[kOsdBufsize];
2056  snprintf(osd_buf, kOsdBufsize,
2057  "Page number: %d\n"
2058  "Orientation in degrees: %d\n"
2059  "Rotate: %d\n"
2060  "Orientation confidence: %.2f\n"
2061  "Script: %s\n"
2062  "Script confidence: %.2f\n",
2063  page_number, orient_deg, rotate, orient_conf, script_name,
2064  script_conf);
2065 
2066  return osd_buf;
2067 }
2068 
2069 #endif // ndef DISABLED_LEGACY_ENGINE
2070 
2073  int* conf = AllWordConfidences();
2074  if (!conf) return 0;
2075  int sum = 0;
2076  int *pt = conf;
2077  while (*pt >= 0) sum += *pt++;
2078  if (pt != conf) sum /= pt - conf;
2079  delete [] conf;
2080  return sum;
2081 }
2082 
2085  if (tesseract_ == nullptr ||
2086  (!recognition_done_ && Recognize(nullptr) < 0))
2087  return nullptr;
2088  int n_word = 0;
2089  PAGE_RES_IT res_it(page_res_);
2090  for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward())
2091  n_word++;
2092 
2093  int* conf = new int[n_word+1];
2094  n_word = 0;
2095  for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) {
2096  WERD_RES *word = res_it.word();
2097  WERD_CHOICE* choice = word->best_choice;
2098  int w_conf = static_cast<int>(100 + 5 * choice->certainty());
2099  // This is the eq for converting Tesseract confidence to 1..100
2100  if (w_conf < 0) w_conf = 0;
2101  if (w_conf > 100) w_conf = 100;
2102  conf[n_word++] = w_conf;
2103  }
2104  conf[n_word] = -1;
2105  return conf;
2106 }
2107 
2108 #ifndef DISABLED_LEGACY_ENGINE
2109 
2119 bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) {
2120  int debug = 0;
2121  GetIntVariable("applybox_debug", &debug);
2122  bool success = true;
2123  PageSegMode current_psm = GetPageSegMode();
2124  SetPageSegMode(mode);
2125  SetVariable("classify_enable_learning", "0");
2126  const std::unique_ptr<const char[]> text(GetUTF8Text());
2127  if (debug) {
2128  tprintf("Trying to adapt \"%s\" to \"%s\"\n", text.get(), wordstr);
2129  }
2130  if (text != nullptr) {
2131  PAGE_RES_IT it(page_res_);
2132  WERD_RES* word_res = it.word();
2133  if (word_res != nullptr) {
2134  word_res->word->set_text(wordstr);
2135  // Check to see if text matches wordstr.
2136  int w = 0;
2137  int t;
2138  for (t = 0; text[t] != '\0'; ++t) {
2139  if (text[t] == '\n' || text[t] == ' ')
2140  continue;
2141  while (wordstr[w] == ' ') ++w;
2142  if (text[t] != wordstr[w])
2143  break;
2144  ++w;
2145  }
2146  if (text[t] != '\0' || wordstr[w] != '\0') {
2147  // No match.
2148  delete page_res_;
2149  GenericVector<TBOX> boxes;
2153  PAGE_RES_IT pr_it(page_res_);
2154  if (pr_it.word() == nullptr)
2155  success = false;
2156  else
2157  word_res = pr_it.word();
2158  } else {
2159  word_res->BestChoiceToCorrectText();
2160  }
2161  if (success) {
2162  tesseract_->EnableLearning = true;
2163  tesseract_->LearnWord(nullptr, word_res);
2164  }
2165  } else {
2166  success = false;
2167  }
2168  } else {
2169  success = false;
2170  }
2171  SetPageSegMode(current_psm);
2172  return success;
2173 }
2174 #endif // ndef DISABLED_LEGACY_ENGINE
2175 
2183  if (thresholder_ != nullptr)
2184  thresholder_->Clear();
2185  ClearResults();
2186  if (tesseract_ != nullptr) SetInputImage(nullptr);
2187 }
2188 
2196  Clear();
2197  delete thresholder_;
2198  thresholder_ = nullptr;
2199  delete page_res_;
2200  page_res_ = nullptr;
2201  delete block_list_;
2202  block_list_ = nullptr;
2203  if (paragraph_models_ != nullptr) {
2205  delete paragraph_models_;
2206  paragraph_models_ = nullptr;
2207  }
2208  if (osd_tesseract_ == tesseract_) osd_tesseract_ = nullptr;
2209  delete tesseract_;
2210  tesseract_ = nullptr;
2211  delete osd_tesseract_;
2212  osd_tesseract_ = nullptr;
2213  delete equ_detect_;
2214  equ_detect_ = nullptr;
2215  delete input_file_;
2216  input_file_ = nullptr;
2217  delete output_file_;
2218  output_file_ = nullptr;
2219  delete datapath_;
2220  datapath_ = nullptr;
2221  delete language_;
2222  language_ = nullptr;
2223 }
2224 
2225 // Clear any library-level memory caches.
2226 // There are a variety of expensive-to-load constant data structures (mostly
2227 // language dictionaries) that are cached globally -- surviving the Init()
2228 // and End() of individual TessBaseAPI's. This function allows the clearing
2229 // of these caches.
2232 }
2233 
2238 int TessBaseAPI::IsValidWord(const char *word) {
2239  return tesseract_->getDict().valid_word(word);
2240 }
2241 // Returns true if utf8_character is defined in the UniCharset.
2242 bool TessBaseAPI::IsValidCharacter(const char *utf8_character) {
2243  return tesseract_->unicharset.contains_unichar(utf8_character);
2244 }
2245 
2246 
2247 // TODO(rays) Obsolete this function and replace with a more aptly named
2248 // function that returns image coordinates rather than tesseract coordinates.
2249 bool TessBaseAPI::GetTextDirection(int* out_offset, float* out_slope) {
2250  PageIterator* it = AnalyseLayout();
2251  if (it == nullptr) {
2252  return false;
2253  }
2254  int x1, x2, y1, y2;
2255  it->Baseline(RIL_TEXTLINE, &x1, &y1, &x2, &y2);
2256  // Calculate offset and slope (NOTE: Kind of ugly)
2257  if (x2 <= x1) x2 = x1 + 1;
2258  // Convert the point pair to slope/offset of the baseline (in image coords.)
2259  *out_slope = static_cast<float>(y2 - y1) / (x2 - x1);
2260  *out_offset = static_cast<int>(y1 - *out_slope * x1);
2261  // Get the y-coord of the baseline at the left and right edges of the
2262  // textline's bounding box.
2263  int left, top, right, bottom;
2264  if (!it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom)) {
2265  delete it;
2266  return false;
2267  }
2268  int left_y = IntCastRounded(*out_slope * left + *out_offset);
2269  int right_y = IntCastRounded(*out_slope * right + *out_offset);
2270  // Shift the baseline down so it passes through the nearest bottom-corner
2271  // of the textline's bounding box. This is the difference between the y
2272  // at the lowest (max) edge of the box and the actual box bottom.
2273  *out_offset += bottom - std::max(left_y, right_y);
2274  // Switch back to bottom-up tesseract coordinates. Requires negation of
2275  // the slope and height - offset for the offset.
2276  *out_slope = -*out_slope;
2277  *out_offset = rect_height_ - *out_offset;
2278  delete it;
2279 
2280  return true;
2281 }
2282 
2285  if (tesseract_ != nullptr) {
2287  }
2288 }
2289 
2299  if (tesseract_ != nullptr) {
2301  // Set it for the sublangs too.
2302  int num_subs = tesseract_->num_sub_langs();
2303  for (int i = 0; i < num_subs; ++i) {
2305  }
2306  }
2307 }
2308 
2309 #ifndef DISABLED_LEGACY_ENGINE
2310 
2312  if (tesseract_ != nullptr) tesseract_->fill_lattice_ = f;
2313 }
2314 #endif // ndef DISABLED_LEGACY_ENGINE
2315 
2318  if (tesseract_ == nullptr) {
2319  tprintf("Please call Init before attempting to set an image.\n");
2320  return false;
2321  }
2322  if (thresholder_ == nullptr)
2324  ClearResults();
2325  return true;
2326 }
2327 
2334 bool TessBaseAPI::Threshold(Pix** pix) {
2335  ASSERT_HOST(pix != nullptr);
2336  if (*pix != nullptr)
2337  pixDestroy(pix);
2338  // Zero resolution messes up the algorithms, so make sure it is credible.
2339  int user_dpi = 0;
2340  bool a = GetIntVariable("user_defined_dpi", &user_dpi);
2341  int y_res = thresholder_->GetScaledYResolution();
2342  if (user_dpi && (user_dpi < kMinCredibleResolution ||
2343  user_dpi > kMaxCredibleResolution)) {
2344  tprintf("Warning: User defined image dpi is outside of expected range "
2345  "(%d - %d)!\n",
2347  }
2348  // Always use user defined dpi
2349  if (user_dpi) {
2351  } else if (y_res < kMinCredibleResolution ||
2352  y_res > kMaxCredibleResolution) {
2353  tprintf("Warning: Invalid resolution %d dpi. Using %d instead.\n",
2354  y_res, kMinCredibleResolution);
2356  }
2357  PageSegMode pageseg_mode =
2358  static_cast<PageSegMode>(
2359  static_cast<int>(tesseract_->tessedit_pageseg_mode));
2360  if (!thresholder_->ThresholdToPix(pageseg_mode, pix)) return false;
2364  if (!thresholder_->IsBinary()) {
2367  } else {
2368  tesseract_->set_pix_thresholds(nullptr);
2369  tesseract_->set_pix_grey(nullptr);
2370  }
2371  // Set the internal resolution that is used for layout parameters from the
2372  // estimated resolution, rather than the image resolution, which may be
2373  // fabricated, but we will use the image resolution, if there is one, to
2374  // report output point sizes.
2375  int estimated_res = ClipToRange(thresholder_->GetScaledEstimatedResolution(),
2378  if (estimated_res != thresholder_->GetScaledEstimatedResolution()) {
2379  tprintf("Estimated internal resolution %d out of range! "
2380  "Corrected to %d.\n",
2381  thresholder_->GetScaledEstimatedResolution(), estimated_res);
2382  }
2383  tesseract_->set_source_resolution(estimated_res);
2384  SavePixForCrash(estimated_res, *pix);
2385  return true;
2386 }
2387 
2390  if (thresholder_ == nullptr || thresholder_->IsEmpty()) {
2391  tprintf("Please call SetImage before attempting recognition.\n");
2392  return -1;
2393  }
2394  if (recognition_done_)
2395  ClearResults();
2396  if (!block_list_->empty()) {
2397  return 0;
2398  }
2399  if (tesseract_ == nullptr) {
2400  tesseract_ = new Tesseract;
2401  #ifndef DISABLED_LEGACY_ENGINE
2403  #endif
2404  }
2405  if (tesseract_->pix_binary() == nullptr &&
2407  return -1;
2408  }
2409 
2411 
2412 #ifndef DISABLED_LEGACY_ENGINE
2414  if (equ_detect_ == nullptr && datapath_ != nullptr) {
2415  equ_detect_ = new EquationDetect(datapath_->string(), nullptr);
2416  }
2417  if (equ_detect_ == nullptr) {
2418  tprintf("Warning: Could not set equation detector\n");
2419  } else {
2421  }
2422  }
2423 #endif // ndef DISABLED_LEGACY_ENGINE
2424 
2425  Tesseract* osd_tess = osd_tesseract_;
2426  OSResults osr;
2428  osd_tess == nullptr) {
2429  if (strcmp(language_->string(), "osd") == 0) {
2430  osd_tess = tesseract_;
2431  } else {
2432  osd_tesseract_ = new Tesseract;
2433  TessdataManager mgr(reader_);
2434  if (datapath_ == nullptr) {
2435  tprintf("Warning: Auto orientation and script detection requested,"
2436  " but data path is undefined\n");
2437  delete osd_tesseract_;
2438  osd_tesseract_ = nullptr;
2439  } else if (osd_tesseract_->init_tesseract(datapath_->string(), nullptr,
2440  "osd", OEM_TESSERACT_ONLY,
2441  nullptr, 0, nullptr, nullptr,
2442  false, &mgr) == 0) {
2443  osd_tess = osd_tesseract_;
2446  } else {
2447  tprintf("Warning: Auto orientation and script detection requested,"
2448  " but osd language failed to load\n");
2449  delete osd_tesseract_;
2450  osd_tesseract_ = nullptr;
2451  }
2452  }
2453  }
2454 
2455  if (tesseract_->SegmentPage(input_file_, block_list_, osd_tess, &osr) < 0)
2456  return -1;
2457 
2458  // If Devanagari is being recognized, we use different images for page seg
2459  // and for OCR.
2460  tesseract_->PrepareForTessOCR(block_list_, osd_tess, &osr);
2461  return 0;
2462 }
2463 
2466  if (tesseract_ != nullptr) {
2467  tesseract_->Clear();
2468  }
2469  delete page_res_;
2470  page_res_ = nullptr;
2471  recognition_done_ = false;
2472  if (block_list_ == nullptr)
2473  block_list_ = new BLOCK_LIST;
2474  else
2475  block_list_->clear();
2476  if (paragraph_models_ != nullptr) {
2478  delete paragraph_models_;
2479  paragraph_models_ = nullptr;
2480  }
2481  SavePixForCrash(0, nullptr);
2482 }
2483 
2491 int TessBaseAPI::TextLength(int* blob_count) {
2492  if (tesseract_ == nullptr || page_res_ == nullptr)
2493  return 0;
2494 
2495  PAGE_RES_IT page_res_it(page_res_);
2496  int total_length = 2;
2497  int total_blobs = 0;
2498  // Iterate over the data structures to extract the recognition result.
2499  for (page_res_it.restart_page(); page_res_it.word () != nullptr;
2500  page_res_it.forward()) {
2501  WERD_RES *word = page_res_it.word();
2502  WERD_CHOICE* choice = word->best_choice;
2503  if (choice != nullptr) {
2504  total_blobs += choice->length() + 2;
2505  total_length += choice->unichar_string().length() + 2;
2506  for (int i = 0; i < word->reject_map.length(); ++i) {
2507  if (word->reject_map[i].rejected())
2508  ++total_length;
2509  }
2510  }
2511  }
2512  if (blob_count != nullptr)
2513  *blob_count = total_blobs;
2514  return total_length;
2515 }
2516 
2517 #ifndef DISABLED_LEGACY_ENGINE
2518 
2523  if (tesseract_ == nullptr)
2524  return false;
2525  ClearResults();
2526  if (tesseract_->pix_binary() == nullptr &&
2528  return false;
2529  }
2530 
2531  if (input_file_ == nullptr)
2532  input_file_ = new STRING(kInputFile);
2534 }
2535 #endif // ndef DISABLED_LEGACY_ENGINE
2536 
2538  tesseract_->min_orientation_margin.set_value(margin);
2539 }
2540 
2555 void TessBaseAPI::GetBlockTextOrientations(int** block_orientation,
2556  bool** vertical_writing) {
2557  delete[] *block_orientation;
2558  *block_orientation = nullptr;
2559  delete[] *vertical_writing;
2560  *vertical_writing = nullptr;
2561  BLOCK_IT block_it(block_list_);
2562 
2563  block_it.move_to_first();
2564  int num_blocks = 0;
2565  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
2566  if (!block_it.data()->pdblk.poly_block()->IsText()) {
2567  continue;
2568  }
2569  ++num_blocks;
2570  }
2571  if (!num_blocks) {
2572  tprintf("WARNING: Found no blocks\n");
2573  return;
2574  }
2575  *block_orientation = new int[num_blocks];
2576  *vertical_writing = new bool[num_blocks];
2577  block_it.move_to_first();
2578  int i = 0;
2579  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
2580  block_it.forward()) {
2581  if (!block_it.data()->pdblk.poly_block()->IsText()) {
2582  continue;
2583  }
2584  FCOORD re_rotation = block_it.data()->re_rotation();
2585  float re_theta = re_rotation.angle();
2586  FCOORD classify_rotation = block_it.data()->classify_rotation();
2587  float classify_theta = classify_rotation.angle();
2588  double rot_theta = - (re_theta - classify_theta) * 2.0 / M_PI;
2589  if (rot_theta < 0) rot_theta += 4;
2590  int num_rotations = static_cast<int>(rot_theta + 0.5);
2591  (*block_orientation)[i] = num_rotations;
2592  // The classify_rotation is non-zero only if the text has vertical
2593  // writing direction.
2594  (*vertical_writing)[i] = classify_rotation.y() != 0.0f;
2595  ++i;
2596  }
2597 }
2598 
2599 
2600 void TessBaseAPI::DetectParagraphs(bool after_text_recognition) {
2601  int debug_level = 0;
2602  GetIntVariable("paragraph_debug_level", &debug_level);
2603  if (paragraph_models_ == nullptr)
2605  MutableIterator *result_it = GetMutableIterator();
2606  do { // Detect paragraphs for this block
2608  ::tesseract::DetectParagraphs(debug_level, after_text_recognition,
2609  result_it, &models);
2610  *paragraph_models_ += models;
2611  } while (result_it->Next(RIL_BLOCK));
2612  delete result_it;
2613 }
2614 
2616 const char* TessBaseAPI::GetUnichar(int unichar_id) {
2617  return tesseract_->unicharset.id_to_unichar(unichar_id);
2618 }
2619 
2621 const Dawg *TessBaseAPI::GetDawg(int i) const {
2622  if (tesseract_ == nullptr || i >= NumDawgs()) return nullptr;
2623  return tesseract_->getDict().GetDawg(i);
2624 }
2625 
2628  return tesseract_ == nullptr ? 0 : tesseract_->getDict().NumDawgs();
2629 }
2630 
2632 STRING HOcrEscape(const char* text) {
2633  STRING ret;
2634  const char *ptr;
2635  for (ptr = text; *ptr; ptr++) {
2636  switch (*ptr) {
2637  case '<': ret += "&lt;"; break;
2638  case '>': ret += "&gt;"; break;
2639  case '&': ret += "&amp;"; break;
2640  case '"': ret += "&quot;"; break;
2641  case '\'': ret += "&#39;"; break;
2642  default: ret += *ptr;
2643  }
2644  }
2645  return ret;
2646 }
2647 
2648 
2649 #ifndef DISABLED_LEGACY_ENGINE
2650 
2651 
2652 // ____________________________________________________________________________
2653 // Ocropus add-ons.
2654 
2657  ASSERT_HOST(FindLines() == 0);
2658  BLOCK_LIST* result = block_list_;
2659  block_list_ = nullptr;
2660  return result;
2661 }
2662 
2668 void TessBaseAPI::DeleteBlockList(BLOCK_LIST *block_list) {
2669  delete block_list;
2670 }
2671 
2672 
2674  float xheight,
2675  float descender,
2676  float ascender) {
2677  int32_t xstarts[] = {-32000};
2678  double quad_coeffs[] = {0, 0, baseline};
2679  return new ROW(1,
2680  xstarts,
2681  quad_coeffs,
2682  xheight,
2683  ascender - (baseline + xheight),
2684  descender - baseline,
2685  0,
2686  0);
2687 }
2688 
2691  int width = pixGetWidth(pix);
2692  int height = pixGetHeight(pix);
2693  BLOCK block("a character", TRUE, 0, 0, 0, 0, width, height);
2694 
2695  // Create C_BLOBs from the page
2696  extract_edges(pix, &block);
2697 
2698  // Merge all C_BLOBs
2699  C_BLOB_LIST *list = block.blob_list();
2700  C_BLOB_IT c_blob_it(list);
2701  if (c_blob_it.empty())
2702  return nullptr;
2703  // Move all the outlines to the first blob.
2704  C_OUTLINE_IT ol_it(c_blob_it.data()->out_list());
2705  for (c_blob_it.forward();
2706  !c_blob_it.at_first();
2707  c_blob_it.forward()) {
2708  C_BLOB *c_blob = c_blob_it.data();
2709  ol_it.add_list_after(c_blob->out_list());
2710  }
2711  // Convert the first blob to the output TBLOB.
2712  return TBLOB::PolygonalCopy(false, c_blob_it.data());
2713 }
2714 
2720 void TessBaseAPI::NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode) {
2721  TBOX box = tblob->bounding_box();
2722  float x_center = (box.left() + box.right()) / 2.0f;
2723  float baseline = row->base_line(x_center);
2724  float scale = kBlnXHeight / row->x_height();
2725  tblob->Normalize(nullptr, nullptr, nullptr, x_center, baseline, scale, scale,
2726  0.0f, static_cast<float>(kBlnBaselineOffset), false, nullptr);
2727 }
2728 
2733 static TBLOB *make_tesseract_blob(float baseline, float xheight,
2734  float descender, float ascender,
2735  bool numeric_mode, Pix* pix) {
2736  TBLOB *tblob = TessBaseAPI::MakeTBLOB(pix);
2737 
2738  // Normalize TBLOB
2739  ROW *row =
2740  TessBaseAPI::MakeTessOCRRow(baseline, xheight, descender, ascender);
2741  TessBaseAPI::NormalizeTBLOB(tblob, row, numeric_mode);
2742  delete row;
2743  return tblob;
2744 }
2745 
2751 void TessBaseAPI::AdaptToCharacter(const char *unichar_repr,
2752  int length,
2753  float baseline,
2754  float xheight,
2755  float descender,
2756  float ascender) {
2757  UNICHAR_ID id = tesseract_->unicharset.unichar_to_id(unichar_repr, length);
2758  TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender,
2760  tesseract_->pix_binary());
2761  float threshold;
2762  float best_rating = -100;
2763 
2764 
2765  // Classify to get a raw choice.
2766  BLOB_CHOICE_LIST choices;
2767  tesseract_->AdaptiveClassifier(blob, &choices);
2768  BLOB_CHOICE_IT choice_it;
2769  choice_it.set_to_list(&choices);
2770  for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
2771  choice_it.forward()) {
2772  if (choice_it.data()->rating() > best_rating) {
2773  best_rating = choice_it.data()->rating();
2774  }
2775  }
2776 
2777  threshold = tesseract_->matcher_good_threshold;
2778 
2779  if (blob->outlines)
2780  tesseract_->AdaptToChar(blob, id, kUnknownFontinfoId, threshold,
2782  delete blob;
2783 }
2784 
2785 
2786 PAGE_RES* TessBaseAPI::RecognitionPass1(BLOCK_LIST* block_list) {
2787  PAGE_RES *page_res = new PAGE_RES(false, block_list,
2789  tesseract_->recog_all_words(page_res, nullptr, nullptr, nullptr, 1);
2790  return page_res;
2791 }
2792 
2793 PAGE_RES* TessBaseAPI::RecognitionPass2(BLOCK_LIST* block_list,
2794  PAGE_RES* pass1_result) {
2795  if (!pass1_result)
2796  pass1_result = new PAGE_RES(false, block_list,
2798  tesseract_->recog_all_words(pass1_result, nullptr, nullptr, nullptr, 2);
2799  return pass1_result;
2800 }
2801 
2804  int length; // of unicode_repr
2805  float cost;
2807 
2808  TESS_CHAR(float _cost, const char *repr, int len = -1) : cost(_cost) {
2809  length = (len == -1 ? strlen(repr) : len);
2810  unicode_repr = new char[length + 1];
2811  strncpy(unicode_repr, repr, length);
2812  }
2813 
2815  : unicode_repr(nullptr),
2816  length(0),
2817  cost(0.0f)
2818  { // Satisfies ELISTIZE.
2819  }
2821  delete [] unicode_repr;
2822  }
2823 };
2824 
2825 ELISTIZEH(TESS_CHAR)
2826 ELISTIZE(TESS_CHAR)
2827 
2828 static void add_space(TESS_CHAR_IT* it) {
2829  TESS_CHAR *t = new TESS_CHAR(0, " ");
2830  it->add_after_then_move(t);
2831 }
2832 
2833 
/**
 * Converts a classifier rating into a non-negative cost by adding 100 and
 * clamping anything below zero to zero.
 */
static float rating_to_cost(float rating) {
  const float shifted = 100 + rating;
  // Ratings below -100 are not expected, but clamp them to zero anyway so
  // that the cost can never be negative.
  return shifted < 0 ? 0 : shifted;
}
2842 
2847 static void extract_result(TESS_CHAR_IT* out,
2848  PAGE_RES* page_res) {
2849  PAGE_RES_IT page_res_it(page_res);
2850  int word_count = 0;
2851  while (page_res_it.word() != nullptr) {
2852  WERD_RES *word = page_res_it.word();
2853  const char *str = word->best_choice->unichar_string().string();
2854  const char *len = word->best_choice->unichar_lengths().string();
2855  TBOX real_rect = word->word->bounding_box();
2856 
2857  if (word_count)
2858  add_space(out);
2859  int n = strlen(len);
2860  for (int i = 0; i < n; i++) {
2861  TESS_CHAR *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()),
2862  str, *len);
2863  tc->box = real_rect.intersection(word->box_word->BlobBox(i));
2864  out->add_after_then_move(tc);
2865  str += *len;
2866  len++;
2867  }
2868  page_res_it.forward();
2869  word_count++;
2870  }
2871 }
2872 
2878  int** lengths,
2879  float** costs,
2880  int** x0,
2881  int** y0,
2882  int** x1,
2883  int** y1,
2884  PAGE_RES* page_res) {
2885  TESS_CHAR_LIST tess_chars;
2886  TESS_CHAR_IT tess_chars_it(&tess_chars);
2887  extract_result(&tess_chars_it, page_res);
2888  tess_chars_it.move_to_first();
2889  int n = tess_chars.length();
2890  int text_len = 0;
2891  *lengths = new int[n];
2892  *costs = new float[n];
2893  *x0 = new int[n];
2894  *y0 = new int[n];
2895  *x1 = new int[n];
2896  *y1 = new int[n];
2897  int i = 0;
2898  for (tess_chars_it.mark_cycle_pt();
2899  !tess_chars_it.cycled_list();
2900  tess_chars_it.forward(), i++) {
2901  TESS_CHAR *tc = tess_chars_it.data();
2902  text_len += (*lengths)[i] = tc->length;
2903  (*costs)[i] = tc->cost;
2904  (*x0)[i] = tc->box.left();
2905  (*y0)[i] = tc->box.bottom();
2906  (*x1)[i] = tc->box.right();
2907  (*y1)[i] = tc->box.top();
2908  }
2909  char *p = *text = new char[text_len];
2910 
2911  tess_chars_it.move_to_first();
2912  for (tess_chars_it.mark_cycle_pt();
2913  !tess_chars_it.cycled_list();
2914  tess_chars_it.forward()) {
2915  TESS_CHAR *tc = tess_chars_it.data();
2916  strncpy(p, tc->unicode_repr, tc->length);
2917  p += tc->length;
2918  }
2919  return n;
2920 }
2921 
2923 // The resulting features are returned in int_features, which must be
2924 // of size MAX_NUM_INT_FEATURES. The number of features is returned in
2925 // num_features (or 0 if there was a failure).
2926 // On return feature_outline_index is filled with an index of the outline
2927 // corresponding to each feature in int_features.
2928 // TODO(rays) Fix the caller to use outline_counts instead.
2930  INT_FEATURE_STRUCT* int_features,
2931  int* num_features,
2932  int* feature_outline_index) {
2933  GenericVector<int> outline_counts;
2936  INT_FX_RESULT_STRUCT fx_info;
2937  tesseract_->ExtractFeatures(*blob, false, &bl_features,
2938  &cn_features, &fx_info, &outline_counts);
2939  if (cn_features.empty() || cn_features.size() > MAX_NUM_INT_FEATURES) {
2940  *num_features = 0;
2941  return; // Feature extraction failed.
2942  }
2943  *num_features = cn_features.size();
2944  memcpy(int_features, &cn_features[0], *num_features * sizeof(cn_features[0]));
2945  // TODO(rays) Pass outline_counts back and simplify the calling code.
2946  if (feature_outline_index != nullptr) {
2947  int f = 0;
2948  for (int i = 0; i < outline_counts.size(); ++i) {
2949  while (f < outline_counts[i])
2950  feature_outline_index[f++] = i;
2951  }
2952  }
2953 }
2954 
2955 // This method returns the row to which a box of specified dimensions would
2956 // belong. If no good match is found, it returns nullptr.
2957 ROW* TessBaseAPI::FindRowForBox(BLOCK_LIST* blocks,
2958  int left, int top, int right, int bottom) {
2959  TBOX box(left, bottom, right, top);
2960  BLOCK_IT b_it(blocks);
2961  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
2962  BLOCK* block = b_it.data();
2963  if (!box.major_overlap(block->pdblk.bounding_box()))
2964  continue;
2965  ROW_IT r_it(block->row_list());
2966  for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
2967  ROW* row = r_it.data();
2968  if (!box.major_overlap(row->bounding_box()))
2969  continue;
2970  WERD_IT w_it(row->word_list());
2971  for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
2972  WERD* word = w_it.data();
2973  if (box.major_overlap(word->bounding_box()))
2974  return row;
2975  }
2976  }
2977  }
2978  return nullptr;
2979 }
2980 
2983  int num_max_matches,
2984  int* unichar_ids,
2985  float* ratings,
2986  int* num_matches_returned) {
2987  BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST;
2988  tesseract_->AdaptiveClassifier(blob, choices);
2989  BLOB_CHOICE_IT choices_it(choices);
2990  int& index = *num_matches_returned;
2991  index = 0;
2992  for (choices_it.mark_cycle_pt();
2993  !choices_it.cycled_list() && index < num_max_matches;
2994  choices_it.forward()) {
2995  BLOB_CHOICE* choice = choices_it.data();
2996  unichar_ids[index] = choice->unichar_id();
2997  ratings[index] = choice->rating();
2998  ++index;
2999  }
3000  *num_matches_returned = index;
3001  delete choices;
3002 }
3003 #endif // ndef DISABLED_LEGACY_ENGINE
3004 
3005 } // namespace tesseract.
UNICHARSET * unicharset
Definition: osdetect.h:80
void set_unlv_suspects(WERD_RES *word)
Definition: output.cpp:280
TESS_LOCAL LTRResultIterator * GetLTRIterator()
Definition: baseapi.cpp:1282
BLOCK_RES * block() const
Definition: pageres.h:757
static DawgCache * GlobalDawgCache()
Definition: dict.cpp:193
void MaximallyChopWord(const GenericVector< TBOX > &boxes, BLOCK *block, ROW *row, WERD_RES *word_res)
TESS_LOCAL int FindLines()
Definition: baseapi.cpp:2389
void ReSegmentByClassification(PAGE_RES *page_res)
void GetLoadedLanguagesAsVector(GenericVector< STRING > *langs) const
Definition: baseapi.cpp:456
void SetOutputName(const char *name)
Definition: baseapi.cpp:286
PageSegMode GetPageSegMode() const
Definition: baseapi.cpp:536
TESS_LOCAL int TextLength(int *blob_count)
Definition: baseapi.cpp:2491
int UNICHAR_ID
Definition: unichar.h:35
int SegmentPage(const STRING *input_file, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr)
PolyBlockType BlockType() const
void ExtractFontName(const STRING &filename, STRING *fontname)
Definition: blobclass.cpp:47
Pix * pix_grey() const
static TBLOB * PolygonalCopy(bool allow_detailed_fx, C_BLOB *src)
Definition: blobs.cpp:337
void InitAdaptiveClassifier(TessdataManager *mgr)
Definition: adaptmatch.cpp:528
int size() const
Definition: genericvector.h:71
void SetPageSegMode(PageSegMode mode)
Definition: baseapi.cpp:529
char * GetHOCRText(ETEXT_DESC *monitor, int page_number)
Definition: baseapi.cpp:1485
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:844
bool SetDebugVariable(const char *name, const char *value)
Definition: baseapi.cpp:299
#define TRUE
Definition: capi.h:51
GenericVector< IntParam * > int_params
Definition: params.h:44
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: thresholder.cpp:63
GenericVector< BoolParam * > bool_params
Definition: params.h:45
int first_uni() const
Definition: unichar.cpp:99
static void PrintParams(FILE *fp, const ParamsVectors *member_params)
Definition: params.cpp:171
#define BOOL_VAR(name, val, comment)
Definition: params.h:279
Tesseract * tesseract() const
Definition: baseapi.h:783
const char * GetInitLanguagesAsString() const
Definition: baseapi.cpp:446
void PrepareForTessOCR(BLOCK_LIST *block_list, Tesseract *osd_tess, OSResults *osr)
ROW_RES * row() const
Definition: pageres.h:754
static void ExtractFeatures(const TBLOB &blob, bool nonlinear_norm, GenericVector< INT_FEATURE_STRUCT > *bl_features, GenericVector< INT_FEATURE_STRUCT > *cn_features, INT_FX_RESULT_STRUCT *results, GenericVector< int > *outline_cn_counts)
Definition: intfx.cpp:444
void SavePixForCrash(int resolution, Pix *pix)
Definition: globaloc.cpp:35
#define ELISTIZEH(CLASSNAME)
Definition: elst.h:948
Assume a single uniform block of text. (Default.)
Definition: publictypes.h:172
WERD_CHOICE * prev_word_best_choice_
Definition: wordrec.h:481
int valid_word(const WERD_CHOICE &word, bool numbers_ok) const
Definition: dict.cpp:753
bool PSM_OSD_ENABLED(int pageseg_mode)
Definition: publictypes.h:191
void set_pix_original(Pix *original_pix)
Dict & getDict() override
char * GetTSVText(int page_number)
Definition: baseapi.cpp:1707
const char kUNLVSuspect
Definition: baseapi.cpp:103
double(Dict::* probability_in_context_)(const char *lang, const char *context, int context_bytes, const char *character, int character_bytes)
Probability in context function used by the ngram permuter.
Definition: dict.h:369
const char kUNLVReject
Definition: baseapi.cpp:101
bool GetTextDirection(int *out_offset, float *out_slope)
Definition: baseapi.cpp:2249
TBOX intersection(const TBOX &box) const
Definition: rect.cpp:87
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
const int kBytesPerNumber
Definition: baseapi.cpp:1820
STRING HOcrEscape(const char *text)
Definition: baseapi.cpp:2632
REJMAP reject_map
Definition: pageres.h:287
void * cancel_this
monitor-aware progress callback
Definition: ocrclass.h:132
bool GetBoolVariable(const char *name, bool *value) const
Definition: baseapi.cpp:313
const char * get_script_from_script_id(int id) const
Definition: unicharset.h:849
const char * string() const
Definition: strngs.cpp:196
bool LoadMemBuffer(const char *name, const char *data, int size)
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
Definition: adaptmatch.cpp:192
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:874
int InitLangMod(const char *datapath, const char *language)
Definition: baseapi.cpp:487
TBOX bounding_box() const
Definition: werd.cpp:159
static bool SetParam(const char *name, const char *value, SetParamConstraint constraint, ParamsVectors *member_params)
Definition: params.cpp:89
float base_line(float xpos) const
Definition: ocrrow.h:59
bool classify_bln_numeric_mode
Definition: classify.h:541
#define DIR
Definition: polyaprx.cpp:44
int init_tesseract_lm(const char *arg0, const char *textbase, const char *language, TessdataManager *mgr)
Definition: tessedit.cpp:462
virtual void Clear()
Destroy the Pix if there is one, freeing memory.
Definition: thresholder.cpp:46
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:118
Definition: rect.h:34
Pix * pix_original() const
WERD_LIST * word_list()
Definition: ocrrow.h:55
static void ResetToDefaults(ParamsVectors *member_params)
Definition: params.cpp:196
TESS_LOCAL void DetectParagraphs(bool after_text_recognition)
Definition: baseapi.cpp:2600
int32_t length() const
Definition: rejctmap.h:223
Definition: werd.h:35
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:876
void RunAdaptiveClassifier(TBLOB *blob, int num_max_matches, int *unichar_ids, float *ratings, int *num_matches_returned)
Definition: baseapi.cpp:2982
GenericVector< StringParam * > string_params
Definition: params.h:46
const int kBlnXHeight
Definition: normalis.h:24
Tesseract * osd_tesseract_
For orientation & script detection.
Definition: baseapi.h:871
void ApplyBoxTraining(const STRING &fontname, PAGE_RES *page_res)
virtual Pix * GetPixRectGrey()
void set_pix_grey(Pix *grey_pix)
TESS_LOCAL bool InternalSetImage()
Definition: baseapi.cpp:2317
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:209
float angle() const
find angle
Definition: points.h:248
void GetFeaturesForBlob(TBLOB *blob, INT_FEATURE_STRUCT *int_features, int *num_features, int *feature_outline_index)
Definition: baseapi.cpp:2929
C_BLOB_LIST * blob_list()
get blobs
Definition: ocrblock.h:130
float rating() const
Definition: ratngs.h:327
virtual char * GetUTF8Text(PageIteratorLevel level) const
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
BLOCK_LIST * FindLinesCreateBlockList()
Definition: baseapi.cpp:2656
float certainty() const
Definition: ratngs.h:330
const int kBlnBaselineOffset
Definition: normalis.h:25
#define PERF_COUNT_START(FUNCT_NAME)
virtual Pix * GetPixRectThresholds()
const int kUniChs[]
Definition: baseapi.cpp:1885
void set_text(const char *new_text)
Definition: werd.h:124
#define UNICHAR_LEN
Definition: unichar.h:31
int orientation_and_script_detection(STRING &filename, OSResults *osr, tesseract::Tesseract *tess)
Definition: osdetect.cpp:190
void SetFillLatticeFunc(FillLatticeFunc f)
Definition: baseapi.cpp:2311
virtual TESS_LOCAL bool Threshold(Pix **pix)
Definition: baseapi.cpp:2334
bool IsEmpty() const
Return true if no image has been set.
Definition: thresholder.cpp:51
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: baseapi.cpp:594
#define BOOL
Definition: capi.h:50
const char * WordRecognitionLanguage() const
uint8_t space()
Definition: werd.h:102
const char * string() const
Definition: params.h:202
bool tessedit_resegment_from_line_boxes
const TBOX & BlobBox(int index) const
Definition: boxword.h:84
void(Wordrec::* fill_lattice_)(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
Definition: wordrec.h:485
int NumDawgs() const
Return the number of dawgs in the dawgs_ vector.
Definition: dict.h:415
const char * GetStringVariable(const char *name) const
Definition: baseapi.cpp:321
const int kBytesPer64BitNumber
Definition: baseapi.cpp:1828
TESS_LOCAL PAGE_RES * RecognitionPass2(BLOCK_LIST *block_list, PAGE_RES *pass1_result)
Definition: baseapi.cpp:2793
static ROW * FindRowForBox(BLOCK_LIST *blocks, int left, int top, int right, int bottom)
Definition: baseapi.cpp:2957
char * GetBoxText(int page_number)
Definition: baseapi.cpp:1844
Pix * GetThresholdedImage()
Definition: baseapi.cpp:649
int(Dict::* DictFunc)(void *void_dawg_args, const UNICHARSET &unicharset, UNICHAR_ID unichar_id, bool word_end) const
Definition: baseapi.h:76
bool BeginDocument(const char *title)
Definition: renderer.cpp:72
Boxa * GetConnectedComponents(Pixa **cc)
Definition: baseapi.cpp:708
virtual std::vector< std::vector< std::pair< const char *, float > > > * GetBestLSTMSymbolChoices() const
float sconfidence
Definition: osdetect.h:45
bool GetVariableAsString(const char *name, STRING *val)
Definition: baseapi.cpp:336
OSBestResult best_result
Definition: osdetect.h:81
const char * GetDatapath()
Definition: baseapi.cpp:978
const char kTesseractReject
Definition: baseapi.cpp:99
const char * GetUnichar(int unichar_id)
Definition: baseapi.cpp:2616
int GetScaledYResolution() const
Definition: thresholder.h:93
int orientation_id
Definition: osdetect.h:43
const char * c_str() const
Definition: strngs.cpp:207
WERD_RES * restart_page()
Definition: pageres.h:698
void SetEquationDetect(EquationDetect *detector)
int init_tesseract(const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params, TessdataManager *mgr)
Definition: tessedit.cpp:296
const char * WordFontAttributes(bool *is_bold, bool *is_italic, bool *is_underlined, bool *is_monospace, bool *is_serif, bool *is_smallcaps, int *pointsize, int *font_id) const
void SetProbabilityInContextFunc(ProbabilityInContextFunc f)
Definition: baseapi.cpp:2298
void ReadDebugConfigFile(const char *filename)
Definition: baseapi.cpp:520
BLOCK * block
Definition: pageres.h:117
void SetInputImage(Pix *pix)
Definition: baseapi.cpp:968
void split(const char c, GenericVector< STRING > *splited)
Definition: strngs.cpp:284
void TrainLineRecognizer(const STRING &input_imagename, const STRING &output_basename, BLOCK_LIST *block_list)
Definition: linerec.cpp:43
const Dawg * GetDawg(int i) const
Definition: baseapi.cpp:2621
static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode)
Definition: baseapi.cpp:2720
PAGE_RES * SetupApplyBoxes(const GenericVector< TBOX > &boxes, BLOCK_LIST *block_list)
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
int16_t left() const
Definition: rect.h:72
GenericVector< ParagraphModel * > * paragraph_models_
Definition: baseapi.h:875
bool flag(WERD_FLAGS mask) const
Definition: werd.h:126
const STRING & unichar_lengths() const
Definition: ratngs.h:548
bool AdaptToWordStr(PageSegMode mode, const char *wordstr)
Definition: baseapi.cpp:2119
const Dawg * GetDawg(int index) const
Return i-th dawg pointer recorded in the dawgs_ vector.
Definition: dict.h:417
static bool GetParamAsString(const char *name, const ParamsVectors *member_params, STRING *value)
Definition: params.cpp:133
StrongScriptDirection WordDirection() const
tesseract::ParamsVectors * GlobalParams()
Definition: params.cpp:32
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:720
int16_t top() const
Definition: rect.h:58
bool ProcessPages(const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
Definition: baseapi.cpp:1084
bool GetDoubleVariable(const char *name, double *value) const
Definition: baseapi.cpp:327
void chomp_string(char *str)
Definition: helpers.h:83
void PrintVariables(FILE *fp) const
Definition: baseapi.cpp:341
int RecognizeForChopTest(ETEXT_DESC *monitor)
Definition: baseapi.cpp:935
CRUNCH_MODE unlv_crunch_mode
Definition: pageres.h:310
int GetThresholdedImageScaleFactor() const
Definition: baseapi.cpp:802
float x_height() const
Definition: ocrrow.h:64
static void ClearPersistentCache()
Definition: baseapi.cpp:2230
static void CatchSignals()
Definition: baseapi.cpp:259
const int kMaxIntSize
Definition: baseapi.cpp:114
UNICHARSET unicharset
Definition: ccutil.h:68
TruthCallback * truth_cb_
Definition: baseapi.h:884
void Normalize(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift, bool inverse, Pix *pix)
Definition: blobs.cpp:407
bool AddImage(TessBaseAPI *api)
Definition: renderer.cpp:83
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:670
char * TesseractRect(const unsigned char *imagedata, int bytes_per_pixel, int bytes_per_line, int left, int top, int width, int height)
Definition: baseapi.cpp:556
STRING lang
Definition: ccutil.h:66
bool major_overlap(const TBOX &box) const
Definition: rect.h:368
#define FALSE
Definition: capi.h:52
void pgeditor_main(int width, int height, PAGE_RES *page_res)
Definition: pgedit.cpp:327
double matcher_good_threshold
Definition: classify.h:461
void add_str_double(const char *str, double number)
Definition: strngs.cpp:389
PAGE_RES * ApplyBoxes(const STRING &fname, bool find_segmentation, BLOCK_LIST *block_list)
bool recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, const char *word_config, int dopasses)
Definition: control.cpp:308
static ResultIterator * StartOfParagraph(const LTRResultIterator &resit)
#define MAX_NUM_INT_FEATURES
Definition: intproto.h:129
int IntCastRounded(double x)
Definition: helpers.h:168
void set_pix_thresholds(Pix *thresholds)
bool DetectOS(OSResults *)
Definition: baseapi.cpp:2522
WERD_RES * word() const
Definition: pageres.h:751
int GetScaledEstimatedResolution() const
Definition: thresholder.h:106
#define ELISTIZE(CLASSNAME)
Definition: elst.h:961
void BestChoiceToCorrectText()
Definition: pageres.cpp:929
bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2, int *y2) const
Definition: werd.h:59
Pix * pix_binary() const
ParamsVectors * params()
Definition: ccutil.h:62
bool GetIntVariable(const char *name, int *value) const
Definition: baseapi.cpp:305
void SetSourceYResolution(int ppi)
Definition: thresholder.h:86
Boxa * GetStrips(Pixa **pixa, int **blockids)
Definition: baseapi.cpp:689
TBOX bounding_box() const
Definition: ocrrow.h:88
STRING * datapath_
Current location of tessdata.
Definition: baseapi.h:880
unsigned char BOOL8
Definition: host.h:34
TESS_LOCAL void AdaptToCharacter(const char *unichar_repr, int length, float baseline, float xheight, float descender, float ascender)
Definition: baseapi.cpp:2751
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:883
Definition: ocrrow.h:36
void SetSourceResolution(int ppi)
Definition: baseapi.cpp:604
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:514
bool empty() const
Definition: genericvector.h:90
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
TBOX bounding_box() const
Definition: blobs.cpp:478
bool ProcessPage(Pix *pix, int page_index, const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
Definition: baseapi.cpp:1213
Definition: werd.h:34
TESS_CHAR(float _cost, const char *repr, int len=-1)
Definition: baseapi.cpp:2808
void set_min_orientation_margin(double margin)
Definition: baseapi.cpp:2537
Definition: ocrblock.h:30
int length() const
Definition: ratngs.h:303
void GetAvailableLanguagesAsVector(GenericVector< STRING > *langs) const
Definition: baseapi.cpp:470
bool stream_filelist
Definition: baseapi.cpp:92
bool(* FileReader)(const STRING &filename, GenericVector< char > *data)
OcrEngineMode last_oem_requested_
Last ocr language mode requested.
Definition: baseapi.h:882
void DeleteUnusedDawgs()
Definition: dawg_cache.h:43
void SetRectangle(int left, int top, int width, int height)
Definition: baseapi.cpp:638
FILE * init_recog_training(const STRING &fname)
virtual bool IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel element) const
void set_source_resolution(int ppi)
void TidyUp(PAGE_RES *page_res)
void GetBlockTextOrientations(int **block_orientation, bool **vertical_writing)
Definition: baseapi.cpp:2555
int push_back(T object)
virtual R Run()=0
void ReadConfigFile(const char *filename)
Definition: baseapi.cpp:515
int GetSourceYResolution() const
Definition: thresholder.h:90
const int kLatinChs[]
Definition: baseapi.cpp:1889
void add_str_int(const char *str, int number)
Definition: strngs.cpp:379
GenericVector< DoubleParam * > double_params
Definition: params.h:47
bool IsBinary() const
Returns true if the source image is binary.
Definition: thresholder.h:75
void SetRectangle(int left, int top, int width, int height)
const int kMinRectSize
Definition: baseapi.cpp:97
static TBLOB * MakeTBLOB(Pix *pix)
Definition: baseapi.cpp:2690
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:877
float rating() const
Definition: ratngs.h:80
EquationDetect * equ_detect_
The equation detector.
Definition: baseapi.h:872
Automatic page segmentation, but no OSD, or OCR.
Definition: publictypes.h:167
Definition: strngs.h:45
static void DeleteBlockList(BLOCK_LIST *block_list)
Definition: baseapi.cpp:2668
void signal_exit(int signal_code)
Definition: globaloc.cpp:53
STRING datadir
Definition: ccutil.h:64
static size_t getOpenCLDevice(void **device)
Definition: baseapi.cpp:239
bool IsValidCharacter(const char *utf8_character)
Definition: baseapi.cpp:2242
virtual bool IsAtBeginningOf(PageIteratorLevel level) const
float Confidence(PageIteratorLevel level) const
Tesseract * get_sub_lang(int index) const
PageIterator * AnalyseLayout()
Definition: baseapi.cpp:824
void DetectParagraphs(int debug_level, GenericVector< RowInfo > *row_infos, GenericVector< PARA *> *row_owners, PARA_LIST *paragraphs, GenericVector< ParagraphModel *> *models)
STRING * language_
Last initialized language.
Definition: baseapi.h:881
static TESS_LOCAL int TesseractExtractResult(char **text, int **lengths, float **costs, int **x0, int **y0, int **x1, int **y1, PAGE_RES *page_res)
Definition: baseapi.cpp:2877
OcrEngineMode oem() const
Definition: baseapi.h:785
virtual bool Next(PageIteratorLevel level)
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:70
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:290
void delete_data_pointers()
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:60
CANCEL_FUNC cancel
for errcode use
Definition: ocrclass.h:129
bool PTIsTextType(PolyBlockType type)
Definition: publictypes.h:82
Definition: points.h:189
const char * kOldVarsFile
Definition: baseapi.cpp:112
bool WriteTRFile(const STRING &filename)
Definition: blobclass.cpp:102
const char * kInputFile
Definition: baseapi.cpp:108
const UNICHARSET & getUnicharset() const
Definition: dict.h:98
struct TessResultRenderer TessResultRenderer
Definition: capi.h:83
Pix * GetBinaryImage(PageIteratorLevel level) const
const STRING & unichar_string() const
Definition: ratngs.h:541
const int kMaxBytesPerLine
Definition: baseapi.cpp:1835
Boxa * GetTextlines(const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:675
MutableIterator * GetMutableIterator()
Definition: baseapi.cpp:1316
STRING * output_file_
Name used by debug code.
Definition: baseapi.h:879
void SetInputName(const char *name)
Definition: baseapi.cpp:278
double(Dict::* ProbabilityInContextFunc)(const char *lang, const char *context, int context_bytes, const char *character, int character_bytes)
Definition: baseapi.h:79
int16_t right() const
Definition: rect.h:79
virtual ~TessBaseAPI()
Definition: baseapi.cpp:216
float oconfidence
Definition: osdetect.h:46
static const char * Version()
Definition: baseapi.cpp:223
bool Empty(PageIteratorLevel level) const
void ClearAdaptiveClassifier()
Definition: baseapi.cpp:579
static ROW * MakeTessOCRRow(float baseline, float xheight, float descender, float ascender)
Definition: baseapi.cpp:2673
virtual bool Next(PageIteratorLevel level)
int OrientationIdToValue(const int &id)
Definition: osdetect.cpp:568
const char * GetInputName()
Definition: baseapi.cpp:972
#define PERF_COUNT_END
bool wordrec_run_blamer
Definition: wordrec.h:237
Orientation and script detection only.
Definition: publictypes.h:164
STRING * input_file_
Name used by training code.
Definition: baseapi.h:878
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:111
Boxa * GetRegions(Pixa **pixa)
Definition: baseapi.cpp:663
void read_config_file(const char *filename, SetParamConstraint constraint)
Definition: tessedit.cpp:60
WERD_RES * forward()
Definition: pageres.h:731
int IsValidWord(const char *word)
Definition: baseapi.cpp:2238
virtual void Run(A1, A2, A3, A4)=0
Definition: blobs.h:268
void Orientation(tesseract::Orientation *orientation, tesseract::WritingDirection *writing_direction, tesseract::TextlineOrder *textline_order, float *deskew_angle) const
void CorrectClassifyWords(PAGE_RES *page_res)
TESS_API int get_best_script(int orientation_id) const
Definition: osdetect.cpp:112
virtual bool IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel element) const
bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
void AdaptToChar(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, float Threshold, ADAPT_TEMPLATES adaptive_templates)
Definition: adaptmatch.cpp:857
void recog_training_segmented(const STRING &fname, PAGE_RES *page_res, volatile ETEXT_DESC *monitor, FILE *output_file)
int Init(const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_non_debug_params)
Definition: baseapi.cpp:353
constexpr int kMinCredibleResolution
Definition: publictypes.h:38
#define PERF_COUNT_SUB(SUB)
int16_t bottom() const
Definition: rect.h:65
TESSLINE * outlines
Definition: blobs.h:384
bool AnyLSTMLang() const
#define MAX_PATH
Definition: platform.h:42
void set_deadline_msecs(int32_t deadline_msecs)
Definition: ocrclass.h:152
void extract_edges(Pix *pix, BLOCK *block)
Definition: edgblob.cpp:334
Boxa * GetWords(Pixa **pixa)
Definition: baseapi.cpp:698
void SetDictFunc(DictFunc f)
Definition: baseapi.cpp:2284
UNICHAR_ID unichar_id() const
Definition: ratngs.h:77
TESS_LOCAL PAGE_RES * RecognitionPass1(BLOCK_LIST *block_list)
Definition: baseapi.cpp:2786
PDBLK pdblk
Definition: ocrblock.h:192
const int kNumbersPerBlob
Definition: baseapi.cpp:1815
int32_t length() const
Definition: strngs.cpp:191
WERD_CHOICE * best_choice
Definition: pageres.h:235
char * GetOsdText(int page_number)
Definition: baseapi.cpp:2041
virtual void GetImageSizes(int *left, int *top, int *width, int *height, int *imagewidth, int *imageheight)
constexpr int kMaxCredibleResolution
Definition: publictypes.h:40
tesseract::BoxWord * box_word
Definition: pageres.h:266
bool DetectOrientationScript(int *orient_deg, float *orient_conf, const char **script_name, float *script_conf)
Definition: baseapi.cpp:2010
void LearnWord(const char *fontname, WERD_RES *word)
Definition: adaptmatch.cpp:251
int num_sub_langs() const
ResultIterator * GetIterator()
Definition: baseapi.cpp:1299
int(Dict::* letter_is_okay_)(void *void_dawg_args, const UNICHARSET &unicharset, UNICHAR_ID unichar_id, bool word_end) const
Definition: dict.h:357
bool SetVariable(const char *name, const char *value)
Definition: baseapi.cpp:293
void(Wordrec::* FillLatticeFunc)(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
Definition: baseapi.h:86
float y() const
Definition: points.h:211
int NumDawgs() const
Definition: baseapi.cpp:2627
bool ProcessPagesInternal(const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
Definition: baseapi.cpp:1112
ROW * row
Definition: pageres.h:143
void assign(const char *cstr, int len)
Definition: strngs.cpp:420
#define ASSERT_HOST(x)
Definition: errcode.h:84
virtual bool ThresholdToPix(PageSegMode pageseg_mode, Pix **pix)
Returns false on error.
FileReader reader_
Reads files from any filesystem.
Definition: baseapi.h:873
char * GetUTF8Text(PageIteratorLevel level) const
const int kBytesPerBoxFileLine
Definition: baseapi.cpp:1826
WERD * word
Definition: pageres.h:189
Pix * GetImage(PageIteratorLevel level, int padding, Pix *original_img, int *left, int *top) const