tesseract  5.0.0-alpha-619-ge9db
baseapi.h
Go to the documentation of this file.
1 // File: baseapi.h
3 // Description: Simple API for calling tesseract.
4 // Author: Ray Smith
5 //
6 // (C) Copyright 2006, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
18 
19 #ifndef TESSERACT_API_BASEAPI_H_
20 #define TESSERACT_API_BASEAPI_H_
21 
22 #include <cstdio>
23 #include <functional> // for std::function
24 
25 // To avoid collision with other typenames include the ABSOLUTE MINIMUM
26 // complexity of includes here. Use forward declarations wherever possible
27 // and hide includes of complex types in baseapi.cpp.
28 #include <tesseract/version.h>
29 
30 #include "apitypes.h"
31 #include "pageiterator.h"
32 #include "platform.h"
33 #include "publictypes.h"
34 #include "resultiterator.h"
35 #include "serialis.h"
36 #include "thresholder.h"
37 #include "unichar.h"
38 
39 template <typename T>
40 class GenericVector;
41 class PAGE_RES;
42 class PAGE_RES_IT;
43 class ParagraphModel;
44 struct BlamerBundle;
45 class BLOCK_LIST;
46 class DENORM;
47 class MATRIX;
48 class ROW;
49 class STRING;
50 class WERD;
51 struct Pix;
52 struct Box;
53 struct Pixa;
54 struct Boxa;
55 class ETEXT_DESC;
56 struct OSResults;
57 class TBOX;
58 class UNICHARSET;
59 class WERD_CHOICE_LIST;
60 
61 struct INT_FEATURE_STRUCT;
63 struct TBLOB;
64 
65 namespace tesseract {
66 
67 class Dawg;
68 class Dict;
69 class EquationDetect;
70 class PageIterator;
71 class LTRResultIterator;
72 class ResultIterator;
73 class MutableIterator;
74 class TessResultRenderer;
75 class Tesseract;
76 class Trie;
77 class Wordrec;
78 
79 using DictFunc = int (Dict::*)(void*, const UNICHARSET&, UNICHAR_ID,
80  bool) const;
81 using ProbabilityInContextFunc = double (Dict::*)(const char*, const char*, int,
82  const char*, int);
83 using ParamsModelClassifyFunc = float (Dict::*)(const char*, void*);
84 using FillLatticeFunc = void (Wordrec::*)(const MATRIX&,
85  const WERD_CHOICE_LIST&,
86  const UNICHARSET&, BlamerBundle*);
87 using TruthCallback =
88  std::function<void(const UNICHARSET&, int, PageIterator*, Pix*)>;
89 
98 class TESS_API TessBaseAPI {
99  public:
100  TessBaseAPI();
101  virtual ~TessBaseAPI();
102  // Copy constructor and assignment operator are currently unsupported.
103  TessBaseAPI(TessBaseAPI const&) = delete;
104  TessBaseAPI& operator=(TessBaseAPI const&) = delete;
105 
109  static const char* Version();
110 
118  static size_t getOpenCLDevice(void** device);
119 
124  void SetInputName(const char* name);
132  const char* GetInputName();
133  // Takes ownership of the input pix.
134  void SetInputImage(Pix* pix);
135  Pix* GetInputImage();
136  int GetSourceYResolution();
137  const char* GetDatapath();
138 
140  void SetOutputName(const char* name);
141 
155  bool SetVariable(const char* name, const char* value);
156  bool SetDebugVariable(const char* name, const char* value);
157 
162  bool GetIntVariable(const char* name, int* value) const;
163  bool GetBoolVariable(const char* name, bool* value) const;
164  bool GetDoubleVariable(const char* name, double* value) const;
165 
170  const char* GetStringVariable(const char* name) const;
171 
175  void PrintVariables(FILE* fp) const;
176 
180  bool GetVariableAsString(const char* name, STRING* val);
181 
219  int Init(const char* datapath, const char* language, OcrEngineMode mode,
220  char** configs, int configs_size,
221  const GenericVector<STRING>* vars_vec,
222  const GenericVector<STRING>* vars_values,
223  bool set_only_non_debug_params);
224  int Init(const char* datapath, const char* language, OcrEngineMode oem) {
225  return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
226  }
227  int Init(const char* datapath, const char* language) {
228  return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr,
229  false);
230  }
231  // In-memory version reads the traineddata file directly from the given
232  // data[data_size] array, and/or reads data via a FileReader.
233  int Init(const char* data, int data_size, const char* language,
234  OcrEngineMode mode, char** configs, int configs_size,
235  const GenericVector<STRING>* vars_vec,
236  const GenericVector<STRING>* vars_values,
237  bool set_only_non_debug_params, FileReader reader);
238 
247  const char* GetInitLanguagesAsString() const;
248 
254  void GetLoadedLanguagesAsVector(GenericVector<STRING>* langs) const;
255 
259  void GetAvailableLanguagesAsVector(GenericVector<STRING>* langs) const;
260 
267  int InitLangMod(const char* datapath, const char* language);
268 
273  void InitForAnalysePage();
274 
281  void ReadConfigFile(const char* filename);
283  void ReadDebugConfigFile(const char* filename);
284 
290  void SetPageSegMode(PageSegMode mode);
291 
293  PageSegMode GetPageSegMode() const;
294 
312  char* TesseractRect(const unsigned char* imagedata, int bytes_per_pixel,
313  int bytes_per_line, int left, int top, int width,
314  int height);
315 
320  void ClearAdaptiveClassifier();
321 
328  /* @{ */
329 
337  void SetImage(const unsigned char* imagedata, int width, int height,
338  int bytes_per_pixel, int bytes_per_line);
339 
348  void SetImage(Pix* pix);
349 
354  void SetSourceResolution(int ppi);
355 
361  void SetRectangle(int left, int top, int width, int height);
362 
370  void SetThresholder(ImageThresholder* thresholder) {
371  delete thresholder_;
372  thresholder_ = thresholder;
373  ClearResults();
374  }
375 
381  Pix* GetThresholdedImage();
382 
388  Boxa* GetRegions(Pixa** pixa);
389 
401  Boxa* GetTextlines(bool raw_image, int raw_padding, Pixa** pixa,
402  int** blockids, int** paraids);
403  /*
404  Helper method to extract from the thresholded image. (most common usage)
405  */
406  Boxa* GetTextlines(Pixa** pixa, int** blockids) {
407  return GetTextlines(false, 0, pixa, blockids, nullptr);
408  }
409 
418  Boxa* GetStrips(Pixa** pixa, int** blockids);
419 
425  Boxa* GetWords(Pixa** pixa);
426 
435  Boxa* GetConnectedComponents(Pixa** cc);
436 
449  Boxa* GetComponentImages(PageIteratorLevel level, bool text_only,
450  bool raw_image, int raw_padding, Pixa** pixa,
451  int** blockids, int** paraids);
452  // Helper function to get binary images with no padding (most common usage).
453  Boxa* GetComponentImages(const PageIteratorLevel level, const bool text_only,
454  Pixa** pixa, int** blockids) {
455  return GetComponentImages(level, text_only, false, 0, pixa, blockids,
456  nullptr);
457  }
458 
465  int GetThresholdedImageScaleFactor() const;
466 
482  PageIterator* AnalyseLayout();
483  PageIterator* AnalyseLayout(bool merge_similar_words);
484 
491  int Recognize(ETEXT_DESC* monitor);
492 
498 #ifndef DISABLED_LEGACY_ENGINE
499 
500  int RecognizeForChopTest(ETEXT_DESC* monitor);
501 #endif
502 
525  bool ProcessPages(const char* filename, const char* retry_config,
526  int timeout_millisec, TessResultRenderer* renderer);
527  // Does the real work of ProcessPages.
528  bool ProcessPagesInternal(const char* filename, const char* retry_config,
529  int timeout_millisec, TessResultRenderer* renderer);
530 
540  bool ProcessPage(Pix* pix, int page_index, const char* filename,
541  const char* retry_config, int timeout_millisec,
542  TessResultRenderer* renderer);
543 
552  ResultIterator* GetIterator();
553 
562  MutableIterator* GetMutableIterator();
563 
568  char* GetUTF8Text();
569 
579  char* GetHOCRText(ETEXT_DESC* monitor, int page_number);
580 
587  char* GetHOCRText(int page_number);
588 
593  char* GetAltoText(ETEXT_DESC* monitor, int page_number);
594 
599  char* GetAltoText(int page_number);
600 
606  char* GetTSVText(int page_number);
607 
614  char* GetLSTMBoxText(int page_number);
615 
623  char* GetBoxText(int page_number);
624 
631  char* GetWordStrBoxText(int page_number);
632 
638  char* GetUNLVText();
639 
649  bool DetectOrientationScript(int* orient_deg, float* orient_conf,
650  const char** script_name, float* script_conf);
651 
657  char* GetOsdText(int page_number);
658 
660  int MeanTextConf();
667  int* AllWordConfidences();
668 
669 #ifndef DISABLED_LEGACY_ENGINE
670 
680  bool AdaptToWordStr(PageSegMode mode, const char* wordstr);
681 #endif // ndef DISABLED_LEGACY_ENGINE
682 
689  void Clear();
690 
697  void End();
698 
706  static void ClearPersistentCache();
707 
714  int IsValidWord(const char* word);
715  // Returns true if utf8_character is defined in the UniCharset.
716  bool IsValidCharacter(const char* utf8_character);
717 
718  bool GetTextDirection(int* out_offset, float* out_slope);
719 
721  void SetDictFunc(DictFunc f);
722 
726  void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
727 
732  bool DetectOS(OSResults*);
733 
738  void GetBlockTextOrientations(int** block_orientation,
739  bool** vertical_writing);
740 
741 #ifndef DISABLED_LEGACY_ENGINE
742 
744  void SetFillLatticeFunc(FillLatticeFunc f);
745 
747  BLOCK_LIST* FindLinesCreateBlockList();
748 
754  static void DeleteBlockList(BLOCK_LIST* block_list);
755 
757  static ROW* MakeTessOCRRow(float baseline, float xheight, float descender,
758  float ascender);
759 
761  static TBLOB* MakeTBLOB(Pix* pix);
762 
768  static void NormalizeTBLOB(TBLOB* tblob, ROW* row, bool numeric_mode);
769 
771  void GetFeaturesForBlob(TBLOB* blob, INT_FEATURE_STRUCT* int_features,
772  int* num_features, int* feature_outline_index);
773 
778  static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top, int right,
779  int bottom);
780 
785  void RunAdaptiveClassifier(TBLOB* blob, int num_max_matches, int* unichar_ids,
786  float* ratings, int* num_matches_returned);
787 #endif // ndef DISABLED_LEGACY_ENGINE
788 
790  const char* GetUnichar(int unichar_id);
791 
793  const Dawg* GetDawg(int i) const;
794 
796  int NumDawgs() const;
797 
798  Tesseract* tesseract() const {
799  return tesseract_;
800  }
801 
802  OcrEngineMode oem() const {
803  return last_oem_requested_;
804  }
805 
806  void InitTruthCallback(TruthCallback cb) {
807  truth_cb_ = cb;
808  }
809 
810  void set_min_orientation_margin(double margin);
811  /* @} */
812 
813  protected:
816  TESS_LOCAL bool InternalSetImage();
817 
822  TESS_LOCAL virtual bool Threshold(Pix** pix);
823 
828  TESS_LOCAL int FindLines();
829 
831  void ClearResults();
832 
838  TESS_LOCAL LTRResultIterator* GetLTRIterator();
839 
846  TESS_LOCAL int TextLength(int* blob_count);
847 
849  TESS_LOCAL void DetectParagraphs(bool after_text_recognition);
850 
851 #ifndef DISABLED_LEGACY_ENGINE
852 
854  /* @{ */
855 
860  TESS_LOCAL void AdaptToCharacter(const char* unichar_repr, int length,
861  float baseline, float xheight,
862  float descender, float ascender);
863 
865  TESS_LOCAL PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list);
866 
867  TESS_LOCAL PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list,
868  PAGE_RES* pass1_result);
869 
874  TESS_LOCAL static int TesseractExtractResult(char** text, int** lengths,
875  float** costs, int** x0,
876  int** y0, int** x1, int** y1,
877  PAGE_RES* page_res);
878 
879  TESS_LOCAL const PAGE_RES* GetPageRes() const {
880  return page_res_;
881  }
882  /* @} */
883 #endif // ndef DISABLED_LEGACY_ENGINE
884 
885  protected:
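886  Tesseract* tesseract_;
887  Tesseract* osd_tesseract_;
888  EquationDetect* equ_detect_;
889  FileReader reader_;
890  ImageThresholder* thresholder_;
891  GenericVector<ParagraphModel*>* paragraph_models_;
892  BLOCK_LIST* block_list_;
893  PAGE_RES* page_res_;
894  STRING* input_file_;
895  STRING* output_file_;
896  STRING* datapath_;
897  STRING* language_;
898  OcrEngineMode last_oem_requested_;
899  bool recognition_done_;
900  TruthCallback truth_cb_;
901 
906  /* @{ */
907  int rect_left_;
908  int rect_top_;
909  int rect_width_;
910  int rect_height_;
911  int image_width_;
912  int image_height_;
913  /* @} */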
914 
915  private:
916  // A list of image filenames gets special consideration
917  bool ProcessPagesFileList(FILE* fp, STRING* buf, const char* retry_config,
918  int timeout_millisec, TessResultRenderer* renderer,
919  int tessedit_page_number);
920  // TIFF supports multipage so gets special consideration.
921  bool ProcessPagesMultipageTiff(const unsigned char* data, size_t size,
922  const char* filename, const char* retry_config,
923  int timeout_millisec,
924  TessResultRenderer* renderer,
925  int tessedit_page_number);
926 }; // class TessBaseAPI.
927 
929 STRING HOcrEscape(const char* text);
930 } // namespace tesseract.
931 
932 #endif // TESSERACT_API_BASEAPI_H_
tesseract::TessBaseAPI::Init
int Init(const char *datapath, const char *language, OcrEngineMode oem)
Definition: baseapi.h:224
tesseract::TessBaseAPI::page_res_
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:893
tesseract::TessBaseAPI::last_oem_requested_
OcrEngineMode last_oem_requested_
Last ocr language mode requested.
Definition: baseapi.h:898
tesseract::EquationDetect
Definition: equationdetect.h:38
tesseract::DictFunc
int(Dict::*)(void *, const UNICHARSET &, UNICHAR_ID, bool) const DictFunc
Definition: baseapi.h:80
tesseract::TessBaseAPI::image_height_
int image_height_
Definition: baseapi.h:912
tesseract::Wordrec
Definition: wordrec.h:192
tesseract::TessBaseAPI::paragraph_models_
GenericVector< ParagraphModel * > * paragraph_models_
Definition: baseapi.h:891
TESS_LOCAL
#define TESS_LOCAL
Definition: platform.h:55
tesseract::Trie
Definition: trie.h:54
tesseract::TessBaseAPI::SetThresholder
void SetThresholder(ImageThresholder *thresholder)
Definition: baseapi.h:370
baseline
Definition: mfoutline.h:62
tesseract::TessBaseAPI::Init
int Init(const char *datapath, const char *language)
Definition: baseapi.h:227
tesseract::Tesseract
Definition: tesseractclass.h:172
tesseract::PageIterator
Definition: pageiterator.h:52
platform.h
MATRIX
Definition: matrix.h:574
OSResults
Definition: osdetect.h:50
tesseract::TessBaseAPI::block_list_
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:892
STRING
Definition: strngs.h:45
tesseract::TessBaseAPI::rect_top_
int rect_top_
Definition: baseapi.h:908
apitypes.h
tesseract::FileReader
bool(*)(const char *filename, GenericVector< char > *data) FileReader
Definition: serialis.h:47
tesseract::TessBaseAPI::GetPageRes
const TESS_LOCAL PAGE_RES * GetPageRes() const
Definition: baseapi.h:879
tesseract::ProbabilityInContextFunc
double(Dict::*)(const char *, const char *, int, const char *, int) ProbabilityInContextFunc
Definition: baseapi.h:82
tesseract::TessBaseAPI::rect_height_
int rect_height_
Definition: baseapi.h:910
ETEXT_DESC
Definition: ocrclass.h:95
tesseract::LTRResultIterator
Definition: ltrresultiterator.h:47
ParagraphModel
Definition: ocrpara.h:114
tesseract::TessBaseAPI::InitTruthCallback
void InitTruthCallback(TruthCallback cb)
Definition: baseapi.h:806
resultiterator.h
tesseract::ParamsModelClassifyFunc
float(Dict::*)(const char *, void *) ParamsModelClassifyFunc
Definition: baseapi.h:83
tesseract::OcrEngineMode
OcrEngineMode
Definition: publictypes.h:265
tesseract::TessBaseAPI::truth_cb_
TruthCallback truth_cb_
fxn for setting truth_* in WERD_RES
Definition: baseapi.h:900
tesseract::TessBaseAPI::rect_width_
int rect_width_
Definition: baseapi.h:909
tesseract::TessBaseAPI::equ_detect_
EquationDetect * equ_detect_
The equation detector.
Definition: baseapi.h:888
tesstrain_utils.int
int
Definition: tesstrain_utils.py:154
tesseract::TessBaseAPI::osd_tesseract_
Tesseract * osd_tesseract_
For orientation & script detection.
Definition: baseapi.h:887
tesseract::OEM_DEFAULT
Definition: publictypes.h:271
tesseract::TessBaseAPI::tesseract_
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:886
publictypes.h
tesseract::TessBaseAPI
Definition: baseapi.h:98
tesseract::TessBaseAPI::input_file_
STRING * input_file_
Name used by training code.
Definition: baseapi.h:894
UNICHARSET
Definition: unicharset.h:145
tesseract::TessBaseAPI::recognition_done_
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:899
tesseract::PageIteratorLevel
PageIteratorLevel
Definition: publictypes.h:216
pageiterator.h
tesseract
Definition: baseapi.h:65
PAGE_RES
Definition: pageres.h:73
UNICHAR_ID
int UNICHAR_ID
Definition: unichar.h:36
GenericVector
Definition: baseapi.h:40
PAGE_RES_IT
Definition: pageres.h:668
tesseract::TessBaseAPI::thresholder_
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:890
tesseract::ResultIterator
Definition: resultiterator.h:44
tesseract::TessBaseAPI::rect_left_
int rect_left_
Definition: baseapi.h:907
tesseract::Dict
Definition: dict.h:91
thresholder.h
tesseract::TessBaseAPI::reader_
FileReader reader_
Reads files from any filesystem.
Definition: baseapi.h:889
tesseract::TessBaseAPI::GetComponentImages
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, Pixa **pixa, int **blockids)
Definition: baseapi.h:453
tesseract::TessResultRenderer
Definition: renderer.h:49
tesseract::Dawg
Definition: dawg.h:113
INT_FEATURE_STRUCT
Definition: intproto.h:131
TBLOB
Definition: blobs.h:282
tesseract::PageSegMode
PageSegMode
Definition: publictypes.h:159
WERD
Definition: werd.h:55
unichar.h
ROW
Definition: ocrrow.h:35
TESS_API
#define TESS_API
Definition: platform.h:54
tesseract::MutableIterator
Definition: mutableiterator.h:44
tesseract::TessBaseAPI::GetTextlines
Boxa * GetTextlines(Pixa **pixa, int **blockids)
Definition: baseapi.h:406
tesseract::DetectParagraphs
void DetectParagraphs(int debug_level, GenericVector< RowInfo > *row_infos, GenericVector< PARA * > *row_owners, PARA_LIST *paragraphs, GenericVector< ParagraphModel * > *models)
Definition: paragraphs.cpp:2284
serialis.h
TessBaseAPI
struct TessBaseAPI TessBaseAPI
Definition: capi.h:72
tesseract::TessBaseAPI::datapath_
STRING * datapath_
Current location of tessdata.
Definition: baseapi.h:896
tesseract::TessBaseAPI::tesseract
Tesseract * tesseract() const
Definition: baseapi.h:798
BlamerBundle
Definition: blamer.h:103
tesseract::TruthCallback
std::function< void(const UNICHARSET &, int, PageIterator *, Pix *)> TruthCallback
Definition: baseapi.h:88
tesseract::ImageThresholder
Definition: thresholder.h:35
tesseract::TessBaseAPI::oem
OcrEngineMode oem() const
Definition: baseapi.h:802
tesseract::FillLatticeFunc
void(Wordrec::*)(const MATRIX &, const WERD_CHOICE_LIST &, const UNICHARSET &, BlamerBundle *) FillLatticeFunc
Definition: baseapi.h:86
tesseract::TessBaseAPI::image_width_
int image_width_
Definition: baseapi.h:911
tesseract::TessBaseAPI::output_file_
STRING * output_file_
Name used by debug code.
Definition: baseapi.h:895
tesseract::TessBaseAPI::language_
STRING * language_
Last initialized language.
Definition: baseapi.h:897
tesseract::HOcrEscape
STRING HOcrEscape(const char *text)
Definition: baseapi.cpp:2307
TBOX
Definition: rect.h:33
DENORM
Definition: normalis.h:49