tesseract  4.0.0-1-g2a2b
Advanced API

Functions

void tesseract::TessBaseAPI::SetImage (const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
 
void tesseract::TessBaseAPI::SetImage (Pix *pix)
 
void tesseract::TessBaseAPI::SetSourceResolution (int ppi)
 
void tesseract::TessBaseAPI::SetRectangle (int left, int top, int width, int height)
 
void tesseract::TessBaseAPI::SetThresholder (ImageThresholder *thresholder)
 
Pix * tesseract::TessBaseAPI::GetThresholdedImage ()
 
Boxa * tesseract::TessBaseAPI::GetRegions (Pixa **pixa)
 
Boxa * tesseract::TessBaseAPI::GetTextlines (const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
 
Boxa * tesseract::TessBaseAPI::GetTextlines (Pixa **pixa, int **blockids)
 
Boxa * tesseract::TessBaseAPI::GetStrips (Pixa **pixa, int **blockids)
 
Boxa * tesseract::TessBaseAPI::GetWords (Pixa **pixa)
 
Boxa * tesseract::TessBaseAPI::GetConnectedComponents (Pixa **cc)
 
Boxa * tesseract::TessBaseAPI::GetComponentImages (const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
 
Boxa * tesseract::TessBaseAPI::GetComponentImages (const PageIteratorLevel level, const bool text_only, Pixa **pixa, int **blockids)
 
int tesseract::TessBaseAPI::GetThresholdedImageScaleFactor () const
 
PageIterator * tesseract::TessBaseAPI::AnalyseLayout ()
 
PageIterator * tesseract::TessBaseAPI::AnalyseLayout (bool merge_similar_words)
 
int tesseract::TessBaseAPI::Recognize (ETEXT_DESC *monitor)
 
int tesseract::TessBaseAPI::RecognizeForChopTest (ETEXT_DESC *monitor)
 
bool tesseract::TessBaseAPI::ProcessPages (const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
 
bool tesseract::TessBaseAPI::ProcessPagesInternal (const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
 
bool tesseract::TessBaseAPI::ProcessPage (Pix *pix, int page_index, const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
 
ResultIterator * tesseract::TessBaseAPI::GetIterator ()
 
MutableIterator * tesseract::TessBaseAPI::GetMutableIterator ()
 
char * tesseract::TessBaseAPI::GetUTF8Text ()
 
char * tesseract::TessBaseAPI::GetHOCRText (ETEXT_DESC *monitor, int page_number)
 
char * tesseract::TessBaseAPI::GetHOCRText (int page_number)
 
char * tesseract::TessBaseAPI::GetTSVText (int page_number)
 
char * tesseract::TessBaseAPI::GetBoxText (int page_number)
 
char * tesseract::TessBaseAPI::GetUNLVText ()
 
bool tesseract::TessBaseAPI::DetectOrientationScript (int *orient_deg, float *orient_conf, const char **script_name, float *script_conf)
 
char * tesseract::TessBaseAPI::GetOsdText (int page_number)
 
int tesseract::TessBaseAPI::MeanTextConf ()
 
int * tesseract::TessBaseAPI::AllWordConfidences ()
 
bool tesseract::TessBaseAPI::AdaptToWordStr (PageSegMode mode, const char *wordstr)
 
void tesseract::TessBaseAPI::Clear ()
 
void tesseract::TessBaseAPI::End ()
 
static void tesseract::TessBaseAPI::ClearPersistentCache ()
 
int tesseract::TessBaseAPI::IsValidWord (const char *word)
 
bool tesseract::TessBaseAPI::IsValidCharacter (const char *utf8_character)
 
bool tesseract::TessBaseAPI::GetTextDirection (int *out_offset, float *out_slope)
 
void tesseract::TessBaseAPI::SetDictFunc (DictFunc f)
 
void tesseract::TessBaseAPI::SetProbabilityInContextFunc (ProbabilityInContextFunc f)
 
bool tesseract::TessBaseAPI::DetectOS (OSResults *)
 
void tesseract::TessBaseAPI::GetBlockTextOrientations (int **block_orientation, bool **vertical_writing)
 
void tesseract::TessBaseAPI::SetFillLatticeFunc (FillLatticeFunc f)
 
BLOCK_LIST * tesseract::TessBaseAPI::FindLinesCreateBlockList ()
 
static void tesseract::TessBaseAPI::DeleteBlockList (BLOCK_LIST *block_list)
 
static ROW * tesseract::TessBaseAPI::MakeTessOCRRow (float baseline, float xheight, float descender, float ascender)
 
static TBLOB * tesseract::TessBaseAPI::MakeTBLOB (Pix *pix)
 
static void tesseract::TessBaseAPI::NormalizeTBLOB (TBLOB *tblob, ROW *row, bool numeric_mode)
 
void tesseract::TessBaseAPI::GetFeaturesForBlob (TBLOB *blob, INT_FEATURE_STRUCT *int_features, int *num_features, int *feature_outline_index)
 
static ROW * tesseract::TessBaseAPI::FindRowForBox (BLOCK_LIST *blocks, int left, int top, int right, int bottom)
 
void tesseract::TessBaseAPI::RunAdaptiveClassifier (TBLOB *blob, int num_max_matches, int *unichar_ids, float *ratings, int *num_matches_returned)
 
const char * tesseract::TessBaseAPI::GetUnichar (int unichar_id)
 
const Dawg * tesseract::TessBaseAPI::GetDawg (int i) const
 
int tesseract::TessBaseAPI::NumDawgs () const
 
Tesseract * tesseract::TessBaseAPI::tesseract () const
 
OcrEngineMode tesseract::TessBaseAPI::oem () const
 
void tesseract::TessBaseAPI::InitTruthCallback (TruthCallback *cb)
 
void tesseract::TessBaseAPI::set_min_orientation_margin (double margin)
 

Detailed Description

The following methods break TesseractRect into pieces, so you can get hold of the thresholded image, get the text in different formats, get bounding boxes, confidences etc.

Function Documentation

◆ AdaptToWordStr()

bool tesseract::TessBaseAPI::AdaptToWordStr ( PageSegMode  mode,
const char *  wordstr 
)

Applies the given word to the adaptive classifier if possible. The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can tell the boundaries of the graphemes. Assumes that SetImage/SetRectangle have been used to set the image to the given word. The mode arg should be PSM_SINGLE_WORD or PSM_CIRCLE_WORD, as that will be used to control layout analysis. The currently set PageSegMode is preserved. Returns false if adaption was not possible for some reason.

Definition at line 2119 of file baseapi.cpp.

2119  {
2120  int debug = 0;
2121  GetIntVariable("applybox_debug", &debug);
2122  bool success = true;
2123  PageSegMode current_psm = GetPageSegMode();
2124  SetPageSegMode(mode);
2125  SetVariable("classify_enable_learning", "0");
2126  const std::unique_ptr<const char[]> text(GetUTF8Text());
2127  if (debug) {
2128  tprintf("Trying to adapt \"%s\" to \"%s\"\n", text.get(), wordstr);
2129  }
2130  if (text != nullptr) {
2131  PAGE_RES_IT it(page_res_);
2132  WERD_RES* word_res = it.word();
2133  if (word_res != nullptr) {
2134  word_res->word->set_text(wordstr);
2135  // Check to see if text matches wordstr.
2136  int w = 0;
2137  int t;
2138  for (t = 0; text[t] != '\0'; ++t) {
2139  if (text[t] == '\n' || text[t] == ' ')
2140  continue;
2141  while (wordstr[w] == ' ') ++w;
2142  if (text[t] != wordstr[w])
2143  break;
2144  ++w;
2145  }
2146  if (text[t] != '\0' || wordstr[w] != '\0') {
2147  // No match.
2148  delete page_res_;
2149  GenericVector<TBOX> boxes;
2153  PAGE_RES_IT pr_it(page_res_);
2154  if (pr_it.word() == nullptr)
2155  success = false;
2156  else
2157  word_res = pr_it.word();
2158  } else {
2159  word_res->BestChoiceToCorrectText();
2160  }
2161  if (success) {
2162  tesseract_->EnableLearning = true;
2163  tesseract_->LearnWord(nullptr, word_res);
2164  }
2165  } else {
2166  success = false;
2167  }
2168  } else {
2169  success = false;
2170  }
2171  SetPageSegMode(current_psm);
2172  return success;
2173 }
void ReSegmentByClassification(PAGE_RES *page_res)
PageSegMode GetPageSegMode() const
Definition: baseapi.cpp:536
void SetPageSegMode(PageSegMode mode)
Definition: baseapi.cpp:529
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:876
void set_text(const char *new_text)
Definition: werd.h:124
PAGE_RES * SetupApplyBoxes(const GenericVector< TBOX > &boxes, BLOCK_LIST *block_list)
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
void BestChoiceToCorrectText()
Definition: pageres.cpp:929
bool GetIntVariable(const char *name, int *value) const
Definition: baseapi.cpp:305
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
void TidyUp(PAGE_RES *page_res)
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:877
void LearnWord(const char *fontname, WERD_RES *word)
Definition: adaptmatch.cpp:251
bool SetVariable(const char *name, const char *value)
Definition: baseapi.cpp:293
WERD * word
Definition: pageres.h:189

◆ AllWordConfidences()

int * tesseract::TessBaseAPI::AllWordConfidences ( )

Returns all word confidences (between 0 and 100) in an array, terminated by -1. The calling function must delete [] after use. The number of confidences should correspond to the number of space-delimited words in GetUTF8Text.

Returns an array of all word confidences, terminated by -1.

Definition at line 2084 of file baseapi.cpp.

2084  {
2085  if (tesseract_ == nullptr ||
2086  (!recognition_done_ && Recognize(nullptr) < 0))
2087  return nullptr;
2088  int n_word = 0;
2089  PAGE_RES_IT res_it(page_res_);
2090  for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward())
2091  n_word++;
2092 
2093  int* conf = new int[n_word+1];
2094  n_word = 0;
2095  for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) {
2096  WERD_RES *word = res_it.word();
2097  WERD_CHOICE* choice = word->best_choice;
2098  int w_conf = static_cast<int>(100 + 5 * choice->certainty());
2099  // This is the eq for converting Tesseract confidence to 1..100
2100  if (w_conf < 0) w_conf = 0;
2101  if (w_conf > 100) w_conf = 100;
2102  conf[n_word++] = w_conf;
2103  }
2104  conf[n_word] = -1;
2105  return conf;
2106 }
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:844
float certainty() const
Definition: ratngs.h:330
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:883
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:877
WERD_CHOICE * best_choice
Definition: pageres.h:235
WERD * word
Definition: pageres.h:189

◆ AnalyseLayout() [1/2]

PageIterator * tesseract::TessBaseAPI::AnalyseLayout ( )

Runs page layout analysis in the mode set by SetPageSegMode. May optionally be called prior to Recognize to get access to just the page layout results. Returns an iterator to the results. If merge_similar_words is true, words are combined where suitable for use with a line recognizer. Use if you want to use AnalyseLayout to find the textlines, and then want to process textline fragments with an external line recognizer. Returns nullptr on error or an empty page. The returned iterator must be deleted after use. WARNING! This class points to data held within the TessBaseAPI class, and therefore can only be used while the TessBaseAPI class still exists and has not been subjected to a call of Init, SetImage, Recognize, Clear, End, DetectOS, or anything else that changes the internal PAGE_RES.

Definition at line 824 of file baseapi.cpp.

824 { return AnalyseLayout(false); }
PageIterator * AnalyseLayout()
Definition: baseapi.cpp:824

◆ AnalyseLayout() [2/2]

PageIterator * tesseract::TessBaseAPI::AnalyseLayout ( bool  merge_similar_words)

Definition at line 826 of file baseapi.cpp.

826  {
827  if (FindLines() == 0) {
828  if (block_list_->empty())
829  return nullptr; // The page was empty.
830  page_res_ = new PAGE_RES(merge_similar_words, block_list_, nullptr);
831  DetectParagraphs(false);
832  return new PageIterator(
836  }
837  return nullptr;
838 }
TESS_LOCAL int FindLines()
Definition: baseapi.cpp:2389
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:874
TESS_LOCAL void DetectParagraphs(bool after_text_recognition)
Definition: baseapi.cpp:2600
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:876
int GetScaledYResolution() const
Definition: thresholder.h:93
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:877

◆ Clear()

void tesseract::TessBaseAPI::Clear ( )

Free up recognition results and any stored image data, without actually freeing any recognition data that would be time-consuming to reload. Afterwards, you must call SetImage or TesseractRect before doing any Recognize or Get* operation.

Definition at line 2182 of file baseapi.cpp.

2182  {
2183  if (thresholder_ != nullptr)
2184  thresholder_->Clear();
2185  ClearResults();
2186  if (tesseract_ != nullptr) SetInputImage(nullptr);
2187 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:874
virtual void Clear()
Destroy the Pix if there is one, freeing memory.
Definition: thresholder.cpp:46
void SetInputImage(Pix *pix)
Definition: baseapi.cpp:968
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870

◆ ClearPersistentCache()

void tesseract::TessBaseAPI::ClearPersistentCache ( )
static

Clear any library-level memory caches. There are a variety of expensive-to-load constant data structures (mostly language dictionaries) that are cached globally – surviving the Init() and End() of individual TessBaseAPI's. This function allows the clearing of these caches.

Definition at line 2230 of file baseapi.cpp.

2230  {
2232 }
static DawgCache * GlobalDawgCache()
Definition: dict.cpp:193
void DeleteUnusedDawgs()
Definition: dawg_cache.h:43

◆ DeleteBlockList()

void tesseract::TessBaseAPI::DeleteBlockList ( BLOCK_LIST *  block_list)
static

Delete a block list. This is to keep BLOCK_LIST pointer opaque and let go of including the other headers.

Definition at line 2668 of file baseapi.cpp.

2668  {
2669  delete block_list;
2670 }

◆ DetectOrientationScript()

bool tesseract::TessBaseAPI::DetectOrientationScript ( int *  orient_deg,
float *  orient_conf,
const char **  script_name,
float *  script_conf 
)

Detect the orientation of the input image and apparent script (alphabet). orient_deg is the detected clockwise rotation of the input image in degrees (0, 90, 180, 270). orient_conf is the confidence (15.0 is reasonably confident). script_name is an ASCII string, the name of the script, e.g. "Latin". script_conf is the confidence level in the script. Returns true on success and writes values to each non-null output parameter.

Definition at line 2010 of file baseapi.cpp.

2012  {
2013  OSResults osr;
2014 
2015  bool osd = DetectOS(&osr);
2016  if (!osd) {
2017  return false;
2018  }
2019 
2020  int orient_id = osr.best_result.orientation_id;
2021  int script_id = osr.get_best_script(orient_id);
2022  if (orient_conf) *orient_conf = osr.best_result.oconfidence;
2023  if (orient_deg) *orient_deg = orient_id * 90; // convert quadrant to degrees
2024 
2025  if (script_name) {
2026  const char* script = osr.unicharset->get_script_from_script_id(script_id);
2027 
2028  *script_name = script;
2029  }
2030 
2031  if (script_conf) *script_conf = osr.best_result.sconfidence;
2032 
2033  return true;
2034 }
UNICHARSET * unicharset
Definition: osdetect.h:80
const char * get_script_from_script_id(int id) const
Definition: unicharset.h:849
float sconfidence
Definition: osdetect.h:45
OSBestResult best_result
Definition: osdetect.h:81
int orientation_id
Definition: osdetect.h:43
bool DetectOS(OSResults *)
Definition: baseapi.cpp:2522
float oconfidence
Definition: osdetect.h:46
TESS_API int get_best_script(int orientation_id) const
Definition: osdetect.cpp:112

◆ DetectOS()

bool tesseract::TessBaseAPI::DetectOS ( OSResults * osr)

Estimates the Orientation And Script of the image.

Returns
true if the image was processed successfully.

Estimates the Orientation And Script of the image. Returns true if the image was processed successfully.

Definition at line 2522 of file baseapi.cpp.

2522  {
2523  if (tesseract_ == nullptr)
2524  return false;
2525  ClearResults();
2526  if (tesseract_->pix_binary() == nullptr &&
2528  return false;
2529  }
2530 
2531  if (input_file_ == nullptr)
2532  input_file_ = new STRING(kInputFile);
2534 }
int orientation_and_script_detection(STRING &filename, OSResults *osr, tesseract::Tesseract *tess)
Definition: osdetect.cpp:190
virtual TESS_LOCAL bool Threshold(Pix **pix)
Definition: baseapi.cpp:2334
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
Pix * pix_binary() const
Definition: strngs.h:45
const char * kInputFile
Definition: baseapi.cpp:108
STRING * input_file_
Name used by training code.
Definition: baseapi.h:878

◆ End()

void tesseract::TessBaseAPI::End ( )

Close down tesseract and free up all memory. End() is equivalent to destructing and reconstructing your TessBaseAPI. Once End() has been used, none of the other API functions may be used other than Init and anything declared above it in the class definition.

Definition at line 2195 of file baseapi.cpp.

2195  {
2196  Clear();
2197  delete thresholder_;
2198  thresholder_ = nullptr;
2199  delete page_res_;
2200  page_res_ = nullptr;
2201  delete block_list_;
2202  block_list_ = nullptr;
2203  if (paragraph_models_ != nullptr) {
2205  delete paragraph_models_;
2206  paragraph_models_ = nullptr;
2207  }
2208  if (osd_tesseract_ == tesseract_) osd_tesseract_ = nullptr;
2209  delete tesseract_;
2210  tesseract_ = nullptr;
2211  delete osd_tesseract_;
2212  osd_tesseract_ = nullptr;
2213  delete equ_detect_;
2214  equ_detect_ = nullptr;
2215  delete input_file_;
2216  input_file_ = nullptr;
2217  delete output_file_;
2218  output_file_ = nullptr;
2219  delete datapath_;
2220  datapath_ = nullptr;
2221  delete language_;
2222  language_ = nullptr;
2223 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:874
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:876
Tesseract * osd_tesseract_
For orientation & script detection.
Definition: baseapi.h:871
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
GenericVector< ParagraphModel * > * paragraph_models_
Definition: baseapi.h:875
STRING * datapath_
Current location of tessdata.
Definition: baseapi.h:880
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:877
EquationDetect * equ_detect_
The equation detector.
Definition: baseapi.h:872
STRING * language_
Last initialized language.
Definition: baseapi.h:881
void delete_data_pointers()
STRING * output_file_
Name used by debug code.
Definition: baseapi.h:879
STRING * input_file_
Name used by training code.
Definition: baseapi.h:878

◆ FindLinesCreateBlockList()

BLOCK_LIST * tesseract::TessBaseAPI::FindLinesCreateBlockList ( )

Find lines from the image making the BLOCK_LIST.

Definition at line 2656 of file baseapi.cpp.

2656  {
2657  ASSERT_HOST(FindLines() == 0);
2658  BLOCK_LIST* result = block_list_;
2659  block_list_ = nullptr;
2660  return result;
2661 }
TESS_LOCAL int FindLines()
Definition: baseapi.cpp:2389
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:876
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ FindRowForBox()

ROW * tesseract::TessBaseAPI::FindRowForBox ( BLOCK_LIST *  blocks,
int  left,
int  top,
int  right,
int  bottom 
)
static

This method returns the row to which a box of specified dimensions would belong. If no good match is found, it returns nullptr.

Definition at line 2957 of file baseapi.cpp.

2958  {
2959  TBOX box(left, bottom, right, top);
2960  BLOCK_IT b_it(blocks);
2961  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
2962  BLOCK* block = b_it.data();
2963  if (!box.major_overlap(block->pdblk.bounding_box()))
2964  continue;
2965  ROW_IT r_it(block->row_list());
2966  for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
2967  ROW* row = r_it.data();
2968  if (!box.major_overlap(row->bounding_box()))
2969  continue;
2970  WERD_IT w_it(row->word_list());
2971  for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
2972  WERD* word = w_it.data();
2973  if (box.major_overlap(word->bounding_box()))
2974  return row;
2975  }
2976  }
2977  }
2978  return nullptr;
2979 }
TBOX bounding_box() const
Definition: werd.cpp:159
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:118
Definition: rect.h:34
WERD_LIST * word_list()
Definition: ocrrow.h:55
Definition: werd.h:59
TBOX bounding_box() const
Definition: ocrrow.h:88
Definition: ocrrow.h:36
Definition: ocrblock.h:30
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:60
PDBLK pdblk
Definition: ocrblock.h:192

◆ GetBlockTextOrientations()

void tesseract::TessBaseAPI::GetBlockTextOrientations ( int **  block_orientation,
bool **  vertical_writing 
)

Return text orientation of each block as determined by an earlier run of layout analysis.

Return text orientation of each block as determined in an earlier page layout analysis operation. Orientation is returned as the number of ccw 90-degree rotations (in [0..3]) required to make the text in the block upright (readable). Note that this may not necessarily be the block orientation preferred for recognition (such as the case of vertical CJK text).

Also returns whether the text in the block is believed to have vertical writing direction (when in an upright page orientation).

The returned array is of length equal to the number of text blocks, which may be less than the total number of blocks. The ordering is intended to be consistent with GetTextlines().

Definition at line 2555 of file baseapi.cpp.

2556  {
2557  delete[] *block_orientation;
2558  *block_orientation = nullptr;
2559  delete[] *vertical_writing;
2560  *vertical_writing = nullptr;
2561  BLOCK_IT block_it(block_list_);
2562 
2563  block_it.move_to_first();
2564  int num_blocks = 0;
2565  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
2566  if (!block_it.data()->pdblk.poly_block()->IsText()) {
2567  continue;
2568  }
2569  ++num_blocks;
2570  }
2571  if (!num_blocks) {
2572  tprintf("WARNING: Found no blocks\n");
2573  return;
2574  }
2575  *block_orientation = new int[num_blocks];
2576  *vertical_writing = new bool[num_blocks];
2577  block_it.move_to_first();
2578  int i = 0;
2579  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
2580  block_it.forward()) {
2581  if (!block_it.data()->pdblk.poly_block()->IsText()) {
2582  continue;
2583  }
2584  FCOORD re_rotation = block_it.data()->re_rotation();
2585  float re_theta = re_rotation.angle();
2586  FCOORD classify_rotation = block_it.data()->classify_rotation();
2587  float classify_theta = classify_rotation.angle();
2588  double rot_theta = - (re_theta - classify_theta) * 2.0 / M_PI;
2589  if (rot_theta < 0) rot_theta += 4;
2590  int num_rotations = static_cast<int>(rot_theta + 0.5);
2591  (*block_orientation)[i] = num_rotations;
2592  // The classify_rotation is non-zero only if the text has vertical
2593  // writing direction.
2594  (*vertical_writing)[i] = classify_rotation.y() != 0.0f;
2595  ++i;
2596  }
2597 }
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:876
float angle() const
find angle
Definition: points.h:248
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
Definition: points.h:189
float y() const
Definition: points.h:211

◆ GetBoxText()

char * tesseract::TessBaseAPI::GetBoxText ( int  page_number)

The recognized text is returned as a char* which is coded in the same format as a box file used in training. Constructs coordinates in the original image - not just the rectangle. page_number is a 0-based page index that will appear in the box file. Returned string must be freed with the delete [] operator.

The recognized text is returned as a char* which is coded as a UTF8 box file. page_number is a 0-based page index that will appear in the box file. Returned string must be freed with the delete [] operator.

Definition at line 1844 of file baseapi.cpp.

1844  {
1845  if (tesseract_ == nullptr ||
1846  (!recognition_done_ && Recognize(nullptr) < 0))
1847  return nullptr;
1848  int blob_count;
1849  int utf8_length = TextLength(&blob_count);
1850  int total_length = blob_count * kBytesPerBoxFileLine + utf8_length +
1852  char* result = new char[total_length];
1853  result[0] = '\0';
1854  int output_length = 0;
1855  LTRResultIterator* it = GetLTRIterator();
1856  do {
1857  int left, top, right, bottom;
1858  if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) {
1859  const std::unique_ptr</*non-const*/ char[]> text(
1860  it->GetUTF8Text(RIL_SYMBOL));
1861  // Tesseract uses space for recognition failure. Fix to a reject
1862  // character, kTesseractReject so we don't create illegal box files.
1863  for (int i = 0; text[i] != '\0'; ++i) {
1864  if (text[i] == ' ')
1865  text[i] = kTesseractReject;
1866  }
1867  snprintf(result + output_length, total_length - output_length,
1868  "%s %d %d %d %d %d\n", text.get(), left, image_height_ - bottom,
1869  right, image_height_ - top, page_number);
1870  output_length += strlen(result + output_length);
1871  // Just in case...
1872  if (output_length + kMaxBytesPerLine > total_length)
1873  break;
1874  }
1875  } while (it->Next(RIL_SYMBOL));
1876  delete it;
1877  return result;
1878 }
TESS_LOCAL LTRResultIterator * GetLTRIterator()
Definition: baseapi.cpp:1282
TESS_LOCAL int TextLength(int *blob_count)
Definition: baseapi.cpp:2491
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:844
const char kTesseractReject
Definition: baseapi.cpp:99
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:883
const int kMaxBytesPerLine
Definition: baseapi.cpp:1835
const int kBytesPerBoxFileLine
Definition: baseapi.cpp:1826

◆ GetComponentImages() [1/2]

Boxa * tesseract::TessBaseAPI::GetComponentImages ( const PageIteratorLevel  level,
const bool  text_only,
const bool  raw_image,
const int  raw_padding,
Pixa **  pixa,
int **  blockids,
int **  paraids 
)

Get the given level kind of components (block, textline, word etc.) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If blockids is not nullptr, the block-id of each component is also returned as an array of one element per component. delete [] after use. If paraids is not nullptr, the paragraph-id of each component within its block is also returned as an array of one element per component. delete [] after use. If raw_image is true, then portions of the original image are extracted instead of the thresholded image and padded with raw_padding. If text_only is true, then only text components are returned.

Get the given level kind of components (block, textline, word etc.) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If blockids is not nullptr, the block-id of each component is also returned as an array of one element per component. delete [] after use. If text_only is true, then only text components are returned.

Definition at line 720 of file baseapi.cpp.

724  {
725  PageIterator* page_it = GetIterator();
726  if (page_it == nullptr)
727  page_it = AnalyseLayout();
728  if (page_it == nullptr)
729  return nullptr; // Failed.
730 
731  // Count the components to get a size for the arrays.
732  int component_count = 0;
733  int left, top, right, bottom;
734 
735  TessResultCallback<bool>* get_bbox = nullptr;
736  if (raw_image) {
737  // Get bounding box in original raw image with padding.
739  level, raw_padding,
740  &left, &top, &right, &bottom);
741  } else {
742  // Get bounding box from binarized imaged. Note that this could be
743  // differently scaled from the original image.
744  get_bbox = NewPermanentTessCallback(page_it,
746  level, &left, &top, &right, &bottom);
747  }
748  do {
749  if (get_bbox->Run() &&
750  (!text_only || PTIsTextType(page_it->BlockType())))
751  ++component_count;
752  } while (page_it->Next(level));
753 
754  Boxa* boxa = boxaCreate(component_count);
755  if (pixa != nullptr)
756  *pixa = pixaCreate(component_count);
757  if (blockids != nullptr)
758  *blockids = new int[component_count];
759  if (paraids != nullptr)
760  *paraids = new int[component_count];
761 
762  int blockid = 0;
763  int paraid = 0;
764  int component_index = 0;
765  page_it->Begin();
766  do {
767  if (get_bbox->Run() &&
768  (!text_only || PTIsTextType(page_it->BlockType()))) {
769  Box* lbox = boxCreate(left, top, right - left, bottom - top);
770  boxaAddBox(boxa, lbox, L_INSERT);
771  if (pixa != nullptr) {
772  Pix* pix = nullptr;
773  if (raw_image) {
774  pix = page_it->GetImage(level, raw_padding, GetInputImage(), &left,
775  &top);
776  } else {
777  pix = page_it->GetBinaryImage(level);
778  }
779  pixaAddPix(*pixa, pix, L_INSERT);
780  pixaAddBox(*pixa, lbox, L_CLONE);
781  }
782  if (paraids != nullptr) {
783  (*paraids)[component_index] = paraid;
784  if (page_it->IsAtFinalElement(RIL_PARA, level))
785  ++paraid;
786  }
787  if (blockids != nullptr) {
788  (*blockids)[component_index] = blockid;
789  if (page_it->IsAtFinalElement(RIL_BLOCK, level)) {
790  ++blockid;
791  paraid = 0;
792  }
793  }
794  ++component_index;
795  }
796  } while (page_it->Next(level));
797  delete page_it;
798  delete get_bbox;
799  return boxa;
800 }
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
virtual R Run()=0
PageIterator * AnalyseLayout()
Definition: baseapi.cpp:824
bool PTIsTextType(PolyBlockType type)
Definition: publictypes.h:82
bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
ResultIterator * GetIterator()
Definition: baseapi.cpp:1299

◆ GetComponentImages() [2/2]

Boxa* tesseract::TessBaseAPI::GetComponentImages ( const PageIteratorLevel  level,
const bool  text_only,
Pixa **  pixa,
int **  blockids 
)
inline

Definition at line 461 of file baseapi.h.

463  {
464  return GetComponentImages(level, text_only, false, 0, pixa, blockids, nullptr);
465  }
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:720

◆ GetConnectedComponents()

Boxa * tesseract::TessBaseAPI::GetConnectedComponents ( Pixa **  pixa)

Gets the individual connected (text) components (created after the page segmentation step, but before recognition) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. Note: the caller is responsible for calling boxaDestroy() on the returned Boxa array and pixaDestroy() on the cc array.

Gets the individual connected (text) components (created after pages segmentation step, but before recognition) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize.

Definition at line 708 of file baseapi.cpp.

708  {
709  return GetComponentImages(RIL_SYMBOL, true, pixa, nullptr);
710 }
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:720

◆ GetDawg()

const Dawg * tesseract::TessBaseAPI::GetDawg ( int  i) const

Return the pointer to the i-th dawg loaded into tesseract_ object.

Definition at line 2621 of file baseapi.cpp.

2621  {
2622  if (tesseract_ == nullptr || i >= NumDawgs()) return nullptr;
2623  return tesseract_->getDict().GetDawg(i);
2624 }
Dict & getDict() override
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
const Dawg * GetDawg(int index) const
Return i-th dawg pointer recorded in the dawgs_ vector.
Definition: dict.h:417
int NumDawgs() const
Definition: baseapi.cpp:2627

◆ GetFeaturesForBlob()

void tesseract::TessBaseAPI::GetFeaturesForBlob ( TBLOB * blob,
INT_FEATURE_STRUCT * int_features,
int *  num_features,
int *  feature_outline_index 
)

This method returns the features associated with the input blob, writing them to int_features and their count to *num_features (0 if feature extraction fails).

This method returns the features associated with the input blob.

Definition at line 2929 of file baseapi.cpp.

2932  {
2933  GenericVector<int> outline_counts;
2936  INT_FX_RESULT_STRUCT fx_info;
2937  tesseract_->ExtractFeatures(*blob, false, &bl_features,
2938  &cn_features, &fx_info, &outline_counts);
2939  if (cn_features.empty() || cn_features.size() > MAX_NUM_INT_FEATURES) {
2940  *num_features = 0;
2941  return; // Feature extraction failed.
2942  }
2943  *num_features = cn_features.size();
2944  memcpy(int_features, &cn_features[0], *num_features * sizeof(cn_features[0]));
2945  // TODO(rays) Pass outline_counts back and simplify the calling code.
2946  if (feature_outline_index != nullptr) {
2947  int f = 0;
2948  for (int i = 0; i < outline_counts.size(); ++i) {
2949  while (f < outline_counts[i])
2950  feature_outline_index[f++] = i;
2951  }
2952  }
2953 }
int size() const
Definition: genericvector.h:71
static void ExtractFeatures(const TBLOB &blob, bool nonlinear_norm, GenericVector< INT_FEATURE_STRUCT > *bl_features, GenericVector< INT_FEATURE_STRUCT > *cn_features, INT_FX_RESULT_STRUCT *results, GenericVector< int > *outline_cn_counts)
Definition: intfx.cpp:444
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
#define MAX_NUM_INT_FEATURES
Definition: intproto.h:129
bool empty() const
Definition: genericvector.h:90

◆ GetHOCRText() [1/2]

char * tesseract::TessBaseAPI::GetHOCRText ( ETEXT_DESC *  monitor,
int  page_number 
)

Make an HTML-formatted string with hOCR markup from the internal data structures. page_number is 0-based but will appear in the output as 1-based. monitor can be used to cancel the recognition and to receive progress callbacks. Returned string must be freed with the delete [] operator.

Make an HTML-formatted string with hOCR markup from the internal data structures. page_number is 0-based but will appear in the output as 1-based. Image name/input_file_ can be set by SetInputName before calling GetHOCRText. STL removed from original patch submission and refactored by rays. Returned string must be freed with the delete [] operator.

Definition at line 1485 of file baseapi.cpp.

1485  {
1486  if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0))
1487  return nullptr;
1488 
1489  int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1, tcnt = 1, gcnt = 1;
1490  int page_id = page_number + 1; // hOCR uses 1-based page numbers.
1491  bool para_is_ltr = true; // Default direction is LTR
1492  const char* paragraph_lang = nullptr;
1493  bool font_info = false;
1494  GetBoolVariable("hocr_font_info", &font_info);
1495 
1496  STRING hocr_str("");
1497 
1498  if (input_file_ == nullptr)
1499  SetInputName(nullptr);
1500 
1501 #ifdef _WIN32
1502  // convert input name from ANSI encoding to utf-8
1503  int str16_len =
1504  MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, nullptr, 0);
1505  wchar_t *uni16_str = new WCHAR[str16_len];
1506  str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1,
1507  uni16_str, str16_len);
1508  int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr, 0,
1509  nullptr, nullptr);
1510  char *utf8_str = new char[utf8_len];
1511  WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str,
1512  utf8_len, nullptr, nullptr);
1513  *input_file_ = utf8_str;
1514  delete[] uni16_str;
1515  delete[] utf8_str;
1516 #endif
1517 
1518  hocr_str += " <div class='ocr_page'";
1519  AddIdTohOCR(&hocr_str, "page", page_id, -1);
1520  hocr_str += " title='image \"";
1521  if (input_file_) {
1522  hocr_str += HOcrEscape(input_file_->string());
1523  } else {
1524  hocr_str += "unknown";
1525  }
1526  hocr_str.add_str_int("\"; bbox ", rect_left_);
1527  hocr_str.add_str_int(" ", rect_top_);
1528  hocr_str.add_str_int(" ", rect_width_);
1529  hocr_str.add_str_int(" ", rect_height_);
1530  hocr_str.add_str_int("; ppageno ", page_number);
1531  hocr_str += "'>\n";
1532 
1533  ResultIterator *res_it = GetIterator();
1534  while (!res_it->Empty(RIL_BLOCK)) {
1535  if (res_it->Empty(RIL_WORD)) {
1536  res_it->Next(RIL_WORD);
1537  continue;
1538  }
1539 
1540  // Open any new block/paragraph/textline.
1541  if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
1542  para_is_ltr = true; // reset to default direction
1543  hocr_str += " <div class='ocr_carea'";
1544  AddIdTohOCR(&hocr_str, "block", page_id, bcnt);
1545  AddBoxTohOCR(res_it, RIL_BLOCK, &hocr_str);
1546  }
1547  if (res_it->IsAtBeginningOf(RIL_PARA)) {
1548  hocr_str += "\n <p class='ocr_par'";
1549  para_is_ltr = res_it->ParagraphIsLtr();
1550  if (!para_is_ltr) {
1551  hocr_str += " dir='rtl'";
1552  }
1553  AddIdTohOCR(&hocr_str, "par", page_id, pcnt);
1554  paragraph_lang = res_it->WordRecognitionLanguage();
1555  if (paragraph_lang) {
1556  hocr_str += " lang='";
1557  hocr_str += paragraph_lang;
1558  hocr_str += "'";
1559  }
1560  AddBoxTohOCR(res_it, RIL_PARA, &hocr_str);
1561  }
1562  if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
1563  hocr_str += "\n <span class='ocr_line'";
1564  AddIdTohOCR(&hocr_str, "line", page_id, lcnt);
1565  AddBoxTohOCR(res_it, RIL_TEXTLINE, &hocr_str);
1566  }
1567 
1568  // Now, process the word...
1569  std::vector<std::vector<std::pair<const char*, float>>>* confidencemap = nullptr;
1571  confidencemap = res_it->GetBestLSTMSymbolChoices();
1572  }
1573  hocr_str += "\n <span class='ocrx_word'";
1574  AddIdTohOCR(&hocr_str, "word", page_id, wcnt);
1575  int left, top, right, bottom;
1576  bool bold, italic, underlined, monospace, serif, smallcaps;
1577  int pointsize, font_id;
1578  const char *font_name;
1579  res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
1580  font_name = res_it->WordFontAttributes(&bold, &italic, &underlined,
1581  &monospace, &serif, &smallcaps,
1582  &pointsize, &font_id);
1583  hocr_str.add_str_int(" title='bbox ", left);
1584  hocr_str.add_str_int(" ", top);
1585  hocr_str.add_str_int(" ", right);
1586  hocr_str.add_str_int(" ", bottom);
1587  hocr_str.add_str_int("; x_wconf ", res_it->Confidence(RIL_WORD));
1588  if (font_info) {
1589  if (font_name) {
1590  hocr_str += "; x_font ";
1591  hocr_str += HOcrEscape(font_name);
1592  }
1593  hocr_str.add_str_int("; x_fsize ", pointsize);
1594  }
1595  hocr_str += "'";
1596  const char* lang = res_it->WordRecognitionLanguage();
1597  if (lang && (!paragraph_lang || strcmp(lang, paragraph_lang))) {
1598  hocr_str += " lang='";
1599  hocr_str += lang;
1600  hocr_str += "'";
1601  }
1602  switch (res_it->WordDirection()) {
1603  // Only emit direction if different from current paragraph direction
1604  case DIR_LEFT_TO_RIGHT:
1605  if (!para_is_ltr) hocr_str += " dir='ltr'";
1606  break;
1607  case DIR_RIGHT_TO_LEFT:
1608  if (para_is_ltr) hocr_str += " dir='rtl'";
1609  break;
1610  case DIR_MIX:
1611  case DIR_NEUTRAL:
1612  default: // Do nothing.
1613  break;
1614  }
1615  hocr_str += ">";
1616  bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
1617  bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD);
1618  bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
1619  if (bold) hocr_str += "<strong>";
1620  if (italic) hocr_str += "<em>";
1621  do {
1622  const std::unique_ptr<const char[]> grapheme(
1623  res_it->GetUTF8Text(RIL_SYMBOL));
1624  if (grapheme && grapheme[0] != 0) {
1625  hocr_str += HOcrEscape(grapheme.get());
1626  }
1627  res_it->Next(RIL_SYMBOL);
1628  } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
1629  if (italic) hocr_str += "</em>";
1630  if (bold) hocr_str += "</strong>";
1631  // If the lstm choice mode is required it is added here
1632  if (tesseract_->lstm_choice_mode == 1 && confidencemap != nullptr) {
1633  for (size_t i = 0; i < confidencemap->size(); i++) {
1634  hocr_str += "\n <span class='ocrx_cinfo'";
1635  AddIdTohOCR(&hocr_str, "timestep", page_id, wcnt, tcnt);
1636  hocr_str += ">";
1637  std::vector<std::pair<const char*, float>> timestep = (*confidencemap)[i];
1638  for (std::pair<const char*, float> conf : timestep) {
1639  hocr_str += "<span class='ocr_glyph'";
1640  AddIdTohOCR(&hocr_str, "choice", page_id, wcnt, gcnt);
1641  hocr_str.add_str_int(" title='x_confs ", int(conf.second * 100));
1642  hocr_str += "'";
1643  hocr_str += ">";
1644  hocr_str += conf.first;
1645  hocr_str += "</span>";
1646  gcnt++;
1647  }
1648  hocr_str += "</span>";
1649  tcnt++;
1650  }
1651  } else if (tesseract_->lstm_choice_mode == 2 && confidencemap != nullptr) {
1652  for (size_t i = 0; i < confidencemap->size(); i++) {
1653  std::vector<std::pair<const char*, float>> timestep = (*confidencemap)[i];
1654  if (timestep.size() > 0) {
1655  hocr_str += "\n <span class='ocrx_cinfo'";
1656  AddIdTohOCR(&hocr_str, "lstm_choices", page_id, wcnt, tcnt);
1657  hocr_str += " chosen='";
1658  hocr_str += timestep[0].first;
1659  hocr_str += "'>";
1660  for (size_t j = 1; j < timestep.size(); j++) {
1661  hocr_str += "<span class='ocr_glyph'";
1662  AddIdTohOCR(&hocr_str, "choice", page_id, wcnt, gcnt);
1663  hocr_str.add_str_int(" title='x_confs ", int(timestep[j].second * 100));
1664  hocr_str += "'";
1665  hocr_str += ">";
1666  hocr_str += timestep[j].first;
1667  hocr_str += "</span>";
1668  gcnt++;
1669  }
1670  hocr_str += "</span>";
1671  tcnt++;
1672  }
1673  }
1674  }
1675  hocr_str += "</span>";
1676  tcnt = 1;
1677  gcnt = 1;
1678  wcnt++;
1679  // Close any ending block/paragraph/textline.
1680  if (last_word_in_line) {
1681  hocr_str += "\n </span>";
1682  lcnt++;
1683  }
1684  if (last_word_in_para) {
1685  hocr_str += "\n </p>\n";
1686  pcnt++;
1687  para_is_ltr = true; // back to default direction
1688  }
1689  if (last_word_in_block) {
1690  hocr_str += " </div>\n";
1691  bcnt++;
1692  }
1693  }
1694  hocr_str += " </div>\n";
1695 
1696  char *ret = new char[hocr_str.length() + 1];
1697  strcpy(ret, hocr_str.string());
1698  delete res_it;
1699  return ret;
1700 }
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:844
STRING HOcrEscape(const char *text)
Definition: baseapi.cpp:2632
bool GetBoolVariable(const char *name, bool *value) const
Definition: baseapi.cpp:313
const char * string() const
Definition: strngs.cpp:196
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
void add_str_int(const char *str, int number)
Definition: strngs.cpp:379
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:877
Definition: strngs.h:45
void SetInputName(const char *name)
Definition: baseapi.cpp:278
STRING * input_file_
Name used by training code.
Definition: baseapi.h:878
ResultIterator * GetIterator()
Definition: baseapi.cpp:1299

◆ GetHOCRText() [2/2]

char * tesseract::TessBaseAPI::GetHOCRText ( int  page_number)

Make a HTML-formatted string with hOCR markup from the internal data structures. page_number is 0-based but will appear in the output as 1-based. Returned string must be freed with the delete [] operator.

Make an HTML-formatted string with hOCR markup from the internal data structures. page_number is 0-based but will appear in the output as 1-based. Image name/input_file_ can be set by SetInputName before calling GetHOCRText. STL removed from original patch submission and refactored by rays. Returned string must be freed with the delete [] operator.

Definition at line 1472 of file baseapi.cpp.

1472  {
1473  return GetHOCRText(nullptr, page_number);
1474 }
char * GetHOCRText(ETEXT_DESC *monitor, int page_number)
Definition: baseapi.cpp:1485

◆ GetIterator()

ResultIterator * tesseract::TessBaseAPI::GetIterator ( )

Get a reading-order iterator to the results of LayoutAnalysis and/or Recognize. The returned iterator must be deleted after use. WARNING! This class points to data held within the TessBaseAPI class, and therefore can only be used while the TessBaseAPI class still exists and has not been subjected to a call of Init, SetImage, Recognize, Clear, End, DetectOS, or anything else that changes the internal PAGE_RES.

Definition at line 1299 of file baseapi.cpp.

1299  {
1300  if (tesseract_ == nullptr || page_res_ == nullptr)
1301  return nullptr;
1302  return ResultIterator::StartOfParagraph(LTRResultIterator(
1306 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:874
int GetScaledYResolution() const
Definition: thresholder.h:93
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
static ResultIterator * StartOfParagraph(const LTRResultIterator &resit)
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:877

◆ GetMutableIterator()

MutableIterator * tesseract::TessBaseAPI::GetMutableIterator ( )

Get a mutable iterator to the results of LayoutAnalysis and/or Recognize. The returned iterator must be deleted after use. WARNING! This class points to data held within the TessBaseAPI class, and therefore can only be used while the TessBaseAPI class still exists and has not been subjected to a call of Init, SetImage, Recognize, Clear, End, DetectOS, or anything else that changes the internal PAGE_RES.

Definition at line 1316 of file baseapi.cpp.

1316  {
1317  if (tesseract_ == nullptr || page_res_ == nullptr)
1318  return nullptr;
1319  return new MutableIterator(page_res_, tesseract_,
1323 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:874
int GetScaledYResolution() const
Definition: thresholder.h:93
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:877

◆ GetOsdText()

char * tesseract::TessBaseAPI::GetOsdText ( int  page_number)

The recognized text is returned as a char* which is coded as UTF8 and must be freed with the delete [] operator. page_number is a 0-based page index that will appear in the osd file.

Definition at line 2041 of file baseapi.cpp.

2041  {
2042  int orient_deg;
2043  float orient_conf;
2044  const char* script_name;
2045  float script_conf;
2046 
2047  if (!DetectOrientationScript(&orient_deg, &orient_conf, &script_name,
2048  &script_conf))
2049  return nullptr;
2050 
2051  // clockwise rotation needed to make the page upright
2052  int rotate = OrientationIdToValue(orient_deg / 90);
2053 
2054  const int kOsdBufsize = 255;
2055  char* osd_buf = new char[kOsdBufsize];
2056  snprintf(osd_buf, kOsdBufsize,
2057  "Page number: %d\n"
2058  "Orientation in degrees: %d\n"
2059  "Rotate: %d\n"
2060  "Orientation confidence: %.2f\n"
2061  "Script: %s\n"
2062  "Script confidence: %.2f\n",
2063  page_number, orient_deg, rotate, orient_conf, script_name,
2064  script_conf);
2065 
2066  return osd_buf;
2067 }
int OrientationIdToValue(const int &id)
Definition: osdetect.cpp:568
bool DetectOrientationScript(int *orient_deg, float *orient_conf, const char **script_name, float *script_conf)
Definition: baseapi.cpp:2010

◆ GetRegions()

Boxa * tesseract::TessBaseAPI::GetRegions ( Pixa **  pixa)

Get the result of page layout analysis as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize.

Definition at line 663 of file baseapi.cpp.

663  {
664  return GetComponentImages(RIL_BLOCK, false, pixa, nullptr);
665 }
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:720

◆ GetStrips()

Boxa * tesseract::TessBaseAPI::GetStrips ( Pixa **  pixa,
int **  blockids 
)

Get textlines and strips of image regions as a leptonica-style Boxa, Pixa pair, in reading order. Enables downstream handling of non-rectangular regions. Can be called before or after Recognize. If blockids is not nullptr, the block-id of each line is also returned as an array of one element per line. delete [] after use.

Definition at line 689 of file baseapi.cpp.

689  {
690  return GetComponentImages(RIL_TEXTLINE, false, pixa, blockids);
691 }
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:720

◆ GetTextDirection()

bool tesseract::TessBaseAPI::GetTextDirection ( int *  out_offset,
float *  out_slope 
)

Definition at line 2249 of file baseapi.cpp.

2249  {
2250  PageIterator* it = AnalyseLayout();
2251  if (it == nullptr) {
2252  return false;
2253  }
2254  int x1, x2, y1, y2;
2255  it->Baseline(RIL_TEXTLINE, &x1, &y1, &x2, &y2);
2256  // Calculate offset and slope (NOTE: Kind of ugly)
2257  if (x2 <= x1) x2 = x1 + 1;
2258  // Convert the point pair to slope/offset of the baseline (in image coords.)
2259  *out_slope = static_cast<float>(y2 - y1) / (x2 - x1);
2260  *out_offset = static_cast<int>(y1 - *out_slope * x1);
2261  // Get the y-coord of the baseline at the left and right edges of the
2262  // textline's bounding box.
2263  int left, top, right, bottom;
2264  if (!it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom)) {
2265  delete it;
2266  return false;
2267  }
2268  int left_y = IntCastRounded(*out_slope * left + *out_offset);
2269  int right_y = IntCastRounded(*out_slope * right + *out_offset);
2270  // Shift the baseline down so it passes through the nearest bottom-corner
2271  // of the textline's bounding box. This is the difference between the y
2272  // at the lowest (max) edge of the box and the actual box bottom.
2273  *out_offset += bottom - std::max(left_y, right_y);
2274  // Switch back to bottom-up tesseract coordinates. Requires negation of
2275  // the slope and height - offset for the offset.
2276  *out_slope = -*out_slope;
2277  *out_offset = rect_height_ - *out_offset;
2278  delete it;
2279 
2280  return true;
2281 }
int IntCastRounded(double x)
Definition: helpers.h:168
PageIterator * AnalyseLayout()
Definition: baseapi.cpp:824

◆ GetTextlines() [1/2]

Boxa * tesseract::TessBaseAPI::GetTextlines ( const bool  raw_image,
const int  raw_padding,
Pixa **  pixa,
int **  blockids,
int **  paraids 
)

Get the textlines as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If raw_image is true, then extract from the original image instead of the thresholded image and pad by raw_padding pixels. If blockids is not nullptr, the block-id of each line is also returned as an array of one element per line. delete [] after use. If paraids is not nullptr, the paragraph-id of each line within its block is also returned as an array of one element per line. delete [] after use.

Get the textlines as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If blockids is not nullptr, the block-id of each line is also returned as an array of one element per line. delete [] after use. If paraids is not nullptr, the paragraph-id of each line within its block is also returned as an array of one element per line. delete [] after use.

Definition at line 675 of file baseapi.cpp.

676  {
677  return GetComponentImages(RIL_TEXTLINE, true, raw_image, raw_padding,
678  pixa, blockids, paraids);
679 }
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:720

◆ GetTextlines() [2/2]

Boxa* tesseract::TessBaseAPI::GetTextlines ( Pixa **  pixa,
int **  blockids 
)
inline

Definition at line 412 of file baseapi.h.

412  {
413  return GetTextlines(false, 0, pixa, blockids, nullptr);
414  }
Boxa * GetTextlines(const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:675

◆ GetThresholdedImage()

Pix * tesseract::TessBaseAPI::GetThresholdedImage ( )

Get a copy of the internal thresholded image from Tesseract. Caller takes ownership of the Pix and must pixDestroy it. May be called any time after SetImage, or after TesseractRect.

ONLY available after SetImage if you have Leptonica installed. Get a copy of the internal thresholded image from Tesseract.

Definition at line 649 of file baseapi.cpp.

649  {
650  if (tesseract_ == nullptr || thresholder_ == nullptr) return nullptr;
651  if (tesseract_->pix_binary() == nullptr &&
653  return nullptr;
654  }
655  return pixClone(tesseract_->pix_binary());
656 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:874
virtual TESS_LOCAL bool Threshold(Pix **pix)
Definition: baseapi.cpp:2334
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
Pix * pix_binary() const

◆ GetThresholdedImageScaleFactor()

int tesseract::TessBaseAPI::GetThresholdedImageScaleFactor ( ) const

Returns the scale factor of the thresholded image that would be returned by GetThresholdedImage() and the various GetX() methods that call GetComponentImages(). Returns 0 if no thresholder has been set.

Definition at line 802 of file baseapi.cpp.

802  {
803  if (thresholder_ == nullptr) {
804  return 0;
805  }
806  return thresholder_->GetScaleFactor();
807 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:874

◆ GetTSVText()

char * tesseract::TessBaseAPI::GetTSVText ( int  page_number)

Make a TSV-formatted string from the internal data structures. page_number is 0-based but will appear in the output as 1-based. Returned string must be freed with the delete [] operator.

Definition at line 1707 of file baseapi.cpp.

1707  {
1708  if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0))
1709  return nullptr;
1710 
1711  int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
1712  int page_id = page_number + 1; // we use 1-based page numbers.
1713 
1714  STRING tsv_str("");
1715 
1716  int page_num = page_id;
1717  int block_num = 0;
1718  int par_num = 0;
1719  int line_num = 0;
1720  int word_num = 0;
1721 
1722  tsv_str.add_str_int("1\t", page_num); // level 1 - page
1723  tsv_str.add_str_int("\t", block_num);
1724  tsv_str.add_str_int("\t", par_num);
1725  tsv_str.add_str_int("\t", line_num);
1726  tsv_str.add_str_int("\t", word_num);
1727  tsv_str.add_str_int("\t", rect_left_);
1728  tsv_str.add_str_int("\t", rect_top_);
1729  tsv_str.add_str_int("\t", rect_width_);
1730  tsv_str.add_str_int("\t", rect_height_);
1731  tsv_str += "\t-1\t\n";
1732 
1733  ResultIterator* res_it = GetIterator();
1734  while (!res_it->Empty(RIL_BLOCK)) {
1735  if (res_it->Empty(RIL_WORD)) {
1736  res_it->Next(RIL_WORD);
1737  continue;
1738  }
1739 
1740  // Add rows for any new block/paragraph/textline.
1741  if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
1742  block_num++;
1743  par_num = 0;
1744  line_num = 0;
1745  word_num = 0;
1746  tsv_str.add_str_int("2\t", page_num); // level 2 - block
1747  tsv_str.add_str_int("\t", block_num);
1748  tsv_str.add_str_int("\t", par_num);
1749  tsv_str.add_str_int("\t", line_num);
1750  tsv_str.add_str_int("\t", word_num);
1751  AddBoxToTSV(res_it, RIL_BLOCK, &tsv_str);
1752  tsv_str += "\t-1\t\n"; // end of row for block
1753  }
1754  if (res_it->IsAtBeginningOf(RIL_PARA)) {
1755  par_num++;
1756  line_num = 0;
1757  word_num = 0;
1758  tsv_str.add_str_int("3\t", page_num); // level 3 - paragraph
1759  tsv_str.add_str_int("\t", block_num);
1760  tsv_str.add_str_int("\t", par_num);
1761  tsv_str.add_str_int("\t", line_num);
1762  tsv_str.add_str_int("\t", word_num);
1763  AddBoxToTSV(res_it, RIL_PARA, &tsv_str);
1764  tsv_str += "\t-1\t\n"; // end of row for para
1765  }
1766  if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
1767  line_num++;
1768  word_num = 0;
1769  tsv_str.add_str_int("4\t", page_num); // level 4 - line
1770  tsv_str.add_str_int("\t", block_num);
1771  tsv_str.add_str_int("\t", par_num);
1772  tsv_str.add_str_int("\t", line_num);
1773  tsv_str.add_str_int("\t", word_num);
1774  AddBoxToTSV(res_it, RIL_TEXTLINE, &tsv_str);
1775  tsv_str += "\t-1\t\n"; // end of row for line
1776  }
1777 
1778  // Now, process the word...
1779  int left, top, right, bottom;
1780  res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
1781  word_num++;
1782  tsv_str.add_str_int("5\t", page_num); // level 5 - word
1783  tsv_str.add_str_int("\t", block_num);
1784  tsv_str.add_str_int("\t", par_num);
1785  tsv_str.add_str_int("\t", line_num);
1786  tsv_str.add_str_int("\t", word_num);
1787  tsv_str.add_str_int("\t", left);
1788  tsv_str.add_str_int("\t", top);
1789  tsv_str.add_str_int("\t", right - left);
1790  tsv_str.add_str_int("\t", bottom - top);
1791  tsv_str.add_str_int("\t", res_it->Confidence(RIL_WORD));
1792  tsv_str += "\t";
1793 
1794  // Increment counts if at end of block/paragraph/textline.
1795  if (res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD)) lcnt++;
1796  if (res_it->IsAtFinalElement(RIL_PARA, RIL_WORD)) pcnt++;
1797  if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD)) bcnt++;
1798 
1799  do {
1800  tsv_str +=
1801  std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_SYMBOL)).get();
1802  res_it->Next(RIL_SYMBOL);
1803  } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
1804  tsv_str += "\n"; // end of row
1805  wcnt++;
1806  }
1807 
1808  char* ret = new char[tsv_str.length() + 1];
1809  strcpy(ret, tsv_str.string());
1810  delete res_it;
1811  return ret;
1812 }
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:844
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:877
Definition: strngs.h:45
ResultIterator * GetIterator()
Definition: baseapi.cpp:1299

◆ GetUnichar()

const char * tesseract::TessBaseAPI::GetUnichar ( int  unichar_id)

This method returns the string form of the specified unichar.

Definition at line 2616 of file baseapi.cpp.

2616  {
2617  return tesseract_->unicharset.id_to_unichar(unichar_id);
2618 }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
UNICHARSET unicharset
Definition: ccutil.h:68
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:290

◆ GetUNLVText()

char * tesseract::TessBaseAPI::GetUNLVText ( )

The recognized text is returned as a char* which is coded as UNLV format Latin-1 with specific reject and suspect codes. Returned string must be freed with the delete [] operator.

Definition at line 1898 of file baseapi.cpp.

1898  {
1899  if (tesseract_ == nullptr ||
1900  (!recognition_done_ && Recognize(nullptr) < 0))
1901  return nullptr;
1902  bool tilde_crunch_written = false;
1903  bool last_char_was_newline = true;
1904  bool last_char_was_tilde = false;
1905 
1906  int total_length = TextLength(nullptr);
1907  PAGE_RES_IT page_res_it(page_res_);
1908  char* result = new char[total_length];
1909  char* ptr = result;
1910  for (page_res_it.restart_page(); page_res_it.word () != nullptr;
1911  page_res_it.forward()) {
1912  WERD_RES *word = page_res_it.word();
1913  // Process the current word.
1914  if (word->unlv_crunch_mode != CR_NONE) {
1915  if (word->unlv_crunch_mode != CR_DELETE &&
1916  (!tilde_crunch_written ||
1917  (word->unlv_crunch_mode == CR_KEEP_SPACE &&
1918  word->word->space() > 0 &&
1919  !word->word->flag(W_FUZZY_NON) &&
1920  !word->word->flag(W_FUZZY_SP)))) {
1921  if (!word->word->flag(W_BOL) &&
1922  word->word->space() > 0 &&
1923  !word->word->flag(W_FUZZY_NON) &&
1924  !word->word->flag(W_FUZZY_SP)) {
1925  /* Write a space to separate from preceding good text */
1926  *ptr++ = ' ';
1927  last_char_was_tilde = false;
1928  }
1929  if (!last_char_was_tilde) {
1930  // Write a reject char.
1931  last_char_was_tilde = true;
1932  *ptr++ = kUNLVReject;
1933  tilde_crunch_written = true;
1934  last_char_was_newline = false;
1935  }
1936  }
1937  } else {
1938  // NORMAL PROCESSING of non tilde crunched words.
1939  tilde_crunch_written = false;
1941  const char* wordstr = word->best_choice->unichar_string().string();
1942  const STRING& lengths = word->best_choice->unichar_lengths();
1943  int length = lengths.length();
1944  int i = 0;
1945  int offset = 0;
1946 
1947  if (last_char_was_tilde &&
1948  word->word->space() == 0 && wordstr[offset] == ' ') {
1949  // Prevent adjacent tilde across words - we know that adjacent tildes
1950  // within words have been removed.
1951  // Skip the first character.
1952  offset = lengths[i++];
1953  }
1954  if (i < length && wordstr[offset] != 0) {
1955  if (!last_char_was_newline)
1956  *ptr++ = ' ';
1957  else
1958  last_char_was_newline = false;
1959  for (; i < length; offset += lengths[i++]) {
1960  if (wordstr[offset] == ' ' ||
1961  wordstr[offset] == kTesseractReject) {
1962  *ptr++ = kUNLVReject;
1963  last_char_was_tilde = true;
1964  } else {
1965  if (word->reject_map[i].rejected())
1966  *ptr++ = kUNLVSuspect;
1967  UNICHAR ch(wordstr + offset, lengths[i]);
1968  int uni_ch = ch.first_uni();
1969  for (int j = 0; kUniChs[j] != 0; ++j) {
1970  if (kUniChs[j] == uni_ch) {
1971  uni_ch = kLatinChs[j];
1972  break;
1973  }
1974  }
1975  if (uni_ch <= 0xff) {
1976  *ptr++ = static_cast<char>(uni_ch);
1977  last_char_was_tilde = false;
1978  } else {
1979  *ptr++ = kUNLVReject;
1980  last_char_was_tilde = true;
1981  }
1982  }
1983  }
1984  }
1985  }
1986  if (word->word->flag(W_EOL) && !last_char_was_newline) {
1987  /* Add a new line output */
1988  *ptr++ = '\n';
1989  tilde_crunch_written = false;
1990  last_char_was_newline = true;
1991  last_char_was_tilde = false;
1992  }
1993  }
1994  *ptr++ = '\n';
1995  *ptr = '\0';
1996  return result;
1997 }
void set_unlv_suspects(WERD_RES *word)
Definition: output.cpp:280
TESS_LOCAL int TextLength(int *blob_count)
Definition: baseapi.cpp:2491
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:844
const char kUNLVSuspect
Definition: baseapi.cpp:103
const char kUNLVReject
Definition: baseapi.cpp:101
REJMAP reject_map
Definition: pageres.h:287
const char * string() const
Definition: strngs.cpp:196
Definition: werd.h:35
const int kUniChs[]
Definition: baseapi.cpp:1885
uint8_t space()
Definition: werd.h:102
const char kTesseractReject
Definition: baseapi.cpp:99
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
bool flag(WERD_FLAGS mask) const
Definition: werd.h:126
const STRING & unichar_lengths() const
Definition: ratngs.h:548
CRUNCH_MODE unlv_crunch_mode
Definition: pageres.h:310
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:883
Definition: werd.h:34
const int kLatinChs[]
Definition: baseapi.cpp:1889
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:877
Definition: strngs.h:45
const STRING & unichar_string() const
Definition: ratngs.h:541
int32_t length() const
Definition: strngs.cpp:191
WERD_CHOICE * best_choice
Definition: pageres.h:235
WERD * word
Definition: pageres.h:189

◆ GetUTF8Text()

char * tesseract::TessBaseAPI::GetUTF8Text ( )

The recognized text is returned as a char* which is coded as UTF8 and must be freed with the delete [] operator.

Make a text string from the internal data structures.

Definition at line 1326 of file baseapi.cpp.

1326  {
1327  if (tesseract_ == nullptr ||
1328  (!recognition_done_ && Recognize(nullptr) < 0))
1329  return nullptr;
1330  STRING text("");
1331  ResultIterator *it = GetIterator();
1332  do {
1333  if (it->Empty(RIL_PARA)) continue;
1334  const std::unique_ptr<const char[]> para_text(it->GetUTF8Text(RIL_PARA));
1335  text += para_text.get();
1336  } while (it->Next(RIL_PARA));
1337  char* result = new char[text.length() + 1];
1338  strncpy(result, text.string(), text.length() + 1);
1339  delete it;
1340  return result;
1341 }
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:844
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:883
Definition: strngs.h:45
ResultIterator * GetIterator()
Definition: baseapi.cpp:1299

◆ GetWords()

Boxa * tesseract::TessBaseAPI::GetWords ( Pixa **  pixa)

Get the words as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize.

Definition at line 698 of file baseapi.cpp.

698  {
699  return GetComponentImages(RIL_WORD, true, pixa, nullptr);
700 }
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:720

◆ InitTruthCallback()

void tesseract::TessBaseAPI::InitTruthCallback ( TruthCallback *  cb)
inline

Definition at line 787 of file baseapi.h.

787 { truth_cb_ = cb; }
TruthCallback * truth_cb_
Definition: baseapi.h:884

◆ IsValidCharacter()

bool tesseract::TessBaseAPI::IsValidCharacter ( const char *  utf8_character)

Definition at line 2242 of file baseapi.cpp.

2242  {
2243  return tesseract_->unicharset.contains_unichar(utf8_character);
2244 }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
UNICHARSET unicharset
Definition: ccutil.h:68
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:670

◆ IsValidWord()

int tesseract::TessBaseAPI::IsValidWord ( const char *  word)

Check whether a word is valid according to Tesseract's language model

Returns
0 if the word is invalid, non-zero if valid.
Warning
temporary! This function will be removed from here and placed in a separate API at some future time.

Check whether a word is valid according to Tesseract's language model. Returns 0 if the word is invalid, non-zero if valid.

Definition at line 2238 of file baseapi.cpp.

2238  {
2239  return tesseract_->getDict().valid_word(word);
2240 }
int valid_word(const WERD_CHOICE &word, bool numbers_ok) const
Definition: dict.cpp:753
Dict & getDict() override
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870

◆ MakeTBLOB()

TBLOB * tesseract::TessBaseAPI::MakeTBLOB ( Pix *  pix)
static

Returns a TBLOB corresponding to the entire input image.

Creates a TBLOB* from the whole pix.

Definition at line 2690 of file baseapi.cpp.

2690  {
2691  int width = pixGetWidth(pix);
2692  int height = pixGetHeight(pix);
2693  BLOCK block("a character", TRUE, 0, 0, 0, 0, width, height);
2694 
2695  // Create C_BLOBs from the page
2696  extract_edges(pix, &block);
2697 
2698  // Merge all C_BLOBs
2699  C_BLOB_LIST *list = block.blob_list();
2700  C_BLOB_IT c_blob_it(list);
2701  if (c_blob_it.empty())
2702  return nullptr;
2703  // Move all the outlines to the first blob.
2704  C_OUTLINE_IT ol_it(c_blob_it.data()->out_list());
2705  for (c_blob_it.forward();
2706  !c_blob_it.at_first();
2707  c_blob_it.forward()) {
2708  C_BLOB *c_blob = c_blob_it.data();
2709  ol_it.add_list_after(c_blob->out_list());
2710  }
2711  // Convert the first blob to the output TBLOB.
2712  return TBLOB::PolygonalCopy(false, c_blob_it.data());
2713 }
static TBLOB * PolygonalCopy(bool allow_detailed_fx, C_BLOB *src)
Definition: blobs.cpp:337
#define TRUE
Definition: capi.h:51
Definition: ocrblock.h:30
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:70
void extract_edges(Pix *pix, BLOCK *block)
Definition: edgblob.cpp:334

◆ MakeTessOCRRow()

ROW * tesseract::TessBaseAPI::MakeTessOCRRow ( float  baseline,
float  xheight,
float  descender,
float  ascender 
)
static

Returns a ROW object created from the input row specification.

Definition at line 2673 of file baseapi.cpp.

2676  {
2677  int32_t xstarts[] = {-32000};
2678  double quad_coeffs[] = {0, 0, baseline};
2679  return new ROW(1,
2680  xstarts,
2681  quad_coeffs,
2682  xheight,
2683  ascender - (baseline + xheight),
2684  descender - baseline,
2685  0,
2686  0);
2687 }
Definition: ocrrow.h:36

◆ MeanTextConf()

int tesseract::TessBaseAPI::MeanTextConf ( )

Returns the (average) confidence value between 0 and 100.

Returns the average word confidence for Tesseract page result.

Definition at line 2072 of file baseapi.cpp.

2072  {
2073  int* conf = AllWordConfidences();
2074  if (!conf) return 0;
2075  int sum = 0;
2076  int *pt = conf;
2077  while (*pt >= 0) sum += *pt++;
2078  if (pt != conf) sum /= pt - conf;
2079  delete [] conf;
2080  return sum;
2081 }

◆ NormalizeTBLOB()

void tesseract::TessBaseAPI::NormalizeTBLOB ( TBLOB *  tblob,
ROW *  row,
bool  numeric_mode 
)
static

This method baseline normalizes a TBLOB in-place. The input row is used for normalization. The denorm is an optional parameter in which the normalization-antidote is returned.

Definition at line 2720 of file baseapi.cpp.

2720  {
2721  TBOX box = tblob->bounding_box();
2722  float x_center = (box.left() + box.right()) / 2.0f;
2723  float baseline = row->base_line(x_center);
2724  float scale = kBlnXHeight / row->x_height();
2725  tblob->Normalize(nullptr, nullptr, nullptr, x_center, baseline, scale, scale,
2726  0.0f, static_cast<float>(kBlnBaselineOffset), false, nullptr);
2727 }
float base_line(float xpos) const
Definition: ocrrow.h:59
Definition: rect.h:34
const int kBlnXHeight
Definition: normalis.h:24
const int kBlnBaselineOffset
Definition: normalis.h:25
int16_t left() const
Definition: rect.h:72
float x_height() const
Definition: ocrrow.h:64
void Normalize(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift, bool inverse, Pix *pix)
Definition: blobs.cpp:407
TBOX bounding_box() const
Definition: blobs.cpp:478
int16_t right() const
Definition: rect.h:79

◆ NumDawgs()

int tesseract::TessBaseAPI::NumDawgs ( ) const

Return the number of dawgs loaded into tesseract_ object.

Definition at line 2627 of file baseapi.cpp.

2627  {
2628  return tesseract_ == nullptr ? 0 : tesseract_->getDict().NumDawgs();
2629 }
Dict & getDict() override
int NumDawgs() const
Return the number of dawgs in the dawgs_ vector.
Definition: dict.h:415
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870

◆ oem()

OcrEngineMode tesseract::TessBaseAPI::oem ( ) const
inline

Definition at line 785 of file baseapi.h.

785 { return last_oem_requested_; }
OcrEngineMode last_oem_requested_
Last ocr language mode requested.
Definition: baseapi.h:882

◆ ProcessPage()

bool tesseract::TessBaseAPI::ProcessPage ( Pix *  pix,
int  page_index,
const char *  filename,
const char *  retry_config,
int  timeout_millisec,
TessResultRenderer *  renderer 
)

Turn a single image into symbolic text.

The pix is the image processed. filename and page_index are metadata used by side-effect processes, such as reading a box file or formatting as hOCR.

See ProcessPages for descriptions of other parameters.

Definition at line 1213 of file baseapi.cpp.

1215  {
1216  PERF_COUNT_START("ProcessPage")
1217  SetInputName(filename);
1218  SetImage(pix);
1219  bool failed = false;
1220 
1221  if (tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) {
1222  // Disabled character recognition
1223  PageIterator* it = AnalyseLayout();
1224 
1225  if (it == nullptr) {
1226  failed = true;
1227  } else {
1228  delete it;
1229  }
1231  failed = FindLines() != 0;
1232  } else if (timeout_millisec > 0) {
1233  // Running with a timeout.
1234  ETEXT_DESC monitor;
1235  monitor.cancel = nullptr;
1236  monitor.cancel_this = nullptr;
1237  monitor.set_deadline_msecs(timeout_millisec);
1238 
1239  // Now run the main recognition.
1240  failed = Recognize(&monitor) < 0;
1241  } else {
1242  // Normal layout and character recognition with no timeout.
1243  failed = Recognize(nullptr) < 0;
1244  }
1245 
1247 #ifndef ANDROID_BUILD
1248  Pix* page_pix = GetThresholdedImage();
1249  pixWrite("tessinput.tif", page_pix, IFF_TIFF_G4);
1250 #endif // ANDROID_BUILD
1251  }
1252 
1253  if (failed && retry_config != nullptr && retry_config[0] != '\0') {
1254  // Save current config variables before switching modes.
1255  FILE* fp = fopen(kOldVarsFile, "wb");
1256  if (fp == nullptr) {
1257  tprintf("Error, failed to open file \"%s\"\n", kOldVarsFile);
1258  } else {
1259  PrintVariables(fp);
1260  fclose(fp);
1261  }
1262  // Switch to alternate mode for retry.
1263  ReadConfigFile(retry_config);
1264  SetImage(pix);
1265  Recognize(nullptr);
1266  // Restore saved config variables.
1268  }
1269 
1270  if (renderer && !failed) {
1271  failed = !renderer->AddImage(this);
1272  }
1273 
1275  return !failed;
1276 }
TESS_LOCAL int FindLines()
Definition: baseapi.cpp:2389
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:844
void * cancel_this
monitor-aware progress callback
Definition: ocrclass.h:132
#define PERF_COUNT_START(FUNCT_NAME)
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: baseapi.cpp:594
Pix * GetThresholdedImage()
Definition: baseapi.cpp:649
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
void PrintVariables(FILE *fp) const
Definition: baseapi.cpp:341
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
void ReadConfigFile(const char *filename)
Definition: baseapi.cpp:515
Automatic page segmentation, but no OSD, or OCR.
Definition: publictypes.h:167
PageIterator * AnalyseLayout()
Definition: baseapi.cpp:824
CANCEL_FUNC cancel
for errcode use
Definition: ocrclass.h:129
const char * kOldVarsFile
Definition: baseapi.cpp:112
void SetInputName(const char *name)
Definition: baseapi.cpp:278
#define PERF_COUNT_END
Orientation and script detection only.
Definition: publictypes.h:164
void set_deadline_msecs(int32_t deadline_msecs)
Definition: ocrclass.h:152

◆ ProcessPages()

bool tesseract::TessBaseAPI::ProcessPages ( const char *  filename,
const char *  retry_config,
int  timeout_millisec,
TessResultRenderer *  renderer 
)

Turns images into symbolic text.

filename can point to a single image, a multi-page TIFF, or a plain text list of image filenames.

retry_config is useful for debugging. If not nullptr, you can fall back to an alternate configuration if a page fails for some reason.

timeout_millisec terminates processing if any single page takes too long. Set to 0 for unlimited time.

renderer is responsible for creating the output. For example, use the TessTextRenderer if you want plaintext output, or the TessPDFRenderer to produce searchable PDF.

If tessedit_page_number is non-negative, will only process that single page. Works for multi-page tiff file, or filelist.

Returns true if successful, false on error.

Definition at line 1084 of file baseapi.cpp.

1086  {
1087  bool result =
1088  ProcessPagesInternal(filename, retry_config, timeout_millisec, renderer);
1089  #ifndef DISABLED_LEGACY_ENGINE
1090  if (result) {
1093  tprintf("Write of TR file failed: %s\n", output_file_->string());
1094  return false;
1095  }
1096  }
1097  #endif // ndef DISABLED_LEGACY_ENGINE
1098  return result;
1099 }
const char * string() const
Definition: strngs.cpp:196
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
bool WriteTRFile(const STRING &filename)
Definition: blobclass.cpp:102
STRING * output_file_
Name used by debug code.
Definition: baseapi.h:879
bool ProcessPagesInternal(const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
Definition: baseapi.cpp:1112

◆ ProcessPagesInternal()

bool tesseract::TessBaseAPI::ProcessPagesInternal ( const char *  filename,
const char *  retry_config,
int  timeout_millisec,
TessResultRenderer *  renderer 
)

Definition at line 1112 of file baseapi.cpp.

1115  {
1116  PERF_COUNT_START("ProcessPages")
1117  bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "-");
1118  if (stdInput) {
1119 #ifdef WIN32
1120  if (_setmode(_fileno(stdin), _O_BINARY) == -1)
1121  tprintf("ERROR: cin to binary: %s", strerror(errno));
1122 #endif // WIN32
1123  }
1124 
1125  if (stream_filelist) {
1126  return ProcessPagesFileList(stdin, nullptr, retry_config,
1127  timeout_millisec, renderer,
1129  }
1130 
1131  // At this point we are officially in autodection territory.
1132  // That means any data in stdin must be buffered, to make it
1133  // seekable.
1134  std::string buf;
1135  const l_uint8 *data = nullptr;
1136  if (stdInput) {
1137  buf.assign((std::istreambuf_iterator<char>(std::cin)),
1138  (std::istreambuf_iterator<char>()));
1139  data = reinterpret_cast<const l_uint8 *>(buf.data());
1140  } else {
1141  // Check whether the input file can be read.
1142  if (FILE* file = fopen(filename, "rb")) {
1143  fclose(file);
1144  } else {
1145  fprintf(stderr, "Error, cannot read input file %s: %s\n",
1146  filename, strerror(errno));
1147  return false;
1148  }
1149  }
1150 
1151  // Here is our autodetection
1152  int format;
1153  int r = (stdInput) ?
1154  findFileFormatBuffer(data, &format) :
1155  findFileFormat(filename, &format);
1156 
1157  // Maybe we have a filelist
1158  if (r != 0 || format == IFF_UNKNOWN) {
1159  STRING s;
1160  if (stdInput) {
1161  s = buf.c_str();
1162  } else {
1163  std::ifstream t(filename);
1164  std::string u((std::istreambuf_iterator<char>(t)),
1165  std::istreambuf_iterator<char>());
1166  s = u.c_str();
1167  }
1168  return ProcessPagesFileList(nullptr, &s, retry_config,
1169  timeout_millisec, renderer,
1171  }
1172 
1173  // Maybe we have a TIFF which is potentially multipage
1174  bool tiff = (format == IFF_TIFF || format == IFF_TIFF_PACKBITS ||
1175  format == IFF_TIFF_RLE || format == IFF_TIFF_G3 ||
1176  format == IFF_TIFF_G4 || format == IFF_TIFF_LZW ||
1177  format == IFF_TIFF_ZIP);
1178 
1179  // Fail early if we can, before producing any output
1180  Pix *pix = nullptr;
1181  if (!tiff) {
1182  pix = (stdInput) ? pixReadMem(data, buf.size()) : pixRead(filename);
1183  if (pix == nullptr) {
1184  return false;
1185  }
1186  }
1187 
1188  // Begin the output
1189  if (renderer && !renderer->BeginDocument(unknown_title_)) {
1190  pixDestroy(&pix);
1191  return false;
1192  }
1193 
1194  // Produce output
1195  r = (tiff) ?
1196  ProcessPagesMultipageTiff(data, buf.size(), filename, retry_config,
1197  timeout_millisec, renderer,
1199  ProcessPage(pix, 0, filename, retry_config,
1200  timeout_millisec, renderer);
1201 
1202  // Clean up memory as needed
1203  pixDestroy(&pix);
1204 
1205  // End the output
1206  if (!r || (renderer && !renderer->EndDocument())) {
1207  return false;
1208  }
1210  return true;
1211 }
#define PERF_COUNT_START(FUNCT_NAME)
const char * c_str() const
Definition: strngs.cpp:207
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
bool ProcessPage(Pix *pix, int page_index, const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
Definition: baseapi.cpp:1213
bool stream_filelist
Definition: baseapi.cpp:92
Definition: strngs.h:45
#define PERF_COUNT_END

◆ Recognize()

int tesseract::TessBaseAPI::Recognize ( ETEXT_DESC *  monitor)

Recognize the image from SetAndThresholdImage, generating Tesseract internal structures. Returns 0 on success. Optional. The Get*Text functions below will call Recognize if needed. After Recognize, the output is kept internally until the next SetImage.

Recognize the tesseract global image and return the result as Tesseract internal structures.

Definition at line 844 of file baseapi.cpp.

844  {
845  if (tesseract_ == nullptr)
846  return -1;
847  if (FindLines() != 0)
848  return -1;
849  delete page_res_;
850  if (block_list_->empty()) {
851  page_res_ = new PAGE_RES(false, block_list_,
853  return 0; // Empty page.
854  }
855 
857  recognition_done_ = true;
858 #ifndef DISABLED_LEGACY_ENGINE
863  } else
864 #endif // ndef DISABLED_LEGACY_ENGINE
865  {
868  }
869 
870  if (page_res_ == nullptr) {
871  return -1;
872  }
873 
877  return 0;
878  }
879 #ifndef DISABLED_LEGACY_ENGINE
882  return 0;
883  }
884 #endif // ndef DISABLED_LEGACY_ENGINE
885 
886  if (truth_cb_ != nullptr) {
887  tesseract_->wordrec_run_blamer.set_value(true);
888  PageIterator *page_it = new PageIterator(
893  image_height_, page_it, this->tesseract()->pix_grey());
894  delete page_it;
895  }
896 
897  int result = 0;
899  #ifndef GRAPHICS_DISABLED
901  #endif // GRAPHICS_DISABLED
902  // The page_res is invalid after an interactive session, so cleanup
903  // in a way that lets us continue to the next page without crashing.
904  delete page_res_;
905  page_res_ = nullptr;
906  return -1;
907  #ifndef DISABLED_LEGACY_ENGINE
909  STRING fontname;
910  ExtractFontName(*output_file_, &fontname);
912  } else if (tesseract_->tessedit_ambigs_training) {
913  FILE *training_output_file = tesseract_->init_recog_training(*input_file_);
914  // OCR the page segmented into words by tesseract.
916  *input_file_, page_res_, monitor, training_output_file);
917  fclose(training_output_file);
918  #endif // ndef DISABLED_LEGACY_ENGINE
919  } else {
920  // Now run the main recognition.
921  bool wait_for_text = true;
922  GetBoolVariable("paragraph_text_based", &wait_for_text);
923  if (!wait_for_text) DetectParagraphs(false);
924  if (tesseract_->recog_all_words(page_res_, monitor, nullptr, nullptr, 0)) {
925  if (wait_for_text) DetectParagraphs(true);
926  } else {
927  result = -1;
928  }
929  }
930  return result;
931 }
TESS_LOCAL int FindLines()
Definition: baseapi.cpp:2389
void ExtractFontName(const STRING &filename, STRING *fontname)
Definition: blobclass.cpp:47
Pix * pix_grey() const
Tesseract * tesseract() const
Definition: baseapi.h:783
WERD_CHOICE * prev_word_best_choice_
Definition: wordrec.h:481
Dict & getDict() override
bool GetBoolVariable(const char *name, bool *value) const
Definition: baseapi.cpp:313
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:874
TESS_LOCAL void DetectParagraphs(bool after_text_recognition)
Definition: baseapi.cpp:2600
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:876
void ApplyBoxTraining(const STRING &fontname, PAGE_RES *page_res)
bool tessedit_resegment_from_line_boxes
int GetScaledYResolution() const
Definition: thresholder.h:93
void TrainLineRecognizer(const STRING &input_imagename, const STRING &output_basename, BLOCK_LIST *block_list)
Definition: linerec.cpp:43
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
TruthCallback * truth_cb_
Definition: baseapi.h:884
void pgeditor_main(int width, int height, PAGE_RES *page_res)
Definition: pgedit.cpp:327
PAGE_RES * ApplyBoxes(const STRING &fname, bool find_segmentation, BLOCK_LIST *block_list)
bool recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, const char *word_config, int dopasses)
Definition: control.cpp:308
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:883
FILE * init_recog_training(const STRING &fname)
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:877
Definition: strngs.h:45
const UNICHARSET & getUnicharset() const
Definition: dict.h:98
STRING * output_file_
Name used by debug code.
Definition: baseapi.h:879
bool wordrec_run_blamer
Definition: wordrec.h:237
STRING * input_file_
Name used by training code.
Definition: baseapi.h:878
virtual void Run(A1, A2, A3, A4)=0
void CorrectClassifyWords(PAGE_RES *page_res)
void recog_training_segmented(const STRING &fname, PAGE_RES *page_res, volatile ETEXT_DESC *monitor, FILE *output_file)
bool AnyLSTMLang() const

◆ RecognizeForChopTest()

int tesseract::TessBaseAPI::RecognizeForChopTest ( ETEXT_DESC *  monitor)

Methods to retrieve information after SetAndThresholdImage(), Recognize() or TesseractRect(). (Recognize is called implicitly if needed.) Variant on Recognize used for testing chopper.

Tests the chopper by exhaustively running chop_one_blob.

Definition at line 935 of file baseapi.cpp.

935  {
936  if (tesseract_ == nullptr)
937  return -1;
938  if (thresholder_ == nullptr || thresholder_->IsEmpty()) {
939  tprintf("Please call SetImage before attempting recognition.\n");
940  return -1;
941  }
942  if (page_res_ != nullptr)
943  ClearResults();
944  if (FindLines() != 0)
945  return -1;
946  // Additional conditions under which chopper test cannot be run
947  if (tesseract_->interactive_display_mode) return -1;
948 
949  recognition_done_ = true;
950 
951  page_res_ = new PAGE_RES(false, block_list_,
953 
954  PAGE_RES_IT page_res_it(page_res_);
955 
956  while (page_res_it.word() != nullptr) {
957  WERD_RES *word_res = page_res_it.word();
958  GenericVector<TBOX> boxes;
959  tesseract_->MaximallyChopWord(boxes, page_res_it.block()->block,
960  page_res_it.row()->row, word_res);
961  page_res_it.forward();
962  }
963  return 0;
964 }
void MaximallyChopWord(const GenericVector< TBOX > &boxes, BLOCK *block, ROW *row, WERD_RES *word_res)
TESS_LOCAL int FindLines()
Definition: baseapi.cpp:2389
WERD_CHOICE * prev_word_best_choice_
Definition: wordrec.h:481
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:874
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:876
bool IsEmpty() const
Return true if no image has been set.
Definition: thresholder.cpp:51
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:883
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:877
WERD * word
Definition: pageres.h:189

◆ RunAdaptiveClassifier()

void tesseract::TessBaseAPI::RunAdaptiveClassifier ( TBLOB *  blob,
int  num_max_matches,
int *  unichar_ids,
float *  ratings,
int *  num_matches_returned 
)

Method to run adaptive classifier on a blob. It returns at most num_max_matches results.

Method to run adaptive classifier on a blob.

Definition at line 2982 of file baseapi.cpp.

2986  {
2987  BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST;
2988  tesseract_->AdaptiveClassifier(blob, choices);
2989  BLOB_CHOICE_IT choices_it(choices);
2990  int& index = *num_matches_returned;
2991  index = 0;
2992  for (choices_it.mark_cycle_pt();
2993  !choices_it.cycled_list() && index < num_max_matches;
2994  choices_it.forward()) {
2995  BLOB_CHOICE* choice = choices_it.data();
2996  unichar_ids[index] = choice->unichar_id();
2997  ratings[index] = choice->rating();
2998  ++index;
2999  }
3000  *num_matches_returned = index;
3001  delete choices;
3002 }
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
Definition: adaptmatch.cpp:192
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
float rating() const
Definition: ratngs.h:80
UNICHAR_ID unichar_id() const
Definition: ratngs.h:77

◆ set_min_orientation_margin()

void tesseract::TessBaseAPI::set_min_orientation_margin ( double  margin)

Definition at line 2537 of file baseapi.cpp.

2537  {
2538  tesseract_->min_orientation_margin.set_value(margin);
2539 }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870

◆ SetDictFunc()

void tesseract::TessBaseAPI::SetDictFunc ( DictFunc  f)

Sets Dict::letter_is_okay_ function to point to the given function.

Definition at line 2284 of file baseapi.cpp.

2284  {
2285  if (tesseract_ != nullptr) {
2287  }
2288 }
Dict & getDict() override
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
int(Dict::* letter_is_okay_)(void *void_dawg_args, const UNICHARSET &unicharset, UNICHAR_ID unichar_id, bool word_end) const
Definition: dict.h:357

◆ SetFillLatticeFunc()

void tesseract::TessBaseAPI::SetFillLatticeFunc ( FillLatticeFunc  f)

Sets Wordrec::fill_lattice_ function to point to the given function.

Definition at line 2311 of file baseapi.cpp.

2311  {
2312  if (tesseract_ != nullptr) tesseract_->fill_lattice_ = f;
2313 }
void(Wordrec::* fill_lattice_)(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
Definition: wordrec.h:485
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870

◆ SetImage() [1/2]

void tesseract::TessBaseAPI::SetImage ( const unsigned char *  imagedata,
int  width,
int  height,
int  bytes_per_pixel,
int  bytes_per_line 
)

Provide an image for Tesseract to recognize. Format is as TesseractRect above. Copies the image buffer and converts to Pix. SetImage clears all recognition results, and sets the rectangle to the full image, so it may be followed immediately by a GetUTF8Text, and it will automatically perform recognition.

Definition at line 594 of file baseapi.cpp.

596  {
597  if (InternalSetImage()) {
598  thresholder_->SetImage(imagedata, width, height,
599  bytes_per_pixel, bytes_per_line);
601  }
602 }
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: thresholder.cpp:63
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:874
TESS_LOCAL bool InternalSetImage()
Definition: baseapi.cpp:2317
void SetInputImage(Pix *pix)
Definition: baseapi.cpp:968

◆ SetImage() [2/2]

void tesseract::TessBaseAPI::SetImage ( Pix *  pix)

Provide an image for Tesseract to recognize. As with SetImage above, Tesseract takes its own copy of the image, so it need not persist until after Recognize. Pix vs raw, which to use? Use Pix where possible. Tesseract uses Pix as its internal representation and it is therefore more efficient to provide a Pix directly.

Definition at line 619 of file baseapi.cpp.

619  {
620  if (InternalSetImage()) {
621  if (pixGetSpp(pix) == 4 && pixGetInputFormat(pix) == IFF_PNG) {
622  // remove alpha channel from png
623  PIX* p1 = pixRemoveAlpha(pix);
624  pixSetSpp(p1, 3);
625  pix = pixCopy(nullptr, p1);
626  pixDestroy(&p1);
627  }
628  thresholder_->SetImage(pix);
630  }
631 }
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: thresholder.cpp:63
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:874
TESS_LOCAL bool InternalSetImage()
Definition: baseapi.cpp:2317
void SetInputImage(Pix *pix)
Definition: baseapi.cpp:968

◆ SetProbabilityInContextFunc()

void tesseract::TessBaseAPI::SetProbabilityInContextFunc ( ProbabilityInContextFunc  f)

Sets Dict::probability_in_context_ function to point to the given function.

Sets Dict::probability_in_context_ function to point to the given function.

Parameters
f: A single function that returns the probability of the current "character" (in general a utf-8 string), given the context of a previous utf-8 string.

Definition at line 2298 of file baseapi.cpp.

2298  {
2299  if (tesseract_ != nullptr) {
2301  // Set it for the sublangs too.
2302  int num_subs = tesseract_->num_sub_langs();
2303  for (int i = 0; i < num_subs; ++i) {
2305  }
2306  }
2307 }
Dict & getDict() override
double(Dict::* probability_in_context_)(const char *lang, const char *context, int context_bytes, const char *character, int character_bytes)
Probability in context function used by the ngram permuter.
Definition: dict.h:369
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
Tesseract * get_sub_lang(int index) const
int num_sub_langs() const

◆ SetRectangle()

void tesseract::TessBaseAPI::SetRectangle ( int  left,
int  top,
int  width,
int  height 
)

Restrict recognition to a sub-rectangle of the image. Call after SetImage. Each SetRectangle clears the recognition results so multiple rectangles can be recognized with the same image.

Definition at line 638 of file baseapi.cpp.

638  {
639  if (thresholder_ == nullptr)
640  return;
641  thresholder_->SetRectangle(left, top, width, height);
642  ClearResults();
643 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:874
void SetRectangle(int left, int top, int width, int height)

◆ SetSourceResolution()

void tesseract::TessBaseAPI::SetSourceResolution ( int  ppi)

Set the resolution of the source image in pixels per inch so font size information can be calculated in results. Call this after SetImage().

Definition at line 604 of file baseapi.cpp.

604  {
605  if (thresholder_)
607  else
608  tprintf("Please call SetImage before SetSourceResolution.\n");
609 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:874
void SetSourceYResolution(int ppi)
Definition: thresholder.h:86
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37

◆ SetThresholder()

void tesseract::TessBaseAPI::SetThresholder ( ImageThresholder *  thresholder)
inline

In extreme cases only, usually with a subclass of Thresholder, it is possible to provide a different Thresholder. The Thresholder may be preloaded with an image, settings etc, or they may be set after. Note that Tesseract takes ownership of the Thresholder and will delete it when it is replaced or the API is destructed.

Definition at line 376 of file baseapi.h.

376  {
377  delete thresholder_;
378  thresholder_ = thresholder;
379  ClearResults();
380  }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:874

◆ tesseract()

Tesseract* tesseract::TessBaseAPI::tesseract ( ) const
inline

Definition at line 783 of file baseapi.h.

783 { return tesseract_; }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870