tesseract  4.0.0-1-g2a2b
ocropus add-ons

Functions

TESS_LOCAL void tesseract::TessBaseAPI::AdaptToCharacter (const char *unichar_repr, int length, float baseline, float xheight, float descender, float ascender)
 
TESS_LOCAL PAGE_RES * tesseract::TessBaseAPI::RecognitionPass1 (BLOCK_LIST *block_list)
 
TESS_LOCAL PAGE_RES * tesseract::TessBaseAPI::RecognitionPass2 (BLOCK_LIST *block_list, PAGE_RES *pass1_result)
 
static TESS_LOCAL int tesseract::TessBaseAPI::TesseractExtractResult (char **text, int **lengths, float **costs, int **x0, int **y0, int **x1, int **y1, PAGE_RES *page_res)
 
TESS_LOCAL const PAGE_RES * tesseract::TessBaseAPI::GetPageRes () const
 

Detailed Description

Function Documentation

◆ AdaptToCharacter()

void tesseract::TessBaseAPI::AdaptToCharacter ( const char *  unichar_repr,
int  length,
float  baseline,
float  xheight,
float  descender,
float  ascender 
)
protected

Adapt to recognize the current image as the given character. The image must be preloaded and be just an image of a single character.

Adapt to recognize the current image as the given character. The image must be preloaded into pix_binary_ and be just an image of a single character.

Definition at line 2751 of file baseapi.cpp.

2756  {
2757  UNICHAR_ID id = tesseract_->unicharset.unichar_to_id(unichar_repr, length);
2758  TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender,
2759  tesseract_->classify_bln_numeric_mode,
2760  tesseract_->pix_binary());
2761  float threshold;
2762  float best_rating = -100;
2763 
2764 
2765  // Classify to get a raw choice.
2766  BLOB_CHOICE_LIST choices;
2767  tesseract_->AdaptiveClassifier(blob, &choices);
2768  BLOB_CHOICE_IT choice_it;
2769  choice_it.set_to_list(&choices);
2770  for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
2771  choice_it.forward()) {
2772  if (choice_it.data()->rating() > best_rating) {
2773  best_rating = choice_it.data()->rating();
2774  }
2775  }
2776 
2777  threshold = tesseract_->matcher_good_threshold;
2778 
2779  if (blob->outlines)
2780  tesseract_->AdaptToChar(blob, id, kUnknownFontinfoId, threshold,
2781  tesseract_->AdaptedTemplates);
2782  delete blob;
2783 }
int UNICHAR_ID
Definition: unichar.h:35
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
Definition: adaptmatch.cpp:192
bool classify_bln_numeric_mode
Definition: classify.h:541
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:209
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
UNICHARSET unicharset
Definition: ccutil.h:68
double matcher_good_threshold
Definition: classify.h:461
Pix * pix_binary() const
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:514
Definition: blobs.h:268
void AdaptToChar(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, float Threshold, ADAPT_TEMPLATES adaptive_templates)
Definition: adaptmatch.cpp:857
TESSLINE * outlines
Definition: blobs.h:384

◆ GetPageRes()

TESS_LOCAL const PAGE_RES* tesseract::TessBaseAPI::GetPageRes ( ) const
inline protected

Definition at line 865 of file baseapi.h.

865 { return page_res_; }
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:877

◆ RecognitionPass1()

PAGE_RES * tesseract::TessBaseAPI::RecognitionPass1 ( BLOCK_LIST *  block_list)
protected

Recognize text doing one pass only, using settings for a given pass.

Definition at line 2786 of file baseapi.cpp.

2786  {
2787  PAGE_RES *page_res = new PAGE_RES(false, block_list,
2788  &(tesseract_->prev_word_best_choice_));
2789  tesseract_->recog_all_words(page_res, nullptr, nullptr, nullptr, 1);
2790  return page_res;
2791 }
WERD_CHOICE * prev_word_best_choice_
Definition: wordrec.h:481
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
bool recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, const char *word_config, int dopasses)
Definition: control.cpp:308

◆ RecognitionPass2()

PAGE_RES * tesseract::TessBaseAPI::RecognitionPass2 ( BLOCK_LIST *  block_list,
PAGE_RES *  pass1_result 
)
protected

Definition at line 2793 of file baseapi.cpp.

2794  {
2795  if (!pass1_result)
2796  pass1_result = new PAGE_RES(false, block_list,
2797  &(tesseract_->prev_word_best_choice_));
2798  tesseract_->recog_all_words(pass1_result, nullptr, nullptr, nullptr, 2);
2799  return pass1_result;
2800 }
WERD_CHOICE * prev_word_best_choice_
Definition: wordrec.h:481
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
bool recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, const char *word_config, int dopasses)
Definition: control.cpp:308

◆ TesseractExtractResult()

int tesseract::TessBaseAPI::TesseractExtractResult ( char **  text,
int **  lengths,
float **  costs,
int **  x0,
int **  y0,
int **  x1,
int **  y1,
PAGE_RES *  page_res 
)
static protected

Extract the OCR results, costs (penalty points for uncertainty), and the bounding boxes of the characters.

Definition at line 2877 of file baseapi.cpp.

2884  {
2885  TESS_CHAR_LIST tess_chars;
2886  TESS_CHAR_IT tess_chars_it(&tess_chars);
2887  extract_result(&tess_chars_it, page_res);
2888  tess_chars_it.move_to_first();
2889  int n = tess_chars.length();
2890  int text_len = 0;
2891  *lengths = new int[n];
2892  *costs = new float[n];
2893  *x0 = new int[n];
2894  *y0 = new int[n];
2895  *x1 = new int[n];
2896  *y1 = new int[n];
2897  int i = 0;
2898  for (tess_chars_it.mark_cycle_pt();
2899  !tess_chars_it.cycled_list();
2900  tess_chars_it.forward(), i++) {
2901  TESS_CHAR *tc = tess_chars_it.data();
2902  text_len += (*lengths)[i] = tc->length;
2903  (*costs)[i] = tc->cost;
2904  (*x0)[i] = tc->box.left();
2905  (*y0)[i] = tc->box.bottom();
2906  (*x1)[i] = tc->box.right();
2907  (*y1)[i] = tc->box.top();
2908  }
2909  char *p = *text = new char[text_len];
2910 
2911  tess_chars_it.move_to_first();
2912  for (tess_chars_it.mark_cycle_pt();
2913  !tess_chars_it.cycled_list();
2914  tess_chars_it.forward()) {
2915  TESS_CHAR *tc = tess_chars_it.data();
2916  strncpy(p, tc->unicode_repr, tc->length);
2917  p += tc->length;
2918  }
2919  return n;
2920 }