tesseract
5.0.0-alpha-619-ge9db
|
Go to the documentation of this file.
27 #ifndef DISABLED_LEGACY_ENGINE
29 #endif // ndef DISABLED_LEGACY_ENGINE
91 #ifndef DISABLED_LEGACY_ENGINE
97 int score1 = 0, score2 = 0;
100 for (
int f = 0; f < fonts_.
size(); ++f) {
101 if (fonts_[f].score > score1) {
103 fontinfo_id2_ = fontinfo_id_;
104 score1 = fonts_[f].score;
105 fontinfo_id_ = fonts_[f].fontinfo_id;
106 }
else if (fonts_[f].score > score2) {
107 score2 = fonts_[f].score;
108 fontinfo_id2_ = fonts_[f].fontinfo_id;
112 #endif // ndef DISABLED_LEGACY_ENGINE
141 unichar_id_ = newunichar_id;
150 script_id_ = newscript_id;
153 matrix_cell_.
col = col;
154 matrix_cell_.
row = row;
170 tprintf(
"r%.2f c%.2f x[%g,%g]: %d %s",
172 min_xheight_, max_xheight_, unichar_id_,
173 (unicharset ==
nullptr) ?
"" :
178 tprintf(
" script=%d, font1=%d, font2=%d, yshift=%g, classifier=%d\n",
179 script_id_, fontinfo_id_, fontinfo_id2_, yshift_, classifier_);
182 static int SortByRating(
const void *p1,
const void *p2) {
183 const BLOB_CHOICE *bc1 = *static_cast<const BLOB_CHOICE *const *>(p1);
184 const BLOB_CHOICE *bc2 = *static_cast<const BLOB_CHOICE *const *>(p2);
185 return (bc1->rating_ < bc2->rating_) ? -1 : 1;
193 #ifndef DISABLED_LEGACY_ENGINE
196 #endif // ndef DISABLED_LEGACY_ENGINE
197 int16_t fontinfo_id_;
198 int16_t fontinfo_id2_;
272 const char *src_lengths,
275 uint8_t src_permuter,
278 this->
init(src_string, src_lengths, src_rating,
279 src_certainty, src_permuter);
283 :
ELIST_LINK(word), unicharset_(word.unicharset_) {
292 inline int length()
const {
296 return adjust_factor_;
299 adjust_factor_ = factor;
305 assert(index < length_);
306 return unichar_ids_[index];
308 inline int state(
int index)
const {
309 return state_[index];
312 if (index < 0 || index >= length_)
314 return script_pos_[index];
316 inline float rating()
const {
322 inline float certainty(
int index)
const {
323 return certainties_[index];
326 return min_x_height_;
329 return max_x_height_;
331 inline void set_x_heights(
float min_height,
float max_height) {
332 min_x_height_ = min_height;
333 max_x_height_ = max_height;
349 assert(index < length_);
353 return dangerous_ambig_found_;
356 dangerous_ambig_found_ = value;
362 certainty_ = new_val;
379 reserved_, unichar_ids_);
381 reserved_, script_pos_);
385 reserved_, certainties_);
391 certainties_ =
new float[1];
398 inline void init(
int reserved) {
399 reserved_ = reserved;
403 state_ =
new int[reserved];
404 certainties_ =
new float[reserved];
406 unichar_ids_ =
nullptr;
407 script_pos_ =
nullptr;
409 certainties_ =
nullptr;
412 adjust_factor_ = 1.0f;
414 certainty_ = FLT_MAX;
415 min_x_height_ = 0.0f;
416 max_x_height_ = FLT_MAX;
418 unichars_in_script_order_ =
false;
419 dangerous_ambig_found_ =
false;
427 void init(
const char *src_string,
const char *src_lengths,
428 float src_rating,
float src_certainty,
429 uint8_t src_permuter);
435 certainty_ = -FLT_MAX;
444 assert(reserved_ > length_);
455 assert(index < length_);
457 state_[index] = blob_count;
496 for (
int i = 0; i < length_; ++i) {
497 word_str += unicharset_->
debug_str(unichar_ids_[i]);
504 for (
int i = 0; i < length_; ++i) {
511 for (
int i = 0; i < length_; ++i) {
521 return unichars_in_script_order_ = in_script_order;
525 return unichars_in_script_order_;
532 return unichar_string_;
539 return unichar_lengths_;
555 const TBOX& blob_box,
570 void print(
const char *msg)
const;
607 float adjust_factor_;
621 bool unichars_in_script_order_;
623 bool dangerous_ambig_found_;
627 mutable STRING unichar_string_;
628 mutable STRING unichar_lengths_;
643 BLOB_CHOICE_LIST *ratings,
const STRING & unichar_string() const
bool set_unichars_in_script_order(bool in_script_order)
WERD_CHOICE shallow_copy(int start, int end) const
void set_adjust_factor(float factor)
UNICHAR_ID unichar_id(int index) const
bool dangerous_ambig_found() const
float min_xheight() const
int GetTopScriptID() const
void set_certainty(float new_val)
int TotalOfStates() const
bool contains_unichar_id(UNICHAR_ID unichar_id) const
void make_bad()
Set the fields in this choice to be default (bad) values.
static T * double_the_size_memcpy(int current_size, T *data)
void print(const UNICHARSET *unicharset) const
WERD_CHOICE & operator+=(const WERD_CHOICE &second)
bool IsSpaceDelimited(UNICHAR_ID unichar_id) const
void append_unichar_id_space_allocated(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty)
WERD_CHOICE(const WERD_CHOICE &word)
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
const UNICHARSET * unicharset() const
static const float kBadRating
int state(int index) const
tesseract::ScriptPos BlobPosition(int index) const
UNICHAR_ID unichar_id() const
#define ELISTIZEH(CLASSNAME)
void set_script(int newscript_id)
void set_classifier(BlobChoiceClassifier classifier)
const MATRIX_COORD & matrix_cell()
void reverse_and_mirror_unichar_ids()
void set_rating(float newrat)
void string_and_lengths(STRING *word_str, STRING *word_lengths_str) const
void SetScriptPositions(bool small_caps, TWERD *word, int debug=0)
void punct_stripped(int *start_core, int *end_core) const
void set_matrix_cell(int col, int row)
static int SortByRating(const void *p1, const void *p2)
void set_unichar_id(UNICHAR_ID unichar_id, int index)
float min_x_height() const
STRING debug_str(UNICHAR_ID id) const
const char * c_str() const
void set_certainty(float newrat)
MATRIX_COORD MatrixCoord(int index) const
void double_the_size()
Make more space in the per-unichar arrays (unichar_ids_, script_pos_, certainties_, ...).
BlobChoiceClassifier classifier() const
bool EqualIgnoringCaseAndTerminalPunct(const WERD_CHOICE &word1, const WERD_CHOICE &word2)
float max_xheight() const
float adjust_factor() const
void UpdateStateForSplit(int blob_position)
bool unichars_in_script_order() const
void GetNonSuperscriptSpan(int *start, int *end) const
const GenericVector< tesseract::ScoredFont > & fonts() const
void set_rating(float new_val)
const STRING debug_string() const
bool ContainsAnyNonSpaceDelimited() const
void DisplaySegmentation(TWERD *word)
bool has_rtl_unichar_id() const
int16_t fontinfo_id() const
void remove_last_unichar_id()
void print_state(const char *msg) const
void set_x_heights(float min_height, float max_height)
bool PosAndSizeAgree(const BLOB_CHOICE &other, float x_height, bool debug) const
static BLOB_CHOICE * deep_copy(const BLOB_CHOICE *src)
BLOB_CHOICE * FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list)
WERD_CHOICE & operator=(const WERD_CHOICE &source)
int16_t fontinfo_id2() const
void set_dangerous_ambig_found_(bool value)
void SetAllScriptPositions(tesseract::ScriptPos position)
void set_fonts(const GenericVector< tesseract::ScoredFont > &fonts)
const char * permuter_name() const
DLLSYM void tprintf(const char *format,...)
WERD_CHOICE(const UNICHARSET *unicharset)
static tesseract::ScriptPos ScriptPositionOf(bool print_debug, const UNICHARSET &unicharset, const TBOX &blob_box, UNICHAR_ID unichar_id)
void remove_unichar_ids(int index, int num)
const UNICHAR_ID * unichar_ids() const
void remove_unichar_id(int index)
void set_permuter(uint8_t perm)
void set_unichar_id(UNICHAR_ID newunichar_id)
const STRING & unichar_lengths() const
void set_blob_choice(int index, int blob_count, const BLOB_CHOICE *blob_choice)
void append_unichar_id(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty)
BLOB_CHOICE_LIST * blob_choices(int index, MATRIX *ratings) const
bool IsClassified() const
const char * ScriptPosToString(enum ScriptPos script_pos)
float max_x_height() const