| 
    tesseract
    5.0.0-alpha-619-ge9db
    
   | 
 
#include <unicharset.h>
Public Types | |
| enum | Direction {  U_LEFT_TO_RIGHT = 0, U_RIGHT_TO_LEFT = 1, U_EUROPEAN_NUMBER = 2, U_EUROPEAN_NUMBER_SEPARATOR = 3, U_EUROPEAN_NUMBER_TERMINATOR = 4, U_ARABIC_NUMBER = 5, U_COMMON_NUMBER_SEPARATOR = 6, U_BLOCK_SEPARATOR = 7, U_SEGMENT_SEPARATOR = 8, U_WHITE_SPACE_NEUTRAL = 9, U_OTHER_NEUTRAL = 10, U_LEFT_TO_RIGHT_EMBEDDING = 11, U_LEFT_TO_RIGHT_OVERRIDE = 12, U_RIGHT_TO_LEFT_ARABIC = 13, U_RIGHT_TO_LEFT_EMBEDDING = 14, U_RIGHT_TO_LEFT_OVERRIDE = 15, U_POP_DIRECTIONAL_FORMAT = 16, U_DIR_NON_SPACING_MARK = 17, U_BOUNDARY_NEUTRAL = 18, U_FIRST_STRONG_ISOLATE = 19, U_LEFT_TO_RIGHT_ISOLATE = 20, U_RIGHT_TO_LEFT_ISOLATE = 21, U_POP_DIRECTIONAL_ISOLATE = 22, U_CHAR_DIRECTION_COUNT }  | 
Public Member Functions | |
| UNICHARSET () | |
| ~UNICHARSET () | |
| UNICHAR_ID | unichar_to_id (const char *const unichar_repr) const | 
| UNICHAR_ID | unichar_to_id (const char *const unichar_repr, int length) const | 
| int | step (const char *str) const | 
| bool | encodable_string (const char *str, int *first_bad_position) const | 
| bool | encode_string (const char *str, bool give_up_on_failure, GenericVector< UNICHAR_ID > *encoding, GenericVector< char > *lengths, int *encoded_length) const | 
| const char * | id_to_unichar (UNICHAR_ID id) const | 
| const char * | id_to_unichar_ext (UNICHAR_ID id) const | 
| STRING | debug_str (UNICHAR_ID id) const | 
| STRING | debug_str (const char *unichar_repr) const | 
| void | unichar_insert (const char *const unichar_repr, OldUncleanUnichars old_style) | 
| void | unichar_insert (const char *const unichar_repr) | 
| void | unichar_insert_backwards_compatible (const char *const unichar_repr) | 
| bool | contains_unichar_id (UNICHAR_ID unichar_id) const | 
| bool | contains_unichar (const char *const unichar_repr) const | 
| bool | contains_unichar (const char *const unichar_repr, int length) const | 
| bool | eq (UNICHAR_ID unichar_id, const char *const unichar_repr) const | 
| void | delete_pointers_in_unichars () | 
| void | clear () | 
| int | size () const | 
| void | reserve (int unichars_number) | 
| bool | save_to_file (const char *const filename) const | 
| bool | save_to_file (FILE *file) const | 
| bool | save_to_file (tesseract::TFile *file) const | 
| bool | save_to_string (STRING *str) const | 
| bool | load_from_file (const char *const filename, bool skip_fragments) | 
| bool | load_from_file (const char *const filename) | 
| bool | load_from_file (FILE *file, bool skip_fragments) | 
| bool | load_from_file (FILE *file) | 
| bool | load_from_file (tesseract::TFile *file, bool skip_fragments) | 
| void | post_load_setup () | 
| bool | major_right_to_left () const | 
| void | set_black_and_whitelist (const char *blacklist, const char *whitelist, const char *unblacklist) | 
| void | set_isalpha (UNICHAR_ID unichar_id, bool value) | 
| void | set_islower (UNICHAR_ID unichar_id, bool value) | 
| void | set_isupper (UNICHAR_ID unichar_id, bool value) | 
| void | set_isdigit (UNICHAR_ID unichar_id, bool value) | 
| void | set_ispunctuation (UNICHAR_ID unichar_id, bool value) | 
| void | set_isngram (UNICHAR_ID unichar_id, bool value) | 
| void | set_script (UNICHAR_ID unichar_id, const char *value) | 
| void | set_other_case (UNICHAR_ID unichar_id, UNICHAR_ID other_case) | 
| void | set_direction (UNICHAR_ID unichar_id, UNICHARSET::Direction value) | 
| void | set_mirror (UNICHAR_ID unichar_id, UNICHAR_ID mirror) | 
| void | set_normed (UNICHAR_ID unichar_id, const char *normed) | 
| void | set_normed_ids (UNICHAR_ID unichar_id) | 
| bool | get_isalpha (UNICHAR_ID unichar_id) const | 
| bool | get_islower (UNICHAR_ID unichar_id) const | 
| bool | get_isupper (UNICHAR_ID unichar_id) const | 
| bool | get_isdigit (UNICHAR_ID unichar_id) const | 
| bool | get_ispunctuation (UNICHAR_ID unichar_id) const | 
| bool | get_isngram (UNICHAR_ID unichar_id) const | 
| bool | get_isprivate (UNICHAR_ID unichar_id) const | 
| bool | top_bottom_useful () const | 
| void | set_ranges_empty () | 
| void | SetPropertiesFromOther (const UNICHARSET &src) | 
| void | PartialSetPropertiesFromOther (int start_index, const UNICHARSET &src) | 
| void | ExpandRangesFromOther (const UNICHARSET &src) | 
| void | CopyFrom (const UNICHARSET &src) | 
| void | AppendOtherUnicharset (const UNICHARSET &src) | 
| bool | SizesDistinct (UNICHAR_ID id1, UNICHAR_ID id2) const | 
| void | get_top_bottom (UNICHAR_ID unichar_id, int *min_bottom, int *max_bottom, int *min_top, int *max_top) const | 
| void | set_top_bottom (UNICHAR_ID unichar_id, int min_bottom, int max_bottom, int min_top, int max_top) | 
| void | get_width_stats (UNICHAR_ID unichar_id, float *width, float *width_sd) const | 
| void | set_width_stats (UNICHAR_ID unichar_id, float width, float width_sd) | 
| void | get_bearing_stats (UNICHAR_ID unichar_id, float *bearing, float *bearing_sd) const | 
| void | set_bearing_stats (UNICHAR_ID unichar_id, float bearing, float bearing_sd) | 
| void | get_advance_stats (UNICHAR_ID unichar_id, float *advance, float *advance_sd) const | 
| void | set_advance_stats (UNICHAR_ID unichar_id, float advance, float advance_sd) | 
| bool | PropertiesIncomplete (UNICHAR_ID unichar_id) const | 
| bool | IsSpaceDelimited (UNICHAR_ID unichar_id) const | 
| int | get_script (UNICHAR_ID unichar_id) const | 
| unsigned int | get_properties (UNICHAR_ID unichar_id) const | 
| char | get_chartype (UNICHAR_ID unichar_id) const | 
| UNICHAR_ID | get_other_case (UNICHAR_ID unichar_id) const | 
| Direction | get_direction (UNICHAR_ID unichar_id) const | 
| UNICHAR_ID | get_mirror (UNICHAR_ID unichar_id) const | 
| UNICHAR_ID | to_lower (UNICHAR_ID unichar_id) const | 
| UNICHAR_ID | to_upper (UNICHAR_ID unichar_id) const | 
| bool | has_special_codes () const | 
| bool | AnyRepeatedUnicodes () const | 
| const CHAR_FRAGMENT * | get_fragment (UNICHAR_ID unichar_id) const | 
| bool | get_isalpha (const char *const unichar_repr) const | 
| bool | get_islower (const char *const unichar_repr) const | 
| bool | get_isupper (const char *const unichar_repr) const | 
| bool | get_isdigit (const char *const unichar_repr) const | 
| bool | get_ispunctuation (const char *const unichar_repr) const | 
| unsigned int | get_properties (const char *const unichar_repr) const | 
| char | get_chartype (const char *const unichar_repr) const | 
| int | get_script (const char *const unichar_repr) const | 
| const CHAR_FRAGMENT * | get_fragment (const char *const unichar_repr) const | 
| bool | get_isalpha (const char *const unichar_repr, int length) const | 
| bool | get_islower (const char *const unichar_repr, int length) const | 
| bool | get_isupper (const char *const unichar_repr, int length) const | 
| bool | get_isdigit (const char *const unichar_repr, int length) const | 
| bool | get_ispunctuation (const char *const unichar_repr, int length) const | 
| const char * | get_normed_unichar (UNICHAR_ID unichar_id) const | 
| const GenericVector< UNICHAR_ID > & | normed_ids (UNICHAR_ID unichar_id) const | 
| int | get_script (const char *const unichar_repr, int length) const | 
| int | get_script_table_size () const | 
| const char * | get_script_from_script_id (int id) const | 
| int | get_script_id_from_name (const char *script_name) const | 
| bool | is_null_script (const char *script) const | 
| int | add_script (const char *script) | 
| bool | get_enabled (UNICHAR_ID unichar_id) const | 
| int | null_sid () const | 
| int | common_sid () const | 
| int | latin_sid () const | 
| int | cyrillic_sid () const | 
| int | greek_sid () const | 
| int | han_sid () const | 
| int | hiragana_sid () const | 
| int | katakana_sid () const | 
| int | thai_sid () const | 
| int | hangul_sid () const | 
| int | default_sid () const | 
| bool | script_has_upper_lower () const | 
| bool | script_has_xheight () const | 
Static Public Member Functions | |
| static STRING | debug_utf8_str (const char *str) | 
| static std::string | CleanupString (const char *utf8_str) | 
| static std::string | CleanupString (const char *utf8_str, size_t length) | 
Static Public Attributes | |
| static const TESS_API char * | kCustomLigatures [][2] | 
| static const TESS_API char * | kSpecialUnicharCodes [SPECIAL_UNICHAR_CODES_COUNT] | 
Definition at line 145 of file unicharset.h.
Definition at line 156 of file unicharset.h.
| UNICHARSET::UNICHARSET | ( | ) | 
Definition at line 175 of file unicharset.cpp.
| UNICHARSET::~UNICHARSET | ( | ) | 
Definition at line 190 of file unicharset.cpp.
| int UNICHARSET::add_script | ( | const char * | script | ) | 
Definition at line 1020 of file unicharset.cpp.
| bool UNICHARSET::AnyRepeatedUnicodes | ( | ) | const | 
Definition at line 1007 of file unicharset.cpp.
| void UNICHARSET::AppendOtherUnicharset | ( | const UNICHARSET & | src | ) | 
Definition at line 463 of file unicharset.cpp.
      
  | 
  inline static | 
Definition at line 246 of file unicharset.h.
      
  | 
  static | 
Definition at line 1110 of file unicharset.cpp.
      
  | 
  inline | 
Definition at line 306 of file unicharset.h.
      
  | 
  inline | 
Definition at line 875 of file unicharset.h.
| bool UNICHARSET::contains_unichar | ( | const char *const | unichar_repr | ) | const | 
Definition at line 670 of file unicharset.cpp.
| bool UNICHARSET::contains_unichar | ( | const char *const | unichar_repr, | 
| int | length | ||
| ) | const | 
Definition at line 676 of file unicharset.cpp.
      
  | 
  inline | 
Definition at line 284 of file unicharset.h.
| void UNICHARSET::CopyFrom | ( | const UNICHARSET & | src | ) | 
Definition at line 447 of file unicharset.cpp.
      
  | 
  inline | 
Definition at line 877 of file unicharset.h.
      
  | 
  inline | 
Definition at line 254 of file unicharset.h.
| STRING UNICHARSET::debug_str | ( | UNICHAR_ID | id | ) | const | 
Definition at line 342 of file unicharset.cpp.
      
  | 
  static | 
Definition at line 318 of file unicharset.cpp.
      
  | 
  inline | 
Definition at line 884 of file unicharset.h.
      
  | 
  inline | 
Definition at line 298 of file unicharset.h.
| bool UNICHARSET::encodable_string | ( | const char * | str, | 
| int * | first_bad_position | ||
| ) | const | 
Definition at line 243 of file unicharset.cpp.
| bool UNICHARSET::encode_string | ( | const char * | str, | 
| bool | give_up_on_failure, | ||
| GenericVector< UNICHAR_ID > * | encoding, | ||
| GenericVector< char > * | lengths, | ||
| int * | encoded_length | ||
| ) | const | 
Definition at line 258 of file unicharset.cpp.
| bool UNICHARSET::eq | ( | UNICHAR_ID | unichar_id, | 
| const char *const | unichar_repr | ||
| ) | const | 
Definition at line 686 of file unicharset.cpp.
| void UNICHARSET::ExpandRangesFromOther | ( | const UNICHARSET & | src | ) | 
Definition at line 434 of file unicharset.cpp.
      
  | 
  inline | 
Definition at line 620 of file unicharset.h.
      
  | 
  inline | 
Definition at line 603 of file unicharset.h.
      
  | 
  inline | 
Definition at line 761 of file unicharset.h.
| char UNICHARSET::get_chartype | ( | UNICHAR_ID | unichar_id | ) | const | 
Definition at line 616 of file unicharset.cpp.
      
  | 
  inline | 
Definition at line 680 of file unicharset.h.
      
  | 
  inline | 
Definition at line 868 of file unicharset.h.
      
  | 
  inline | 
Definition at line 774 of file unicharset.h.
      
  | 
  inline | 
Definition at line 724 of file unicharset.h.
      
  | 
  inline | 
Definition at line 731 of file unicharset.h.
      
  | 
  inline | 
Definition at line 784 of file unicharset.h.
      
  | 
  inline | 
Definition at line 481 of file unicharset.h.
      
  | 
  inline | 
Definition at line 746 of file unicharset.h.
      
  | 
  inline | 
Definition at line 805 of file unicharset.h.
      
  | 
  inline | 
Definition at line 502 of file unicharset.h.
      
  | 
  inline | 
Definition at line 736 of file unicharset.h.
      
  | 
  inline | 
Definition at line 791 of file unicharset.h.
      
  | 
  inline | 
Definition at line 488 of file unicharset.h.
      
  | 
  inline | 
Definition at line 516 of file unicharset.h.
| bool UNICHARSET::get_isprivate | ( | UNICHAR_ID | unichar_id | ) | const | 
Definition at line 387 of file unicharset.cpp.
      
  | 
  inline | 
Definition at line 751 of file unicharset.h.
      
  | 
  inline | 
Definition at line 812 of file unicharset.h.
      
  | 
  inline | 
Definition at line 509 of file unicharset.h.
      
  | 
  inline | 
Definition at line 741 of file unicharset.h.
      
  | 
  inline | 
Definition at line 798 of file unicharset.h.
      
  | 
  inline | 
Definition at line 495 of file unicharset.h.
      
  | 
  inline | 
Definition at line 687 of file unicharset.h.
      
  | 
  inline | 
Definition at line 818 of file unicharset.h.
      
  | 
  inline | 
Definition at line 673 of file unicharset.h.
      
  | 
  inline | 
Definition at line 757 of file unicharset.h.
| unsigned int UNICHARSET::get_properties | ( | UNICHAR_ID | unichar_id | ) | const | 
Definition at line 601 of file unicharset.cpp.
      
  | 
  inline | 
Definition at line 768 of file unicharset.h.
      
  | 
  inline | 
Definition at line 833 of file unicharset.h.
      
  | 
  inline | 
Definition at line 653 of file unicharset.h.
      
  | 
  inline | 
Definition at line 844 of file unicharset.h.
| int UNICHARSET::get_script_id_from_name | ( | const char * | script_name | ) | const | 
Definition at line 1099 of file unicharset.cpp.
      
  | 
  inline | 
Definition at line 839 of file unicharset.h.
      
  | 
  inline | 
Definition at line 558 of file unicharset.h.
      
  | 
  inline | 
Definition at line 586 of file unicharset.h.
      
  | 
  inline | 
Definition at line 878 of file unicharset.h.
      
  | 
  inline | 
Definition at line 879 of file unicharset.h.
      
  | 
  inline | 
Definition at line 883 of file unicharset.h.
      
  | 
  inline | 
Definition at line 712 of file unicharset.h.
      
  | 
  inline | 
Definition at line 880 of file unicharset.h.
| const char * UNICHARSET::id_to_unichar | ( | UNICHAR_ID | id | ) | const | 
Definition at line 290 of file unicharset.cpp.
| const char * UNICHARSET::id_to_unichar_ext | ( | UNICHAR_ID | id | ) | const | 
Definition at line 298 of file unicharset.cpp.
      
  | 
  inline | 
Definition at line 858 of file unicharset.h.
      
  | 
  inline | 
Definition at line 642 of file unicharset.h.
      
  | 
  inline | 
Definition at line 881 of file unicharset.h.
      
  | 
  inline | 
Definition at line 876 of file unicharset.h.
      
  | 
  inline | 
Definition at line 386 of file unicharset.h.
      
  | 
  inline | 
Definition at line 378 of file unicharset.h.
      
  | 
  inline | 
Definition at line 393 of file unicharset.h.
| bool UNICHARSET::load_from_file | ( | FILE * | file, | 
| bool | skip_fragments | ||
| ) | 
Definition at line 742 of file unicharset.cpp.
| bool UNICHARSET::load_from_file | ( | tesseract::TFile * | file, | 
| bool | skip_fragments | ||
| ) | 
Definition at line 751 of file unicharset.cpp.
| bool UNICHARSET::major_right_to_left | ( | ) | const | 
Definition at line 952 of file unicharset.cpp.
      
  | 
  inline | 
Definition at line 825 of file unicharset.h.
      
  | 
  inline | 
Definition at line 874 of file unicharset.h.
| void UNICHARSET::PartialSetPropertiesFromOther | ( | int | start_index, | 
| const UNICHARSET & | src | ||
| ) | 
Definition at line 404 of file unicharset.cpp.
| void UNICHARSET::post_load_setup | ( | ) | 
Definition at line 886 of file unicharset.cpp.
      
  | 
  inline | 
Definition at line 636 of file unicharset.h.
| void UNICHARSET::reserve | ( | int | unichars_number | ) | 
Definition at line 194 of file unicharset.cpp.
      
  | 
  inline | 
Definition at line 350 of file unicharset.h.
      
  | 
  inline | 
Definition at line 360 of file unicharset.h.
      
  | 
  inline | 
Definition at line 366 of file unicharset.h.
| bool UNICHARSET::save_to_string | ( | STRING * | str | ) | const | 
Definition at line 691 of file unicharset.cpp.
      
  | 
  inline | 
Definition at line 887 of file unicharset.h.
      
  | 
  inline | 
Definition at line 894 of file unicharset.h.
      
  | 
  inline | 
Definition at line 630 of file unicharset.h.
      
  | 
  inline | 
Definition at line 613 of file unicharset.h.
| void UNICHARSET::set_black_and_whitelist | ( | const char * | blacklist, | 
| const char * | whitelist, | ||
| const char * | unblacklist | ||
| ) | 
Definition at line 969 of file unicharset.cpp.
      
  | 
  inline | 
Definition at line 462 of file unicharset.h.
      
  | 
  inline | 
Definition at line 421 of file unicharset.h.
      
  | 
  inline | 
Definition at line 436 of file unicharset.h.
      
  | 
  inline | 
Definition at line 426 of file unicharset.h.
      
  | 
  inline | 
Definition at line 446 of file unicharset.h.
      
  | 
  inline | 
Definition at line 441 of file unicharset.h.
      
  | 
  inline | 
Definition at line 431 of file unicharset.h.
      
  | 
  inline | 
Definition at line 467 of file unicharset.h.
      
  | 
  inline | 
Definition at line 472 of file unicharset.h.
| void UNICHARSET::set_normed_ids | ( | UNICHAR_ID | unichar_id | ) | 
Definition at line 372 of file unicharset.cpp.
      
  | 
  inline | 
Definition at line 457 of file unicharset.h.
| void UNICHARSET::set_ranges_empty | ( | ) | 
Definition at line 395 of file unicharset.cpp.
      
  | 
  inline | 
Definition at line 452 of file unicharset.h.
      
  | 
  inline | 
Definition at line 572 of file unicharset.h.
      
  | 
  inline | 
Definition at line 597 of file unicharset.h.
      
  | 
  inline | 
Definition at line 535 of file unicharset.h.
      
  | 
  inline | 
Definition at line 341 of file unicharset.h.
| bool UNICHARSET::SizesDistinct | ( | UNICHAR_ID | id1, | 
| UNICHAR_ID | id2 | ||
| ) | const | 
Definition at line 485 of file unicharset.cpp.
| int UNICHARSET::step | ( | const char * | str | ) | const | 
Definition at line 232 of file unicharset.cpp.
      
  | 
  inline | 
Definition at line 882 of file unicharset.h.
      
  | 
  inline | 
Definition at line 694 of file unicharset.h.
      
  | 
  inline | 
Definition at line 702 of file unicharset.h.
      
  | 
  inline | 
Definition at line 527 of file unicharset.h.
      
  | 
  inline | 
Definition at line 264 of file unicharset.h.
| void UNICHARSET::unichar_insert | ( | const char *const | unichar_repr, | 
| OldUncleanUnichars | old_style | ||
| ) | 
Definition at line 625 of file unicharset.cpp.
      
  | 
  inline | 
Definition at line 269 of file unicharset.h.
| UNICHAR_ID UNICHARSET::unichar_to_id | ( | const char *const | unichar_repr | ) | const | 
Definition at line 209 of file unicharset.cpp.
| UNICHAR_ID UNICHARSET::unichar_to_id | ( | const char *const | unichar_repr, | 
| int | length | ||
| ) | const | 
Definition at line 217 of file unicharset.cpp.
      
  | 
  static | 
Definition at line 150 of file unicharset.h.
      
  | 
  static | 
Definition at line 153 of file unicharset.h.