tesseract
5.0.0-alpha-619-ge9db
|
Go to the documentation of this file.
27 #ifndef DISABLED_LEGACY_ENGINE
32 #define CTRL_NEWLINE '\012' //newline
33 #define CTRL_HARDLINE '\015' //cr
38 const TBOX *target_word_box) {
45 block_of_last_word =
nullptr;
46 while (page_res_it.
word () !=
nullptr) {
49 if (target_word_box) {
52 (current_word_box.
right() + current_word_box.
left()) / 2,
53 (current_word_box.
bottom() + current_word_box.
top()) / 2);
54 if (!target_word_box->
contains(center_pt)) {
60 block_of_last_word != page_res_it.
block ()) {
61 block_of_last_word = page_res_it.
block ();
80 nextword, nextblock), force_eol);
104 bool need_reject =
false;
182 tprintf (
"Dict word: \"%s\": %d\n",
191 word->
reject_map[i].setrej_minimal_rej_accept();
199 word->
reject_map[i].setrej_minimal_rej_accept();
227 if (next_word ==
nullptr || next_block ==
nullptr || block != next_block)
229 if (next_word->
space () > 0)
235 end_gap = block_box.
right () - word_box.
right ();
236 end_gap -= static_cast<int32_t>(block->
space ());
237 width = next_box.
right () - next_box.
left ();
256 if (i < word->reject_map.length()) {
281 for (i = 0; i < len; i++) {
283 word_res->
reject_map[i].setrej_minimal_rej_accept();
296 for (i = 0; i < len; ++i) {
299 word_res->
reject_map[i].setrej_minimal_rej_accept();
310 for (i = 0; i < len; ++i) {
313 word_res->
reject_map[i].setrej_minimal_rej_accept();
317 for (i = 0; i < len; i++) {
320 word_res->
reject_map[i].setrej_minimal_rej_accept();
322 word_res->
reject_map[i].setrej_minimal_rej_accept();
324 word_res->
reject_map[i].setrej_minimal_rej_accept();
333 for (i = 0; i < len; i++) {
337 word_res->
reject_map[i].setrej_minimal_rej_accept();
341 word_res->
reject_map[i].setrej_minimal_rej_accept();
353 for (i = 0; i < len; i++) {
359 word_res->
reject_map[i].setrej_minimal_rej_accept();
368 for (
int i = 0; i < word.
length(); ++i) {
378 for (
int i = 0; i < word.
length(); ++i) {
388 const char* lengths) {
389 bool prev_digit =
false;
391 if (*lengths == 1 && *s ==
'(')
395 ((*s ==
'$') || (*s ==
'.') || (*s ==
'+') || (*s ==
'-')))
398 for (; *s !=
'\0'; s += *(lengths++)) {
401 else if (prev_digit &&
402 (*lengths == 1 && ((*s ==
'.') || (*s ==
',') || (*s ==
'-'))))
404 else if (prev_digit && *lengths == 1 &&
405 (*(s + *lengths) ==
'\0') && ((*s ==
'%') || (*s ==
')')))
407 else if (prev_digit &&
408 *lengths == 1 && (*s ==
'%') &&
409 (*(lengths + 1) == 1 && *(s + *lengths) ==
')') &&
410 (*(s + *lengths + *(lengths + 1)) ==
'\0'))
const STRING & unichar_string() const
void output_pass(PAGE_RES_IT &page_res_it, const TBOX *target_word_box)
BLOCK_RES * next_block() const
bool flag(WERD_FLAGS mask) const
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
UNICHAR_ID unichar_id(int index) const
bool get_isdigit(UNICHAR_ID unichar_id) const
bool get_isalpha(UNICHAR_ID unichar_id) const
TBOX bounding_box() const
BLOCK_RES * block() const
bool tessedit_write_rep_codes
CRUNCH_MODE unlv_crunch_mode
int dict_word(const WERD_CHOICE &word)
WERD_RES * restart_page()
bool contains(const FCOORD pt) const
void set_unlv_suspects(WERD_RES *word)
bool tessedit_zero_rejection
double suspect_rating_per_ch
bool write_results_empty_block
const UNICHARSET * unicharset() const
bool tilde_crunch_written
int16_t count_alphas(const WERD_CHOICE &word)
bool eq(UNICHAR_ID unichar_id, const char *const unichar_repr) const
ACCEPTABLE_WERD_TYPE acceptable_word_string(const UNICHARSET &char_set, const char *s, const char *lengths)
int16_t space() const
return spacing
const UNICHARSET * uch_set
int16_t count_alphanums(const WERD_CHOICE &word)
PDBLK pdblk
Page Description Block.
bool suspect_constrain_1Il
bool last_char_was_newline
WERD_CHOICE * best_choice
int16_t safe_dict_word(const WERD_RES *werd_res)
bool acceptable_number_string(const char *s, const char *lengths)
bool tessedit_rejection_debug
const char * c_str() const
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
bool tessedit_zero_kelvin_rejection
const STRING debug_string() const
double suspect_accept_rating
WERD_RES * next_word() const
DLLSYM void tprintf(const char *format,...)
bool tessedit_word_for_word
UNICHAR_ID get_rep_char(WERD_RES *word)
void MergeAdjacentBlobs(int index)
bool unlv_tilde_crunching
bool tessedit_minimal_rejection
bool tessedit_write_block_separators
const STRING & unichar_lengths() const
void write_results(PAGE_RES_IT &page_res_it, char newline_type, bool force_eol)
char determine_newline_type(WERD *word, BLOCK *block, WERD *next_word, BLOCK *next_block)
bool check_debug_pt(WERD_RES *word, int location)