24 #include "allheaders.h" 37 in_minor_direction_ =
false;
38 at_beginning_of_minor_run_ =
false;
39 preserve_interword_spaces_ =
false;
41 BoolParam *p = ParamUtils::FindParam<BoolParam>(
44 if (p !=
nullptr) preserve_interword_spaces_ = (bool)(*p);
46 current_paragraph_is_ltr_ = CurrentParagraphIsLtr();
47 MoveToLogicalStartOfTextline();
56 return current_paragraph_is_ltr_;
59 bool ResultIterator::CurrentParagraphIsLtr()
const {
63 it.RestartParagraph();
89 num_rtl = leftmost_rtl ? 1 : 0;
97 num_ltr += rightmost_ltr ? 1 : 0;
109 return num_ltr >= num_rtl;
116 void ResultIterator::CalculateBlobOrder(
118 bool context_is_ltr = current_paragraph_is_ltr_ ^ in_minor_direction_;
119 blob_indices->
clear();
146 if (letter_types[i] == U_EURO_NUM && letter_types[i + 2] == U_EURO_NUM &&
147 (letter_types[i + 1] == U_EURO_NUM_SEP ||
148 letter_types[i + 1] == U_COMMON_NUM_SEP)) {
149 letter_types[i + 1] = U_EURO_NUM;
155 if (letter_types[i] == U_EURO_NUM_TERM) {
157 while (j <
word_length_ && letter_types[j] == U_EURO_NUM_TERM) { j++; }
158 if (j <
word_length_ && letter_types[j] == U_EURO_NUM) {
160 for (
int k = i; k < j; k++) letter_types[k] = U_EURO_NUM;
163 while (j > -1 && letter_types[j] == U_EURO_NUM_TERM) { j--; }
164 if (j > -1 && letter_types[j] == U_EURO_NUM) {
166 for (
int k = j; k <= i; k++) letter_types[k] = U_EURO_NUM;
174 int ti = letter_types[i];
175 if (ti == U_LTR || ti == U_EURO_NUM) {
179 int tj = letter_types[j];
180 if (tj == U_LTR || tj == U_EURO_NUM) {
182 }
else if (tj == U_COMMON_NUM_SEP || tj == U_OTHER_NEUTRAL) {
189 for (
int k = i; k <= last_good; k++) letter_types[k] = U_LTR;
192 letter_types[i] = U_RTL;
199 if (letter_types[i] == U_RTL) {
205 for (; j >= 0 && letter_types[j] != U_RTL; j--) { }
207 for (
int k = j + 1; k <= i; k++) blob_indices->
push_back(k);
215 for (
int i = 0; i < dirs.
size(); i++) {
228 bool paragraph_is_ltr,
229 const LTRResultIterator &resit,
236 bool paragraph_is_ltr,
237 const LTRResultIterator &resit,
242 directions = (dirs_arg !=
nullptr) ? dirs_arg : &dirs;
250 directions->
push_back(ltr_it.WordDirection());
258 bool paragraph_is_ltr,
262 if (word_dirs.
size() == 0)
return;
266 int minor_direction, major_direction, major_step, start, end;
267 if (paragraph_is_ltr) {
269 end = word_dirs.
size();
274 start = word_dirs.
size() - 1;
283 int neutral_end = start;
284 while (neutral_end > 0 && word_dirs[neutral_end] ==
DIR_NEUTRAL) {
290 int left = neutral_end;
295 for (
int i = left; i < word_dirs.
size(); i++) {
304 for (
int i = start; i != end;) {
305 if (word_dirs[i] == minor_direction) {
307 while (j != end && word_dirs[j] != major_direction)
309 if (j == end) j -= major_step;
310 while (j != i && word_dirs[j] != minor_direction)
314 for (
int k = j; k != i; k -= major_step) {
328 int ResultIterator::LTRWordIndex()
const {
329 int this_word_index = 0;
331 textline.RestartRow();
332 while (!textline.PositionedAtSameWord(
it_)) {
336 return this_word_index;
339 void ResultIterator::MoveToLogicalStartOfWord() {
345 CalculateBlobOrder(&blob_order);
346 if (blob_order.
size() == 0 || blob_order[0] == 0)
return;
350 bool ResultIterator::IsAtFinalSymbolOfWord()
const {
353 CalculateBlobOrder(&blob_order);
357 bool ResultIterator::IsAtFirstSymbolOfWord()
const {
360 CalculateBlobOrder(&blob_order);
364 void ResultIterator::AppendSuffixMarks(
STRING *text)
const {
366 bool reading_direction_is_ltr =
367 current_paragraph_is_ltr_ ^ in_minor_direction_;
375 *
this, &textline_order);
376 int this_word_index = LTRWordIndex();
377 int i = textline_order.
get_index(this_word_index);
380 int last_non_word_mark = 0;
381 for (i++; i < textline_order.
size() && textline_order[i] < 0; i++) {
382 last_non_word_mark = textline_order[i];
385 *text += reading_direction_is_ltr ?
kLRM :
kRLM;
387 if (current_paragraph_is_ltr_) {
395 void ResultIterator::MoveToLogicalStartOfTextline() {
399 dynamic_cast<const LTRResultIterator&>(*
this),
402 for (; i < word_indices.
size() && word_indices[i] < 0; i++) {
403 if (word_indices[i] ==
kMinorRunStart) in_minor_direction_ =
true;
404 else if (word_indices[i] ==
kMinorRunEnd) in_minor_direction_ =
false;
406 if (in_minor_direction_) at_beginning_of_minor_run_ =
true;
407 if (i >= word_indices.
size())
return;
408 int first_word_index = word_indices[i];
409 for (
int j = 0; j < first_word_index; j++) {
412 MoveToLogicalStartOfWord();
417 current_paragraph_is_ltr_ = CurrentParagraphIsLtr();
418 in_minor_direction_ =
false;
419 at_beginning_of_minor_run_ =
false;
420 MoveToLogicalStartOfTextline();
424 if (
it_->
block() ==
nullptr)
return false;
433 current_paragraph_is_ltr_ = CurrentParagraphIsLtr();
435 in_minor_direction_ =
false;
436 MoveToLogicalStartOfTextline();
441 CalculateBlobOrder(&blob_order);
443 while (next_blob < blob_order.
size() &&
447 if (next_blob < blob_order.
size()) {
450 at_beginning_of_minor_run_ =
false;
459 int this_word_index = LTRWordIndex();
463 int final_real_index = word_indices.
size() - 1;
464 while (final_real_index > 0 && word_indices[final_real_index] < 0)
466 for (
int i = 0; i < final_real_index; i++) {
467 if (word_indices[i] == this_word_index) {
469 for (; j < final_real_index && word_indices[j] < 0; j++) {
470 if (word_indices[j] ==
kMinorRunStart) in_minor_direction_ =
true;
471 if (word_indices[j] ==
kMinorRunEnd) in_minor_direction_ =
false;
473 at_beginning_of_minor_run_ = (word_indices[j - 1] ==
kMinorRunStart);
476 tprintf(
"Next(RIL_WORD): %d -> %d\n",
477 this_word_index, word_indices[j]);
480 for (
int k = 0; k < word_indices[j]; k++) {
483 MoveToLogicalStartOfWord();
488 tprintf(
"Next(RIL_WORD): %d -> EOL\n", this_word_index);
499 if (
it_->
block() ==
nullptr)
return false;
500 if (
it_->
word() ==
nullptr)
return true;
503 bool at_word_start = IsAtFirstSymbolOfWord();
504 if (level ==
RIL_WORD)
return at_word_start;
508 line_start.MoveToLogicalStartOfTextline();
510 bool at_textline_start = at_word_start && *line_start.
it_ == *
it_;
515 bool at_block_start = at_textline_start &&
517 if (level ==
RIL_BLOCK)
return at_block_start;
519 bool at_para_start = at_block_start ||
520 (at_textline_start &&
523 if (level ==
RIL_PARA)
return at_para_start;
536 if (
Empty(element))
return true;
545 if (next.
Empty(element))
return true;
546 while (element > level) {
565 if (
it_->
word() ==
nullptr)
return nullptr;
572 pp.AppendUTF8ParagraphText(&text);
577 AppendUTF8ParagraphText(&text);
582 it.MoveToLogicalStartOfTextline();
583 it.IterateAndAppendUTF8TextlineText(&text);
587 AppendUTF8WordText(&text);
591 bool reading_direction_is_ltr =
592 current_paragraph_is_ltr_ ^ in_minor_direction_;
593 if (at_beginning_of_minor_run_) {
594 text += reading_direction_is_ltr ?
kLRM :
kRLM;
597 if (IsAtFinalSymbolOfWord()) AppendSuffixMarks(&text);
601 int length = text.
length() + 1;
602 char* result =
new char[length];
603 strncpy(result, text.
string(), length);
615 void ResultIterator::AppendUTF8WordText(
STRING *text)
const {
618 bool reading_direction_is_ltr =
619 current_paragraph_is_ltr_ ^ in_minor_direction_;
620 if (at_beginning_of_minor_run_) {
621 *text += reading_direction_is_ltr ?
kLRM :
kRLM;
625 CalculateBlobOrder(&blob_order);
626 for (
int i = 0; i < blob_order.
size(); i++) {
629 AppendSuffixMarks(text);
632 void ResultIterator::IterateAndAppendUTF8TextlineText(
STRING *text) {
641 *
this, &dirs, &textline_order);
643 current_paragraph_is_ltr_ ?
"ltr" :
"rtl");
644 PrintScriptDirs(dirs);
646 current_paragraph_is_ltr_ ?
"ltr" :
"rtl");
647 for (
int i = 0; i < textline_order.
size(); i++) {
648 tprintf(
"%d ", textline_order[i]);
653 int words_appended = 0;
656 : (words_appended > 0);
657 for (
int i = 0; i < numSpaces; ++i) {
660 AppendUTF8WordText(text);
663 tprintf(
"Num spaces=%d, text=%s\n", numSpaces, text->
string());
667 tprintf(
"%d words printed\n", words_appended);
676 void ResultIterator::AppendUTF8ParagraphText(
STRING *text)
const {
678 it.RestartParagraph();
679 it.MoveToLogicalStartOfTextline();
682 it.IterateAndAppendUTF8TextlineText(text);
683 }
while (it.it_->block() !=
nullptr && !it.IsAtBeginningOf(
RIL_PARA));
686 bool ResultIterator::BidiDebug(
int min_level)
const {
688 IntParam *p = ParamUtils::FindParam<IntParam>(
691 if (p !=
nullptr) debug_level = (int32_t)(*p);
692 return debug_level >= min_level;
BLOCK_RES * block() const
bool IsWithinFirstTextlineOfParagraph() const
UNICHARSET::Direction SymbolDirection(int blob_index) const
bool ParagraphIsLtr() const
GenericVector< IntParam * > int_params
GenericVector< BoolParam * > bool_params
const char * paragraph_separator_
static void CalculateTextlineOrder(bool paragraph_is_ltr, const GenericVector< StrongScriptDirection > &word_dirs, GenericVectorEqEq< int > *reading_order)
const char * string() const
TESS_LOCAL ResultIterator(const LTRResultIterator &resit)
const char * line_separator_
virtual char * GetUTF8Text(PageIteratorLevel level) const
TESS_LOCAL void BeginWord(int offset)
virtual std::vector< std::vector< std::pair< const char *, float > > > * GetBestLSTMSymbolChoices() const
int get_index(const T &object) const
int BlanksBeforeWord() const
tesseract::ParamsVectors * GlobalParams()
ROW_RES * prev_row() const
static const int kComplexWord
static ResultIterator * StartOfParagraph(const LTRResultIterator &resit)
DLLSYM void tprintf(const char *format,...)
virtual void RestartRow()
BLOCK_RES * prev_block() const
virtual bool IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel element) const
const char * BestUTF8(int blob_index, bool in_rtl_context) const
static const int kMinorRunStart
virtual bool IsAtBeginningOf(PageIteratorLevel level) const
virtual bool Next(PageIteratorLevel level)
bool UnicharsInReadingOrder() const
std::vector< std::vector< std::pair< const char *, float > > > timesteps
bool Empty(PageIteratorLevel level) const
virtual bool Next(PageIteratorLevel level)
LTRResultIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height)
WERD_CHOICE * best_choice
int BlanksBeforeWord() const
static const int kMinorRunEnd