25 #include "allheaders.h"
// Initial bidi traversal state: not inside a minor-direction run and not at
// the beginning of one.
36 in_minor_direction_ =
false;
37 at_beginning_of_minor_run_ =
false;
// Default value; replaced further down by the value of the looked-up
// "preserve_interword_spaces" BoolParam.
// NOTE(review): any null check guarding the dereference of p is not visible
// in this excerpt - confirm.
38 preserve_interword_spaces_ =
false;
40 auto* p = ParamUtils::FindParam<BoolParam>(
"preserve_interword_spaces",
44 preserve_interword_spaces_ = (bool)(*p);
// Cache the direction of the current paragraph, then reposition at the
// logical start of the text line.
46 current_paragraph_is_ltr_ = CurrentParagraphIsLtr();
47 MoveToLogicalStartOfTextline();
// Returns the cached paragraph-direction flag (this line belongs to a
// separate accessor whose header is not shown here).
56 return current_paragraph_is_ltr_;
// Decides whether the current paragraph should be treated as left-to-right.
// Returns true when the LTR tally is at least the RTL tally, so a tie
// (including 0 vs 0) counts as left-to-right.
59 bool ResultIterator::CurrentParagraphIsLtr()
const {
// Work from the start of the paragraph on `it`.
// NOTE(review): `it` is presumably a local copy of this iterator - confirm.
63 it.RestartParagraph();
// Seed the RTL tally from the leftmost_rtl flag.
89 num_rtl = leftmost_rtl ? 1 : 0;
// Add one LTR vote when rightmost_ltr is set.
97 num_ltr += rightmost_ltr ? 1 : 0;
110 return num_ltr >= num_rtl;
// Fills *blob_indices with blob indices for the current word. The visible
// steps first rewrite entries of letter_types in several passes and then emit
// indices based on where U_RTL entries sit.
117 void ResultIterator::CalculateBlobOrder(
// The effective reading direction is the paragraph direction, flipped when
// the iterator is inside a minor-direction run.
119 bool context_is_ltr = current_paragraph_is_ltr_ ^ in_minor_direction_;
// Start from an empty output vector.
120 blob_indices->
clear();
// A single separator (U_EURO_NUM_SEP or U_COMMON_NUM_SEP) sandwiched between
// two U_EURO_NUM entries is reclassified as U_EURO_NUM.
147 if (letter_types[i] == U_EURO_NUM && letter_types[i + 2] == U_EURO_NUM &&
148 (letter_types[i + 1] == U_EURO_NUM_SEP ||
149 letter_types[i + 1] == U_COMMON_NUM_SEP)) {
150 letter_types[i + 1] = U_EURO_NUM;
// A run of U_EURO_NUM_TERM entries is absorbed into an adjacent number.
// Forward case: skip over the terminators; if the run ends at a U_EURO_NUM
// (still inside the word), relabel positions i..j-1 as U_EURO_NUM.
156 if (letter_types[i] == U_EURO_NUM_TERM) {
158 while (j <
word_length_ && letter_types[j] == U_EURO_NUM_TERM) {
161 if (j <
word_length_ && letter_types[j] == U_EURO_NUM) {
163 for (
int k = i; k < j; k++) letter_types[k] = U_EURO_NUM;
// Backward case: walk j down over terminators; if a U_EURO_NUM is found at
// j >= 0, relabel positions j..i as U_EURO_NUM.
166 while (j > -1 && letter_types[j] == U_EURO_NUM_TERM) {
169 if (j > -1 && letter_types[j] == U_EURO_NUM) {
171 for (
int k = j; k <= i; k++) letter_types[k] = U_EURO_NUM;
// Starting at an U_LTR or U_EURO_NUM entry, look at later entries tj:
// U_LTR / U_EURO_NUM entries and U_COMMON_NUM_SEP / U_OTHER_NEUTRAL entries
// are handled by separate branches (branch bodies not shown here).
179 int ti = letter_types[i];
180 if (ti == U_LTR || ti == U_EURO_NUM) {
184 int tj = letter_types[j];
185 if (tj == U_LTR || tj == U_EURO_NUM) {
187 }
else if (tj == U_COMMON_NUM_SEP || tj == U_OTHER_NEUTRAL) {
// Everything from i through last_good becomes plain U_LTR.
194 for (
int k = i; k <= last_good; k++) letter_types[k] = U_LTR;
// NOTE(review): the condition under which position i is forced to U_RTL is
// not visible in this excerpt.
197 letter_types[i] = U_RTL;
// At an U_RTL entry i: move j down to the nearest earlier U_RTL entry (or
// to -1), then emit the indices j+1..i in ascending order.
204 if (letter_types[i] == U_RTL) {
210 for (; j >= 0 && letter_types[j] != U_RTL; j--) {
213 for (
int k = j + 1; k <= i; k++) blob_indices->
push_back(k);
// Visits every entry of dirs by index, first to last.
// NOTE(review): the enclosing function header and the loop body are not
// visible in this excerpt.
221 for (
int i = 0; i < dirs.
size(); i++) {
// Parameter lists of two overloads that take the paragraph direction and an
// LTRResultIterator (`resit`) by const reference.
244 bool paragraph_is_ltr,
const LTRResultIterator& resit,
251 bool paragraph_is_ltr,
const LTRResultIterator& resit,
// Use the caller's direction vector when dirs_arg is non-null; otherwise
// fall back to the local `dirs`.
256 directions = (dirs_arg !=
nullptr) ? dirs_arg : &dirs;
// Record the direction reported by ltr_it for the word it is positioned on.
265 directions->
push_back(ltr_it.WordDirection());
// Overload that works from a vector of per-word directions (word_dirs) and
// the paragraph direction.
273 bool paragraph_is_ltr,
// An empty word_dirs is special-cased (the action taken is not shown here).
277 if (word_dirs.
size() == 0)
// Traversal parameters: which direction value is "minor" vs "major", the
// step used to walk in the major direction, and the [start, end) bounds.
282 int minor_direction, major_direction, major_step, start, end;
283 if (paragraph_is_ltr) {
285 end = word_dirs.
size();
// NOTE(review): presumably the non-LTR branch, starting from the last word -
// the branch structure is not visible here.
290 start = word_dirs.
size() - 1;
// Look at DIR_NEUTRAL entries beginning at `start`, never going below index 0.
299 int neutral_end = start;
300 while (neutral_end > 0 && word_dirs[neutral_end] ==
DIR_NEUTRAL) {
306 int left = neutral_end;
312 for (
int i = left; i < word_dirs.
size(); i++) {
// Main walk from start to end; i is advanced inside the body, not in the
// for-header.
322 for (
int i = start; i != end;) {
323 if (word_dirs[i] == minor_direction) {
// Advance j in the major direction until a major_direction word or `end`.
325 while (j != end && word_dirs[j] != major_direction) j += major_step;
// Back j off until it sits on a minor_direction word or returns to i.
328 while (j != i && word_dirs[j] != minor_direction) j -= major_step;
// Walk from j back toward i, i.e. against the major direction.
331 for (
int k = j; k != i; k -= major_step) {
// Returns the index computed for the current word, using `textline`
// restarted at the beginning of the row and looping until it is positioned
// at the same word as it_.
// NOTE(review): the loop body (presumably advancing `textline` and
// incrementing the counter) is not visible here.
346 int ResultIterator::LTRWordIndex()
const {
347 int this_word_index = 0;
349 textline.RestartRow();
350 while (!textline.PositionedAtSameWord(
it_)) {
354 return this_word_index;
// Repositions the iterator according to the first entry of the blob order
// computed by CalculateBlobOrder().
357 void ResultIterator::MoveToLogicalStartOfWord() {
363 CalculateBlobOrder(&blob_order);
// An empty blob order, or one whose first entry is index 0, is special-cased
// (the action taken is not shown in this excerpt).
364 if (blob_order.
size() == 0 || blob_order[0] == 0)
// Const query that computes the blob order for the current word via
// CalculateBlobOrder(); the comparison producing the result is not visible
// in this excerpt.
369 bool ResultIterator::IsAtFinalSymbolOfWord()
const {
373 CalculateBlobOrder(&blob_order);
// Const query that computes the blob order for the current word via
// CalculateBlobOrder(); the comparison producing the result is not visible
// in this excerpt.
377 bool ResultIterator::IsAtFirstSymbolOfWord()
const {
381 CalculateBlobOrder(&blob_order);
// Appends direction marks (kLRM / kRLM) to *text, based on what follows the
// current word in the text-line order.
385 void ResultIterator::AppendSuffixMarks(
STRING* text)
const {
// Reading direction = paragraph direction, flipped inside a minor run.
388 bool reading_direction_is_ltr =
389 current_paragraph_is_ltr_ ^ in_minor_direction_;
// Find where the current word sits in textline_order.
397 int this_word_index = LTRWordIndex();
398 int i = textline_order.
get_index(this_word_index);
// Scan the negative entries that immediately follow the word's position;
// the last one seen is kept in last_non_word_mark (0 when there are none).
402 int last_non_word_mark = 0;
403 for (i++; i < textline_order.
size() && textline_order[i] < 0; i++) {
404 last_non_word_mark = textline_order[i];
// Append the mark matching the reading direction.
407 *text += reading_direction_is_ltr ?
kLRM :
kRLM;
409 if (current_paragraph_is_ltr_) {
// Positions the iterator on the first real word of word_indices and updates
// the minor-run state flags accordingly.
417 void ResultIterator::MoveToLogicalStartOfTextline() {
// This object viewed as its LTRResultIterator base is passed as an argument
// (the call itself is not shown in this excerpt).
421 dynamic_cast<const LTRResultIterator&>(*
this),
// Skip leading negative entries; the loop body sets or clears
// in_minor_direction_ (the conditions are not shown here).
424 for (; i < word_indices.
size() && word_indices[i] < 0; i++) {
426 in_minor_direction_ =
true;
428 in_minor_direction_ =
false;
// If we ended up inside a minor run, we are at its beginning.
430 if (in_minor_direction_)
431 at_beginning_of_minor_run_ =
true;
// No non-negative entry found: special-cased (action not shown here).
432 if (i >= word_indices.
size())
// Repeat a step first_word_index times (loop body not shown), then settle on
// the logical start of the word reached.
434 int first_word_index = word_indices[i];
435 for (
int j = 0; j < first_word_index; j++) {
438 MoveToLogicalStartOfWord();
// Recompute the paragraph direction, clear both minor-run flags, and move to
// the logical start of the text line.
// NOTE(review): the enclosing function header is not visible in this excerpt.
443 current_paragraph_is_ltr_ = CurrentParagraphIsLtr();
444 in_minor_direction_ =
false;
445 at_beginning_of_minor_run_ =
false;
446 MoveToLogicalStartOfTextline();
// Fragments of the advance logic (the debug strings below identify it as
// Next). The enclosing header and the level dispatch are not visible here.
// Refresh the paragraph direction, leave any minor run, and go to the
// logical start of the text line.
461 current_paragraph_is_ltr_ = CurrentParagraphIsLtr();
463 in_minor_direction_ =
false;
464 MoveToLogicalStartOfTextline();
// Compute the blob order of the current word and scan it with next_blob;
// the branch below runs when next_blob is still inside blob_order.
468 CalculateBlobOrder(&blob_order);
470 while (next_blob < blob_order.
size() &&
474 if (next_blob < blob_order.
size()) {
477 at_beginning_of_minor_run_ =
false;
// Locate the current word in word_indices.
488 int this_word_index = LTRWordIndex();
// final_real_index starts at the last entry; the loop runs while that entry
// is negative and the index is above 0 (loop body not shown).
490 int final_real_index = word_indices.
size() - 1;
491 while (final_real_index > 0 && word_indices[final_real_index] < 0)
493 for (
int i = 0; i < final_real_index; i++) {
494 if (word_indices[i] == this_word_index) {
// Skip the negative entries before the next word; the loop body sets or
// clears in_minor_direction_ (conditions not shown).
496 for (; j < final_real_index && word_indices[j] < 0; j++) {
498 in_minor_direction_ =
true;
500 in_minor_direction_ =
false;
// The next word begins a minor run exactly when the entry just before it is
// kMinorRunStart.
502 at_beginning_of_minor_run_ = (word_indices[j - 1] ==
kMinorRunStart);
505 tprintf(
"Next(RIL_WORD): %d -> %d\n", this_word_index,
// Repeat a step word_indices[j] times (loop body not shown), then settle on
// the logical start of that word.
509 for (
int k = 0; k < word_indices[j]; k++) {
512 MoveToLogicalStartOfWord();
// Debug trace emitted on the end-of-line ("EOL") path.
517 tprintf(
"Next(RIL_WORD): %d -> EOL\n", this_word_index);
// Layered "at the beginning of ..." checks; each layer builds on the one
// before it. The enclosing header and the level tests are not visible here.
// Word level: are we on the first symbol of the word?
535 bool at_word_start = IsAtFirstSymbolOfWord();
537 return at_word_start;
// Text-line level: additionally, `line_start` moved to the logical start of
// the text line must compare equal (via *it_) to this iterator's position.
541 line_start.MoveToLogicalStartOfTextline();
543 bool at_textline_start = at_word_start && *line_start.
it_ == *
it_;
545 return at_textline_start;
// Block level: requires at_textline_start plus a further condition that is
// not shown in this excerpt.
549 bool at_block_start = at_textline_start &&
552 return at_block_start;
559 return at_para_start;
// NOTE(review): function headers are not visible in this excerpt; the notes
// below describe only what the visible lines do.
// Special case when `next` is empty at `element`.
582 if (next.
Empty(element))
// Step `element` down by one per iteration while it is greater than `level`.
584 while (element > level) {
585 element = static_cast<PageIteratorLevel>(element - 1);
// Branch on the freshly computed (not the cached) paragraph direction.
594 if (CurrentParagraphIsLtr())
// Builds `text` using one of several helpers, then copies it into a newly
// allocated char buffer.
// NOTE(review): the enclosing header and the dispatch between these calls
// are not visible in this excerpt.
611 pp.AppendUTF8ParagraphText(&text);
615 AppendUTF8ParagraphText(&text);
619 it.MoveToLogicalStartOfTextline();
620 it.IterateAndAppendUTF8TextlineText(&text);
623 AppendUTF8WordText(&text);
// Reading direction = paragraph direction, flipped inside a minor run.
626 bool reading_direction_is_ltr =
627 current_paragraph_is_ltr_ ^ in_minor_direction_;
// At the start of a minor run, add the mark matching the reading direction.
628 if (at_beginning_of_minor_run_) {
629 text += reading_direction_is_ltr ?
kLRM :
kRLM;
// On the final symbol of a word, append the suffix marks as well.
632 if (IsAtFinalSymbolOfWord())
633 AppendSuffixMarks(&text);
// Copy into a new[] buffer of text.length() + 1 chars.
// NOTE(review): presumably the caller must release it with delete[] -
// confirm against the header.
636 int length = text.
length() + 1;
637 char* result =
new char[length];
638 strncpy(result, text.
c_str(), length);
// Return types of two functions whose names and bodies are not visible in
// this excerpt: pointers to nested vectors of (const char*, float) pairs,
// one with four levels of nesting and one with three.
641 std::vector<std::vector<std::vector<std::pair<const char*, float>>>>*
650 std::vector<std::vector<std::pair<const char*, float>>>*
// Appends the current word to *text: an optional leading direction mark, a
// pass over the blobs in the computed blob order, then the suffix marks.
659 void ResultIterator::AppendUTF8WordText(
STRING* text)
const {
// Reading direction = paragraph direction, flipped inside a minor run.
663 bool reading_direction_is_ltr =
664 current_paragraph_is_ltr_ ^ in_minor_direction_;
// At the start of a minor run, add the mark matching the reading direction.
665 if (at_beginning_of_minor_run_) {
666 *text += reading_direction_is_ltr ?
kLRM :
kRLM;
// Visit every entry of the blob order (loop body not shown in this excerpt).
670 CalculateBlobOrder(&blob_order);
671 for (
int i = 0; i < blob_order.
size(); i++) {
674 AppendSuffixMarks(text);
// Appends text word by word via AppendUTF8WordText(), with debug traces
// emitted through tprintf. Non-const, unlike the other Append* helpers
// visible in this file.
677 void ResultIterator::IterateAndAppendUTF8TextlineText(
STRING* text) {
// Debug output: paragraph direction as "ltr"/"rtl", the script directions,
// and each entry of textline_order.
688 current_paragraph_is_ltr_ ?
"ltr" :
"rtl");
689 PrintScriptDirs(dirs);
691 current_paragraph_is_ltr_ ?
"ltr" :
"rtl");
692 for (
int i = 0; i < textline_order.
size(); i++) {
693 tprintf(
"%d ", textline_order[i]);
// Count of words appended so far; `(words_appended > 0)` is one arm of a
// conditional expression whose other arm is not visible in this excerpt.
698 int words_appended = 0;
701 : (words_appended > 0);
// Loop that runs numSpaces times (body not shown), followed by the word.
702 for (
int i = 0; i < numSpaces; ++i) {
705 AppendUTF8WordText(text);
708 tprintf(
"Num spaces=%d, text=%s\n", numSpaces, text->
c_str());
712 tprintf(
"%d words printed\n", words_appended);
// Appends the text of a whole paragraph to *text, one text line at a time.
721 void ResultIterator::AppendUTF8ParagraphText(
STRING* text)
const {
// Work on `it`, rewound to the paragraph start and then to the logical start
// of its first text line.
// NOTE(review): `it` is presumably a local copy, since this method is
// const - confirm.
723 it.RestartParagraph();
724 it.MoveToLogicalStartOfTextline();
// do/while: append at least one text line, and continue while `it` still has
// a block and is not at the beginning of a paragraph.
728 it.IterateAndAppendUTF8TextlineText(text);
729 }
while (it.it_->block() !=
nullptr && !it.IsAtBeginningOf(
RIL_PARA));
// Returns true when the "bidi_debug" integer parameter is at least
// min_level.
// NOTE(review): the initial value of debug_level and any null check on p are
// not visible in this excerpt.
732 bool ResultIterator::BidiDebug(
int min_level)
const {
// Look up the IntParam named "bidi_debug" among the global int params.
735 ParamUtils::FindParam<IntParam>(
"bidi_debug",
GlobalParams()->int_params,
738 debug_level = (int32_t)(*p);
739 return debug_level >= min_level;