tesseract  5.0.0-alpha-619-ge9db
resultiterator.h
Go to the documentation of this file.
1 // File: resultiterator.h
3 // Description: Iterator for tesseract results that is capable of
4 // iterating in proper reading order over Bi Directional
5 // (e.g. mixed Hebrew and English) text.
6 // Author: David Eger
7 // Created: Fri May 27 13:58:06 PST 2011
8 //
9 // (C) Copyright 2011, Google Inc.
10 // Licensed under the Apache License, Version 2.0 (the "License");
11 // you may not use this file except in compliance with the License.
12 // You may obtain a copy of the License at
13 // http://www.apache.org/licenses/LICENSE-2.0
14 // Unless required by applicable law or agreed to in writing, software
15 // distributed under the License is distributed on an "AS IS" BASIS,
16 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 // See the License for the specific language governing permissions and
18 // limitations under the License.
19 //
21 
22 #ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_
23 #define TESSERACT_CCMAIN_RESULT_ITERATOR_H_
24 
25 #include <set> // for std::pair
26 #include <vector> // for std::vector
27 
28 #include "ltrresultiterator.h" // for LTRResultIterator
29 #include "platform.h" // for TESS_API, TESS_LOCAL
30 #include "publictypes.h" // for PageIteratorLevel
31 #include "unichar.h" // for StrongScriptDirection
32 
33 template <typename T>
34 class GenericVector;
35 template <typename T>
36 class GenericVectorEqEq;
37 
38 class STRING;
39 
40 namespace tesseract {
41 
42 class Tesseract;
43 
45  public:
46  static ResultIterator* StartOfParagraph(const LTRResultIterator& resit);
47 
52  ~ResultIterator() override = default;
53 
54  // ============= Moving around within the page ============.
59  void Begin() override;
60 
73  bool Next(PageIteratorLevel level) override;
74 
81  bool IsAtBeginningOf(PageIteratorLevel level) const override;
82 
88  bool IsAtFinalElement(PageIteratorLevel level,
89  PageIteratorLevel element) const override;
90 
91  // ============= Functions that refer to words only ============.
92  // Returns the number of blanks before the current word.
93  int BlanksBeforeWord() const;
94 
95  // ============= Accessing data ==============.
96 
101  virtual char* GetUTF8Text(PageIteratorLevel level) const;
102 
106  virtual std::vector<std::vector<std::vector<std::pair<const char*, float>>>>*
107  GetRawLSTMTimesteps() const;
108  virtual std::vector<std::vector<std::pair<const char*, float>>>*
109  GetBestLSTMSymbolChoices() const;
110 
115  bool ParagraphIsLtr() const;
116 
117  // ============= Exposed only for testing =============.
118 
141  static void CalculateTextlineOrder(
142  bool paragraph_is_ltr,
143  const GenericVector<StrongScriptDirection>& word_dirs,
144  GenericVectorEqEq<int>* reading_order);
145 
146  static const int kMinorRunStart;
147  static const int kMinorRunEnd;
148  static const int kComplexWord;
149 
150  protected:
157  TESS_LOCAL explicit ResultIterator(const LTRResultIterator& resit);
158 
159  private:
164  bool CurrentParagraphIsLtr() const;
165 
177  void CalculateTextlineOrder(bool paragraph_is_ltr,
178  const LTRResultIterator& resit,
179  GenericVectorEqEq<int>* indices) const;
181  void CalculateTextlineOrder(bool paragraph_is_ltr,
182  const LTRResultIterator& resit,
184  GenericVectorEqEq<int>* indices) const;
185 
190  int LTRWordIndex() const;
191 
196  void CalculateBlobOrder(GenericVector<int>* blob_indices) const;
197 
199  void MoveToLogicalStartOfTextline();
200 
205  void MoveToLogicalStartOfWord();
206 
208  bool IsAtFinalSymbolOfWord() const;
209 
211  bool IsAtFirstSymbolOfWord() const;
212 
217  void AppendSuffixMarks(STRING* text) const;
218 
220  void AppendUTF8WordText(STRING* text) const;
221 
229  void IterateAndAppendUTF8TextlineText(STRING* text);
230 
237  void AppendUTF8ParagraphText(STRING* text) const;
238 
240  bool BidiDebug(int min_level) const;
241 
242  bool current_paragraph_is_ltr_;
243 
248  bool at_beginning_of_minor_run_;
249 
251  bool in_minor_direction_;
252 
257  bool preserve_interword_spaces_;
258 };
259 
260 } // namespace tesseract.
261 
262 #endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H_
TESS_LOCAL
#define TESS_LOCAL
Definition: platform.h:55
tesseract::ResultIterator::kMinorRunStart
static const int kMinorRunStart
Definition: resultiterator.h:146
platform.h
STRING
Definition: strngs.h:45
ltrresultiterator.h
tesseract::LTRResultIterator
Definition: ltrresultiterator.h:47
tesseract::ResultIterator::kComplexWord
static const int kComplexWord
Definition: resultiterator.h:148
publictypes.h
tesseract::ResultIterator::kMinorRunEnd
static const int kMinorRunEnd
Definition: resultiterator.h:147
tesseract::PageIteratorLevel
PageIteratorLevel
Definition: publictypes.h:216
tesseract
Definition: baseapi.h:65
GenericVectorEqEq
Definition: genericvector.h:640
GenericVector
Definition: baseapi.h:40
tesseract::ResultIterator
Definition: resultiterator.h:44
unichar.h
TESS_API
#define TESS_API
Definition: platform.h:54