tesseract  5.0.0-alpha-619-ge9db
publictypes.h
Go to the documentation of this file.
1 // File: publictypes.h
3 // Description: Types used in both the API and internally
4 // Author: Ray Smith
5 //
6 // (C) Copyright 2010, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
18 
19 #ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H_
20 #define TESSERACT_CCSTRUCT_PUBLICTYPES_H_
21 
22 // This file contains types that are used both by the API and internally
23 // to Tesseract. In order to decouple the API from Tesseract and prevent cyclic
24 // dependencies, THIS FILE SHOULD NOT DEPEND ON ANY OTHER PART OF TESSERACT.
25 // Restated: It is OK for low-level Tesseract files to include publictypes.h,
26 // but not for the low-level tesseract code to include top-level API code.
27 // This file should not use other Tesseract types, as that would drag
28 // their includes into the API-level.
29 // API-level code should include apitypes.h in preference to this file.
30 
32 constexpr int kPointsPerInch = 72;
37 constexpr int kMinCredibleResolution = 70;
39 constexpr int kMaxCredibleResolution = 2400;
44 constexpr int kResolutionEstimationFactor = 10;
45 
53  PT_UNKNOWN, // Type is not yet known. Keep as the first element.
54  PT_FLOWING_TEXT, // Text that lives inside a column.
55  PT_HEADING_TEXT, // Text that spans more than one column.
56  PT_PULLOUT_TEXT, // Text that is in a cross-column pull-out region.
57  PT_EQUATION, // Partition belonging to an equation region.
58  PT_INLINE_EQUATION, // Partition has inline equation.
59  PT_TABLE, // Partition belonging to a table region.
60  PT_VERTICAL_TEXT, // Text-line runs vertically.
61  PT_CAPTION_TEXT, // Text that belongs to an image.
62  PT_FLOWING_IMAGE, // Image that lives inside a column.
63  PT_HEADING_IMAGE, // Image that spans more than one column.
64  PT_PULLOUT_IMAGE, // Image that is in a cross-column pull-out region.
65  PT_HORZ_LINE, // Horizontal Line.
66  PT_VERT_LINE, // Vertical Line.
67  PT_NOISE, // Lies outside of any column.
69 };
70 
73  return type == PT_HORZ_LINE || type == PT_VERT_LINE;
74 }
77  return type == PT_FLOWING_IMAGE || type == PT_HEADING_IMAGE ||
79 }
82  return type == PT_FLOWING_TEXT || type == PT_HEADING_TEXT ||
83  type == PT_PULLOUT_TEXT || type == PT_TABLE ||
86 }
87 // Returns true if PolyBlockType is of pullout (inter-column) type.
89  return type == PT_PULLOUT_IMAGE || type == PT_PULLOUT_TEXT;
90 }
91 
92 namespace tesseract {
121 };
122 
135 };
136 
152 };
153 
162  PSM_AUTO_ONLY = 2,
164  PSM_AUTO = 3,
167  PSM_SINGLE_BLOCK = 6,
174  11,
177 
180 };
181 
188 inline bool PSM_OSD_ENABLED(int pageseg_mode) {
189  return pageseg_mode <= PSM_AUTO_OSD || pageseg_mode == PSM_SPARSE_TEXT_OSD;
190 }
191 inline bool PSM_ORIENTATION_ENABLED(int pageseg_mode) {
192  return pageseg_mode <= PSM_AUTO || pageseg_mode == PSM_SPARSE_TEXT_OSD;
193 }
194 inline bool PSM_COL_FIND_ENABLED(int pageseg_mode) {
195  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_AUTO;
196 }
197 inline bool PSM_SPARSE(int pageseg_mode) {
198  return pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
199 }
200 inline bool PSM_BLOCK_FIND_ENABLED(int pageseg_mode) {
201  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_COLUMN;
202 }
203 inline bool PSM_LINE_FIND_ENABLED(int pageseg_mode) {
204  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_BLOCK;
205 }
206 inline bool PSM_WORD_FIND_ENABLED(int pageseg_mode) {
207  return (pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_LINE) ||
208  pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
209 }
210 
217  RIL_BLOCK, // Block of text/image/separator line.
218  RIL_PARA, // Paragraph within a block.
219  RIL_TEXTLINE, // Line within a paragraph.
220  RIL_WORD, // Word within a textline.
221  RIL_SYMBOL // Symbol/character within a word.
222 };
223 
253 };
254 
266  OEM_TESSERACT_ONLY, // Run Tesseract only - fastest; deprecated
267  OEM_LSTM_ONLY, // Run just the LSTM line recognizer.
268  OEM_TESSERACT_LSTM_COMBINED, // Run the LSTM recognizer, but allow fallback
269  // to Tesseract when things get difficult.
270  // deprecated
271  OEM_DEFAULT, // Specify this mode when calling init_*(),
272  // to indicate that any of the above modes
273  // should be automatically inferred from the
274  // variables in the language-specific config,
275  // command-line configs, or if not specified
276  // in any of the above should be set to the
277  // default OEM_TESSERACT_ONLY.
278  OEM_COUNT // Number of OEMs
279 };
280 
281 } // namespace tesseract.
282 
283 #endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H_
tesseract::WRITING_DIRECTION_LEFT_TO_RIGHT
Definition: publictypes.h:132
PT_VERTICAL_TEXT
Definition: publictypes.h:60
tesseract::PSM_OSD_ENABLED
bool PSM_OSD_ENABLED(int pageseg_mode)
Definition: publictypes.h:188
tesseract::PSM_SPARSE_TEXT_OSD
Sparse text with orientation and script det.
Definition: publictypes.h:175
tesseract::RIL_WORD
Definition: publictypes.h:220
kPointsPerInch
constexpr int kPointsPerInch
Definition: publictypes.h:32
tesseract::JUSTIFICATION_RIGHT
Definition: publictypes.h:252
tesseract::OEM_TESSERACT_LSTM_COMBINED
Definition: publictypes.h:268
tesseract::PSM_COL_FIND_ENABLED
bool PSM_COL_FIND_ENABLED(int pageseg_mode)
Definition: publictypes.h:194
tesseract::PSM_RAW_LINE
Definition: publictypes.h:176
tesseract::TEXTLINE_ORDER_RIGHT_TO_LEFT
Definition: publictypes.h:150
tesseract::RIL_BLOCK
Definition: publictypes.h:217
PT_PULLOUT_IMAGE
Definition: publictypes.h:64
kMaxCredibleResolution
constexpr int kMaxCredibleResolution
Definition: publictypes.h:39
PT_PULLOUT_TEXT
Definition: publictypes.h:56
PT_HEADING_TEXT
Definition: publictypes.h:55
tesseract::ParagraphJustification
ParagraphJustification
Definition: publictypes.h:248
PTIsPulloutType
bool PTIsPulloutType(PolyBlockType type)
Definition: publictypes.h:88
tesseract::ORIENTATION_PAGE_RIGHT
Definition: publictypes.h:118
tesseract::WRITING_DIRECTION_TOP_TO_BOTTOM
Definition: publictypes.h:134
tesseract::OEM_LSTM_ONLY
Definition: publictypes.h:267
tesseract::PSM_SINGLE_BLOCK
Assume a single uniform block of text. (Default.)
Definition: publictypes.h:168
PT_COUNT
Definition: publictypes.h:68
tesseract::WritingDirection
WritingDirection
Definition: publictypes.h:131
tesseract::ORIENTATION_PAGE_LEFT
Definition: publictypes.h:120
tesseract::PSM_SINGLE_LINE
Treat the image as a single text line.
Definition: publictypes.h:169
tesseract::PSM_WORD_FIND_ENABLED
bool PSM_WORD_FIND_ENABLED(int pageseg_mode)
Definition: publictypes.h:206
tesseract::PSM_SINGLE_WORD
Treat the image as a single word.
Definition: publictypes.h:170
tesseract::RIL_SYMBOL
Definition: publictypes.h:221
tesseract::ORIENTATION_PAGE_DOWN
Definition: publictypes.h:119
tesseract::OcrEngineMode
OcrEngineMode
Definition: publictypes.h:265
PT_INLINE_EQUATION
Definition: publictypes.h:58
PT_EQUATION
Definition: publictypes.h:57
tesseract::PSM_SPARSE
bool PSM_SPARSE(int pageseg_mode)
Definition: publictypes.h:197
tesseract::JUSTIFICATION_LEFT
Definition: publictypes.h:250
tesseract::PSM_SINGLE_BLOCK_VERT_TEXT
Definition: publictypes.h:166
PT_NOISE
Definition: publictypes.h:67
tesseract::TextlineOrder
TextlineOrder
Definition: publictypes.h:148
tesseract::PSM_SINGLE_COLUMN
Assume a single column of text of variable sizes.
Definition: publictypes.h:165
tesseract::WRITING_DIRECTION_RIGHT_TO_LEFT
Definition: publictypes.h:133
tesseract::PSM_COUNT
Number of enum entries.
Definition: publictypes.h:179
tesseract::JUSTIFICATION_UNKNOWN
Definition: publictypes.h:249
tesseract::TEXTLINE_ORDER_LEFT_TO_RIGHT
Definition: publictypes.h:149
tesseract::PSM_BLOCK_FIND_ENABLED
bool PSM_BLOCK_FIND_ENABLED(int pageseg_mode)
Definition: publictypes.h:200
tesseract::OEM_DEFAULT
Definition: publictypes.h:271
tesseract::TEXTLINE_ORDER_TOP_TO_BOTTOM
Definition: publictypes.h:151
kResolutionEstimationFactor
constexpr int kResolutionEstimationFactor
Definition: publictypes.h:44
kMinCredibleResolution
constexpr int kMinCredibleResolution
Definition: publictypes.h:37
PT_HORZ_LINE
Definition: publictypes.h:65
tesseract::PSM_AUTO_OSD
Definition: publictypes.h:161
tesseract::PageIteratorLevel
PageIteratorLevel
Definition: publictypes.h:216
PTIsImageType
bool PTIsImageType(PolyBlockType type)
Definition: publictypes.h:76
PT_VERT_LINE
Definition: publictypes.h:66
PT_FLOWING_TEXT
Definition: publictypes.h:54
tesseract
Definition: baseapi.h:65
tesseract::OEM_COUNT
Definition: publictypes.h:278
PT_HEADING_IMAGE
Definition: publictypes.h:63
tesseract::PSM_LINE_FIND_ENABLED
bool PSM_LINE_FIND_ENABLED(int pageseg_mode)
Definition: publictypes.h:203
tesseract::RIL_TEXTLINE
Definition: publictypes.h:219
PT_TABLE
Definition: publictypes.h:59
tesseract::ORIENTATION_PAGE_UP
Definition: publictypes.h:117
tesseract::PSM_AUTO_ONLY
Automatic page segmentation, but no OSD, or OCR.
Definition: publictypes.h:163
PTIsLineType
bool PTIsLineType(PolyBlockType type)
Definition: publictypes.h:72
PT_UNKNOWN
Definition: publictypes.h:53
tesseract::PageSegMode
PageSegMode
Definition: publictypes.h:159
PTIsTextType
bool PTIsTextType(PolyBlockType type)
Definition: publictypes.h:81
tesseract::PSM_ORIENTATION_ENABLED
bool PSM_ORIENTATION_ENABLED(int pageseg_mode)
Definition: publictypes.h:191
tesseract::PSM_CIRCLE_WORD
Treat the image as a single word in a circle.
Definition: publictypes.h:171
tesseract::PSM_AUTO
Fully automatic page segmentation, but no OSD.
Definition: publictypes.h:164
tesseract::PSM_OSD_ONLY
Orientation and script detection only.
Definition: publictypes.h:160
tesstrain_utils.type
type
Definition: tesstrain_utils.py:141
tesseract::PSM_SPARSE_TEXT
Find as much text as possible in no particular order.
Definition: publictypes.h:173
PolyBlockType
PolyBlockType
Definition: publictypes.h:52
tesseract::Orientation
Orientation
Definition: publictypes.h:116
tesseract::OEM_TESSERACT_ONLY
Definition: publictypes.h:266
tesseract::RIL_PARA
Definition: publictypes.h:218
PT_FLOWING_IMAGE
Definition: publictypes.h:62
tesseract::JUSTIFICATION_CENTER
Definition: publictypes.h:251
PT_CAPTION_TEXT
Definition: publictypes.h:61
tesseract::PSM_SINGLE_CHAR
Treat the image as a single character.
Definition: publictypes.h:172