tesseract  5.0.0-alpha-619-ge9db
output.cpp
Go to the documentation of this file.
1 /******************************************************************
2  * File: output.cpp (Formerly output.c)
3  * Description: Output pass
4  * Author: Phil Cheatle
5  *
6  * (C) Copyright 1994, Hewlett-Packard Ltd.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  *
17  **********************************************************************/
18 
19 #include <cctype>
20 #include <cerrno>
21 #include <cstring>
22 #include "control.h"
23 #include <tesseract/helpers.h>
24 #include "output.h"
25 #include "tesseractclass.h"
26 #include "tessvars.h"
27 #ifndef DISABLED_LEGACY_ENGINE
28 #include "docqual.h"
29 #include "reject.h"
30 #endif
31 
32 #define CTRL_NEWLINE '\012' //newline
33 #define CTRL_HARDLINE '\015' //cr
34 
35 namespace tesseract {
36 void Tesseract::output_pass( //Tess output pass //send to api
37  PAGE_RES_IT &page_res_it,
38  const TBOX *target_word_box) {
39  BLOCK_RES *block_of_last_word;
40  bool force_eol; //During output
41  BLOCK *nextblock; //block of next word
42  WERD *nextword; //next word
43 
44  page_res_it.restart_page ();
45  block_of_last_word = nullptr;
46  while (page_res_it.word () != nullptr) {
47  check_debug_pt (page_res_it.word (), 120);
48 
49  if (target_word_box) {
50  TBOX current_word_box = page_res_it.word()->word->bounding_box();
51  FCOORD center_pt(
52  (current_word_box.right() + current_word_box.left()) / 2,
53  (current_word_box.bottom() + current_word_box.top()) / 2);
54  if (!target_word_box->contains(center_pt)) {
55  page_res_it.forward();
56  continue;
57  }
58  }
60  block_of_last_word != page_res_it.block ()) {
61  block_of_last_word = page_res_it.block ();
62  }
63 
64  force_eol = (tessedit_write_block_separators &&
65  (page_res_it.block () != page_res_it.next_block ())) ||
66  (page_res_it.next_word () == nullptr);
67 
68  if (page_res_it.next_word () != nullptr)
69  nextword = page_res_it.next_word ()->word;
70  else
71  nextword = nullptr;
72  if (page_res_it.next_block () != nullptr)
73  nextblock = page_res_it.next_block ()->block;
74  else
75  nextblock = nullptr;
76  //regardless of tilde crunching
77  write_results(page_res_it,
78  determine_newline_type(page_res_it.word()->word,
79  page_res_it.block()->block,
80  nextword, nextblock), force_eol);
81  page_res_it.forward();
82  }
83 }
84 
85 
86 /*************************************************************************
87  * write_results()
88  *
89  * All recognition and rejection has now been done. Generate the following:
90  * .txt file - giving the final best choices with NO highlighting
91  * .raw file - giving the tesseract top choice output for each word
92  * .map file - showing how the .txt file has been rejected in the .ep file
93  * epchoice list - a list of one element per word, containing the text for the
94  * epaper. Reject strings are inserted.
95  * inset list - a list of bounding boxes of reject insets - indexed by the
96  * reject strings in the epchoice text.
97  *************************************************************************/
98 void Tesseract::write_results(PAGE_RES_IT& page_res_it,
99  char newline_type, // type of newline
100  bool force_eol) { // override tilde crunch?
101  WERD_RES *word = page_res_it.word();
102  const UNICHARSET &uchset = *word->uch_set;
103  int i;
104  bool need_reject = false;
105  UNICHAR_ID space = uchset.unichar_to_id(" ");
106 
107  if ((word->unlv_crunch_mode != CR_NONE ||
108  word->best_choice->length() == 0) &&
110  if ((word->unlv_crunch_mode != CR_DELETE) &&
111  (!stats_.tilde_crunch_written ||
112  ((word->unlv_crunch_mode == CR_KEEP_SPACE) &&
113  (word->word->space () > 0) &&
114  !word->word->flag (W_FUZZY_NON) &&
115  !word->word->flag (W_FUZZY_SP)))) {
116  if (!word->word->flag (W_BOL) &&
117  (word->word->space () > 0) &&
118  !word->word->flag (W_FUZZY_NON) &&
119  !word->word->flag (W_FUZZY_SP)) {
120  stats_.last_char_was_tilde = false;
121  }
122  need_reject = true;
123  }
124  if ((need_reject && !stats_.last_char_was_tilde) ||
125  (force_eol && stats_.write_results_empty_block)) {
126  /* Write a reject char - mark as rejected unless zero_rejection mode */
127  stats_.last_char_was_tilde = true;
128  stats_.tilde_crunch_written = true;
129  stats_.last_char_was_newline = false;
130  stats_.write_results_empty_block = false;
131  }
132 
133  if ((word->word->flag (W_EOL) && !stats_.last_char_was_newline) || force_eol) {
134  stats_.tilde_crunch_written = false;
135  stats_.last_char_was_newline = true;
136  stats_.last_char_was_tilde = false;
137  }
138 
139  if (force_eol)
140  stats_.write_results_empty_block = true;
141  return;
142  }
143 
144  /* NORMAL PROCESSING of non tilde crunched words */
145 
146  stats_.tilde_crunch_written = false;
147  if (newline_type)
148  stats_.last_char_was_newline = true;
149  else
150  stats_.last_char_was_newline = false;
151  stats_.write_results_empty_block = force_eol; // about to write a real word
152 
153  if (unlv_tilde_crunching &&
154  stats_.last_char_was_tilde &&
155  (word->word->space() == 0) &&
157  (word->best_choice->unichar_id(0) == space)) {
158  /* Prevent adjacent tilde across words - we know that adjacent tildes within
159  words have been removed */
160  word->MergeAdjacentBlobs(0);
161  }
162  if (newline_type ||
164  stats_.last_char_was_tilde = false;
165  else {
166  if (word->reject_map.length () > 0) {
167  if (word->best_choice->unichar_id(word->reject_map.length() - 1) == space)
168  stats_.last_char_was_tilde = true;
169  else
170  stats_.last_char_was_tilde = false;
171  }
172  else if (word->word->space () > 0)
173  stats_.last_char_was_tilde = false;
174  /* else it is unchanged as there are no output chars */
175  }
176 
177  ASSERT_HOST (word->best_choice->length() == word->reject_map.length());
178 
179  set_unlv_suspects(word);
180  check_debug_pt (word, 120);
182  tprintf ("Dict word: \"%s\": %d\n",
183  word->best_choice->debug_string().c_str(),
184  dict_word(*(word->best_choice)));
185  }
186  if (!word->word->flag(W_REP_CHAR) || !tessedit_write_rep_codes) {
188  /* OVERRIDE ALL REJECTION MECHANISMS - ONLY REJECT TESS FAILURES */
189  for (i = 0; i < word->best_choice->length(); ++i) {
190  if (word->reject_map[i].rejected())
191  word->reject_map[i].setrej_minimal_rej_accept();
192  }
193  }
195  /* OVERRIDE ALL REJECTION MECHANISMS - ONLY REJECT TESS FAILURES */
196  for (i = 0; i < word->best_choice->length(); ++i) {
197  if ((word->best_choice->unichar_id(i) != space) &&
198  word->reject_map[i].rejected())
199  word->reject_map[i].setrej_minimal_rej_accept();
200  }
201  }
202  }
203 }
204 } // namespace tesseract
205 
206 /**********************************************************************
207  * determine_newline_type
208  *
209  * Find whether we have a wrapping or hard newline.
210  * Return false if not at end of line.
211  **********************************************************************/
212 
213 char determine_newline_type( //test line ends
214  WERD *word, //word to do
215  BLOCK *block, //current block
216  WERD *next_word, //next word
217  BLOCK *next_block //block of next word
218  ) {
219  int16_t end_gap; //to right edge
220  int16_t width; //of next word
221  TBOX word_box; //bounding
222  TBOX next_box; //next word
223  TBOX block_box; //block bounding
224 
225  if (!word->flag (W_EOL))
226  return false; //not end of line
227  if (next_word == nullptr || next_block == nullptr || block != next_block)
228  return CTRL_NEWLINE;
229  if (next_word->space () > 0)
230  return CTRL_HARDLINE; //it is tabbed
231  word_box = word->bounding_box ();
232  next_box = next_word->bounding_box ();
233  block_box = block->pdblk.bounding_box ();
234  //gap to eol
235  end_gap = block_box.right () - word_box.right ();
236  end_gap -= static_cast<int32_t>(block->space ());
237  width = next_box.right () - next_box.left ();
238  // tprintf("end_gap=%d-%d=%d, width=%d-%d=%d, nl=%d\n",
239  // block_box.right(),word_box.right(),end_gap,
240  // next_box.right(),next_box.left(),width,
241  // end_gap>width ? CTRL_HARDLINE : CTRL_NEWLINE);
242  return end_gap > width ? CTRL_HARDLINE : CTRL_NEWLINE;
243 }
244 
245 /*************************************************************************
246  * get_rep_char()
247  * Return the first accepted character from the repetition string. This is the
248  * character which is repeated - as determined earlier by fix_rep_char()
249  *************************************************************************/
250 namespace tesseract {
251 UNICHAR_ID Tesseract::get_rep_char(WERD_RES *word) { // what char is repeated?
252  int i;
253  for (i = 0; ((i < word->reject_map.length()) &&
254  (word->reject_map[i].rejected())); ++i);
255 
256  if (i < word->reject_map.length()) {
257  return word->best_choice->unichar_id(i);
258  } else {
259  return word->uch_set->unichar_to_id(unrecognised_char.c_str());
260  }
261 }
262 
263 /*************************************************************************
264  * SUSPECT LEVELS
265  *
266  * 0 - don't reject ANYTHING
267  * 1,2 - partial rejection
268  * 3 - BEST
269  *
270  * NOTE: to reject JUST tess failures in the .map file set suspect_level 3 and
271  * tessedit_minimal_rejection.
272  *************************************************************************/
273 void Tesseract::set_unlv_suspects(WERD_RES *word_res) {
274  int len = word_res->reject_map.length();
275  const WERD_CHOICE &word = *(word_res->best_choice);
276  const UNICHARSET &uchset = *word.unicharset();
277  int i;
278  float rating_per_ch;
279 
280  if (suspect_level == 0) {
281  for (i = 0; i < len; i++) {
282  if (word_res->reject_map[i].rejected())
283  word_res->reject_map[i].setrej_minimal_rej_accept();
284  }
285  return;
286  }
287 
288  if (suspect_level >= 3)
289  return; //Use defaults
290 
291  /* NOW FOR LEVELS 1 and 2 Find some stuff to unreject*/
292 
293  if (safe_dict_word(word_res) &&
294  (count_alphas(word) > suspect_short_words)) {
295  /* Unreject alphas in dictionary words */
296  for (i = 0; i < len; ++i) {
297  if (word_res->reject_map[i].rejected() &&
298  uchset.get_isalpha(word.unichar_id(i)))
299  word_res->reject_map[i].setrej_minimal_rej_accept();
300  }
301  }
302 
303  rating_per_ch = word.rating() / word_res->reject_map.length();
304 
305  if (rating_per_ch >= suspect_rating_per_ch)
306  return; // Don't touch bad ratings
307 
308  if ((word_res->tess_accepted) || (rating_per_ch < suspect_accept_rating)) {
309  /* Unreject any Tess Acceptable word - but NOT tess reject chs*/
310  for (i = 0; i < len; ++i) {
311  if (word_res->reject_map[i].rejected() &&
312  (!uchset.eq(word.unichar_id(i), " ")))
313  word_res->reject_map[i].setrej_minimal_rej_accept();
314  }
315  }
316 
317  for (i = 0; i < len; i++) {
318  if (word_res->reject_map[i].rejected()) {
319  if (word_res->reject_map[i].flag(R_DOC_REJ))
320  word_res->reject_map[i].setrej_minimal_rej_accept();
321  if (word_res->reject_map[i].flag(R_BLOCK_REJ))
322  word_res->reject_map[i].setrej_minimal_rej_accept();
323  if (word_res->reject_map[i].flag(R_ROW_REJ))
324  word_res->reject_map[i].setrej_minimal_rej_accept();
325  }
326  }
327 
328  if (suspect_level == 2)
329  return;
330 
331  if (!suspect_constrain_1Il ||
332  (word_res->reject_map.length() <= suspect_short_words)) {
333  for (i = 0; i < len; i++) {
334  if (word_res->reject_map[i].rejected()) {
335  if ((word_res->reject_map[i].flag(R_1IL_CONFLICT) ||
336  word_res->reject_map[i].flag(R_POSTNN_1IL)))
337  word_res->reject_map[i].setrej_minimal_rej_accept();
338 
339  if (!suspect_constrain_1Il &&
340  word_res->reject_map[i].flag(R_MM_REJECT))
341  word_res->reject_map[i].setrej_minimal_rej_accept();
342  }
343  }
344  }
345 
346  if (acceptable_word_string(*word_res->uch_set,
347  word.unichar_string().c_str(),
348  word.unichar_lengths().c_str()) !=
349  AC_UNACCEPTABLE ||
351  word.unichar_lengths().c_str())) {
352  if (word_res->reject_map.length() > suspect_short_words) {
353  for (i = 0; i < len; i++) {
354  if (word_res->reject_map[i].rejected() &&
355  (!word_res->reject_map[i].perm_rejected() ||
356  word_res->reject_map[i].flag (R_1IL_CONFLICT) ||
357  word_res->reject_map[i].flag (R_POSTNN_1IL) ||
358  word_res->reject_map[i].flag (R_MM_REJECT))) {
359  word_res->reject_map[i].setrej_minimal_rej_accept();
360  }
361  }
362  }
363  }
364 }
365 
366 int16_t Tesseract::count_alphas(const WERD_CHOICE &word) {
367  int count = 0;
368  for (int i = 0; i < word.length(); ++i) {
369  if (word.unicharset()->get_isalpha(word.unichar_id(i)))
370  count++;
371  }
372  return count;
373 }
374 
375 
376 int16_t Tesseract::count_alphanums(const WERD_CHOICE &word) {
377  int count = 0;
378  for (int i = 0; i < word.length(); ++i) {
379  if (word.unicharset()->get_isalpha(word.unichar_id(i)) ||
380  word.unicharset()->get_isdigit(word.unichar_id(i)))
381  count++;
382  }
383  return count;
384 }
385 
386 
387 bool Tesseract::acceptable_number_string(const char* s,
388  const char* lengths) {
389  bool prev_digit = false;
390 
391  if (*lengths == 1 && *s == '(')
392  s++;
393 
394  if (*lengths == 1 &&
395  ((*s == '$') || (*s == '.') || (*s == '+') || (*s == '-')))
396  s++;
397 
398  for (; *s != '\0'; s += *(lengths++)) {
399  if (unicharset.get_isdigit(s, *lengths))
400  prev_digit = true;
401  else if (prev_digit &&
402  (*lengths == 1 && ((*s == '.') || (*s == ',') || (*s == '-'))))
403  prev_digit = false;
404  else if (prev_digit && *lengths == 1 &&
405  (*(s + *lengths) == '\0') && ((*s == '%') || (*s == ')')))
406  return true;
407  else if (prev_digit &&
408  *lengths == 1 && (*s == '%') &&
409  (*(lengths + 1) == 1 && *(s + *lengths) == ')') &&
410  (*(s + *lengths + *(lengths + 1)) == '\0'))
411  return true;
412  else
413  return false;
414  }
415  return true;
416 }
417 } // namespace tesseract
WERD_CHOICE::unichar_string
const STRING & unichar_string() const
Definition: ratngs.h:529
PAGE_RES_IT::forward
WERD_RES * forward()
Definition: pageres.h:728
tesseract::Tesseract::output_pass
void output_pass(PAGE_RES_IT &page_res_it, const TBOX *target_word_box)
Definition: output.cpp:35
PAGE_RES_IT::next_block
BLOCK_RES * next_block() const
Definition: pageres.h:763
CR_DELETE
Definition: pageres.h:156
tessvars.h
WERD::flag
bool flag(WERD_FLAGS mask) const
Definition: werd.h:116
PDBLK::bounding_box
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:58
W_REP_CHAR
repeated character
Definition: werd.h:52
WERD_CHOICE::unichar_id
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:303
UNICHARSET::get_isdigit
bool get_isdigit(UNICHAR_ID unichar_id) const
Definition: unicharset.h:502
WERD_CHOICE
Definition: ratngs.h:261
UNICHARSET::get_isalpha
bool get_isalpha(UNICHAR_ID unichar_id) const
Definition: unicharset.h:481
tesseractclass.h
ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:87
WERD::bounding_box
TBOX bounding_box() const
Definition: werd.cpp:147
control.h
PAGE_RES_IT::block
BLOCK_RES * block() const
Definition: pageres.h:754
tesseract::Tesseract::tessedit_write_rep_codes
bool tessedit_write_rep_codes
Definition: tesseractclass.h:996
WERD_RES::unlv_crunch_mode
CRUNCH_MODE unlv_crunch_mode
Definition: pageres.h:309
CR_NONE
Definition: pageres.h:153
tesseract::Wordrec::dict_word
int dict_word(const WERD_CHOICE &word)
Definition: tface.cpp:103
PAGE_RES_IT::restart_page
WERD_RES * restart_page()
Definition: pageres.h:695
TBOX::top
int16_t top() const
Definition: rect.h:57
TBOX::contains
bool contains(const FCOORD pt) const
Definition: rect.h:330
tesseract::Tesseract::set_unlv_suspects
void set_unlv_suspects(WERD_RES *word)
Definition: output.cpp:272
WERD_RES
Definition: pageres.h:160
tesseract::Tesseract::tessedit_zero_rejection
bool tessedit_zero_rejection
Definition: tesseractclass.h:1020
tesseract::Tesseract::suspect_rating_per_ch
double suspect_rating_per_ch
Definition: tesseractclass.h:1017
tesseract::TesseractStats::write_results_empty_block
bool write_results_empty_block
Definition: tesseractclass.h:140
WERD_CHOICE::unicharset
const UNICHARSET * unicharset() const
Definition: ratngs.h:288
tesseract::TesseractStats::tilde_crunch_written
bool tilde_crunch_written
Definition: tesseractclass.h:137
tesseract::Tesseract::count_alphas
int16_t count_alphas(const WERD_CHOICE &word)
Definition: output.cpp:365
UNICHARSET::eq
bool eq(UNICHAR_ID unichar_id, const char *const unichar_repr) const
Definition: unicharset.cpp:686
FCOORD
Definition: points.h:187
tesseract::Tesseract::acceptable_word_string
ACCEPTABLE_WERD_TYPE acceptable_word_string(const UNICHARSET &char_set, const char *s, const char *lengths)
Definition: control.cpp:1744
BLOCK_RES
Definition: pageres.h:110
tesseract::CCUtil::unicharset
UNICHARSET unicharset
Definition: ccutil.h:57
BLOCK::space
int16_t space() const
return spacing
Definition: ocrblock.h:97
WERD_RES::uch_set
const UNICHARSET * uch_set
Definition: pageres.h:197
tesseract::Tesseract::unrecognised_char
char * unrecognised_char
Definition: tesseractclass.h:1013
BLOCK
Definition: ocrblock.h:28
tesseract::Tesseract::count_alphanums
int16_t count_alphanums(const WERD_CHOICE &word)
Definition: output.cpp:375
BLOCK::pdblk
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:189
R_DOC_REJ
Definition: rejctmap.h:113
REJMAP::length
int32_t length() const
Definition: rejctmap.h:222
W_FUZZY_NON
fuzzy nonspace
Definition: werd.h:54
tesseract::Tesseract::suspect_constrain_1Il
bool suspect_constrain_1Il
Definition: tesseractclass.h:1016
tesseract::TesseractStats::last_char_was_newline
bool last_char_was_newline
Definition: tesseractclass.h:138
WERD_RES::best_choice
WERD_CHOICE * best_choice
Definition: pageres.h:235
tesseract::Tesseract::safe_dict_word
int16_t safe_dict_word(const WERD_RES *werd_res)
Definition: reject.cpp:605
tesseract::Tesseract::acceptable_number_string
bool acceptable_number_string(const char *s, const char *lengths)
Definition: output.cpp:386
WERD::space
uint8_t space()
Definition: werd.h:98
tesseract::Tesseract::tessedit_rejection_debug
bool tessedit_rejection_debug
Definition: tesseractclass.h:1026
STRING::c_str
const char * c_str() const
Definition: strngs.cpp:192
tesseract::TesseractStats::last_char_was_tilde
bool last_char_was_tilde
Definition: tesseractclass.h:139
UNICHARSET::unichar_to_id
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:209
W_EOL
end of line
Definition: werd.h:47
tesseract::Tesseract::tessedit_zero_kelvin_rejection
bool tessedit_zero_kelvin_rejection
Definition: tesseractclass.h:1024
tesseract::Tesseract::suspect_level
int suspect_level
Definition: tesseractclass.h:1014
UNICHARSET
Definition: unicharset.h:145
WERD_RES::tess_accepted
bool tess_accepted
Definition: pageres.h:297
TBOX::bottom
int16_t bottom() const
Definition: rect.h:64
WERD_RES::reject_map
REJMAP reject_map
Definition: pageres.h:288
helpers.h
tesseract
Definition: baseapi.h:65
WERD_CHOICE::debug_string
const STRING debug_string() const
Definition: ratngs.h:493
PAGE_RES_IT::word
WERD_RES * word() const
Definition: pageres.h:748
R_1IL_CONFLICT
Definition: rejctmap.h:90
UNICHAR_ID
int UNICHAR_ID
Definition: unichar.h:36
PAGE_RES_IT
Definition: pageres.h:668
reject.h
tesseract::Tesseract::suspect_accept_rating
double suspect_accept_rating
Definition: tesseractclass.h:1018
CTRL_HARDLINE
#define CTRL_HARDLINE
Definition: output.cpp:32
AC_UNACCEPTABLE
Unacceptable word.
Definition: control.h:29
WERD_CHOICE::length
int length() const
Definition: ratngs.h:291
output.h
count
int count(LIST var_list)
Definition: oldlist.cpp:79
W_FUZZY_SP
fuzzy space
Definition: werd.h:53
WERD
Definition: werd.h:55
PAGE_RES_IT::next_word
WERD_RES * next_word() const
Definition: pageres.h:757
BLOCK_RES::block
BLOCK * block
Definition: pageres.h:113
TBOX::left
int16_t left() const
Definition: rect.h:71
R_ROW_REJ
Definition: rejctmap.h:115
CTRL_NEWLINE
#define CTRL_NEWLINE
Definition: output.cpp:31
TBOX::right
int16_t right() const
Definition: rect.h:78
tesseract::Tesseract::suspect_short_words
int suspect_short_words
Definition: tesseractclass.h:1015
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
R_MM_REJECT
Definition: rejctmap.h:93
tesseract::Tesseract::tessedit_word_for_word
bool tessedit_word_for_word
Definition: tesseractclass.h:1022
WERD_RES::word
WERD * word
Definition: pageres.h:180
WERD_CHOICE::rating
float rating() const
Definition: ratngs.h:315
tesseract::Tesseract::get_rep_char
UNICHAR_ID get_rep_char(WERD_RES *word)
Definition: output.cpp:251
R_BLOCK_REJ
Definition: rejctmap.h:114
WERD_RES::MergeAdjacentBlobs
void MergeAdjacentBlobs(int index)
Definition: pageres.cpp:969
tesseract::Tesseract::unlv_tilde_crunching
bool unlv_tilde_crunching
Definition: tesseractclass.h:930
R_POSTNN_1IL
Definition: rejctmap.h:91
CR_KEEP_SPACE
Definition: pageres.h:154
tesseract::Tesseract::tessedit_minimal_rejection
bool tessedit_minimal_rejection
Definition: tesseractclass.h:1019
tesseract::Tesseract::tessedit_write_block_separators
bool tessedit_write_block_separators
Definition: tesseractclass.h:995
WERD_CHOICE::unichar_lengths
const STRING & unichar_lengths() const
Definition: ratngs.h:536
tesseract::Tesseract::write_results
void write_results(PAGE_RES_IT &page_res_it, char newline_type, bool force_eol)
Definition: output.cpp:96
determine_newline_type
char determine_newline_type(WERD *word, BLOCK *block, WERD *next_word, BLOCK *next_block)
Definition: output.cpp:211
W_BOL
start of line
Definition: werd.h:46
tesseract::Tesseract::check_debug_pt
bool check_debug_pt(WERD_RES *word, int location)
Definition: control.cpp:1848
TBOX
Definition: rect.h:33
docqual.h