tesseract  5.0.0-alpha-619-ge9db
wordseg.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: wordseg.cpp (Formerly wspace.c)
3  * Description: Code to segment the blobs into words.
4  * Author: Ray Smith
5  *
6  * (C) Copyright 1992, Hewlett-Packard Ltd.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  *
17  **********************************************************************/
18 
19 #include "blobbox.h"
20 #include "statistc.h"
21 #include "drawtord.h"
22 #include "makerow.h"
23 #include "pitsync1.h"
24 #include "tovars.h"
25 #include "topitch.h"
26 #include "cjkpitch.h"
27 #include "textord.h"
28 #include "fpchop.h"
29 #include "wordseg.h"
30 
31 // Include automatically generated configuration file if running autoconf.
32 #ifdef HAVE_CONFIG_H
33 #include "config_auto.h"
34 #endif
35 
36 BOOL_VAR(textord_fp_chopping, true, "Do fixed pitch chopping");
38  "Force proportional word segmentation on all rows");
40  "Chopper is being tested.");
41 
42 #define BLOCK_STATS_CLUSTERS 10
43 
44 
52 void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST* real_rows) {
53  TO_ROW_IT to_row_it(rows);
54  ROW_IT row_it(real_rows);
55  for (to_row_it.mark_cycle_pt(); !to_row_it.cycled_list();
56  to_row_it.forward()) {
57  TO_ROW* row = to_row_it.data();
58  // The blobs have to come out of the BLOBNBOX into the C_BLOB_LIST ready
59  // to create the word.
60  C_BLOB_LIST cblobs;
61  C_BLOB_IT cblob_it(&cblobs);
62  BLOBNBOX_IT box_it(row->blob_list());
63  for (;!box_it.empty(); box_it.forward()) {
64  BLOBNBOX* bblob= box_it.extract();
65  if (bblob->joined_to_prev() || (one_blob && !cblob_it.empty())) {
66  if (bblob->cblob() != nullptr) {
67  C_OUTLINE_IT cout_it(cblob_it.data()->out_list());
68  cout_it.move_to_last();
69  cout_it.add_list_after(bblob->cblob()->out_list());
70  delete bblob->cblob();
71  }
72  } else {
73  if (bblob->cblob() != nullptr)
74  cblob_it.add_after_then_move(bblob->cblob());
75  }
76  delete bblob;
77  }
78  // Convert the TO_ROW to a ROW.
79  ROW* real_row = new ROW(row, static_cast<int16_t>(row->kern_size),
80  static_cast<int16_t>(row->space_size));
81  WERD_IT word_it(real_row->word_list());
82  WERD* word = new WERD(&cblobs, 0, nullptr);
83  word->set_flag(W_BOL, true);
84  word->set_flag(W_EOL, true);
85  word->set_flag(W_DONT_CHOP, one_blob);
86  word_it.add_after_then_move(word);
87  row_it.add_after_then_move(real_row);
88  }
89 }
90 
96 void make_words(tesseract::Textord *textord,
97  ICOORD page_tr, // top right
98  float gradient, // page skew
99  BLOCK_LIST *blocks, // block list
100  TO_BLOCK_LIST *port_blocks) { // output list
101  TO_BLOCK_IT block_it; // iterator
102  TO_BLOCK *block; // current block
103 
104  if (textord->use_cjk_fp_model()) {
105  compute_fixed_pitch_cjk(page_tr, port_blocks);
106  } else {
107  compute_fixed_pitch(page_tr, port_blocks, gradient, FCOORD(0.0f, -1.0f),
108  !bool(textord_test_landscape));
109  }
110  textord->to_spacing(page_tr, port_blocks);
111  block_it.set_to_list(port_blocks);
112  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
113  block = block_it.data();
114  make_real_words(textord, block, FCOORD(1.0f, 0.0f));
115  }
116 }
117 
118 
126 void set_row_spaces( //find space sizes
127  TO_BLOCK* block, //block to do
128  FCOORD rotation, //for drawing
129  bool testing_on //correct orientation
130 ) {
131  TO_ROW *row; //current row
132  TO_ROW_IT row_it = block->get_rows ();
133 
134  if (row_it.empty ())
135  return; //empty block
136  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
137  row = row_it.data ();
138  if (row->fixed_pitch == 0) {
139  row->min_space =
140  static_cast<int32_t>(ceil (row->pr_space -
141  (row->pr_space -
143  row->max_nonspace =
144  static_cast<int32_t>(floor (row->pr_nonsp +
145  (row->pr_space -
147  if (testing_on && textord_show_initial_words) {
148  tprintf ("Assigning defaults %d non, %d space to row at %g\n",
149  row->max_nonspace, row->min_space, row->intercept ());
150  }
151  row->space_threshold = (row->max_nonspace + row->min_space) / 2;
152  row->space_size = row->pr_space;
153  row->kern_size = row->pr_nonsp;
154  }
155 #ifndef GRAPHICS_DISABLED
156  if (textord_show_initial_words && testing_on) {
157  plot_word_decisions (to_win, static_cast<int16_t>(row->fixed_pitch), row);
158  }
159 #endif
160  }
161 }
162 
163 
170 int32_t row_words( //compute space size
171  TO_BLOCK* block, //block it came from
172  TO_ROW* row, //row to operate on
173  int32_t maxwidth, //max expected space size
174  FCOORD rotation, //for drawing
175  bool testing_on //for debug
176 ) {
177  bool testing_row; //contains testpt
178  bool prev_valid; //if decent size
179  int32_t prev_x; //end of prev blob
180  int32_t cluster_count; //no of clusters
181  int32_t gap_index; //which cluster
182  int32_t smooth_factor; //for smoothing stats
183  BLOBNBOX *blob; //current blob
184  float lower, upper; //clustering parameters
185  float gaps[3]; //gap clusers
186  ICOORD testpt;
187  TBOX blob_box; //bounding box
188  //iterator
189  BLOBNBOX_IT blob_it = row->blob_list ();
190  STATS gap_stats (0, maxwidth);
191  STATS cluster_stats[4]; //clusters
192 
194  smooth_factor =
195  static_cast<int32_t>(block->xheight * textord_wordstats_smooth_factor + 1.5);
196  // if (testing_on)
197  // tprintf("Row smooth factor=%d\n",smooth_factor);
198  prev_valid = false;
199  prev_x = -INT32_MAX;
200  testing_row = false;
201  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
202  blob = blob_it.data ();
203  blob_box = blob->bounding_box ();
204  if (blob_box.contains (testpt))
205  testing_row = true;
206  gap_stats.add (blob_box.width (), 1);
207  }
208  gap_stats.clear ();
209  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
210  blob = blob_it.data ();
211  if (!blob->joined_to_prev ()) {
212  blob_box = blob->bounding_box ();
213  if (prev_valid && blob_box.left () - prev_x < maxwidth) {
214  gap_stats.add (blob_box.left () - prev_x, 1);
215  }
216  prev_valid = true;
217  prev_x = blob_box.right ();
218  }
219  }
220  if (gap_stats.get_total () == 0) {
221  row->min_space = 0; //no evidence
222  row->max_nonspace = 0;
223  return 0;
224  }
225  gap_stats.smooth (smooth_factor);
226  lower = row->xheight * textord_words_initial_lower;
227  upper = row->xheight * textord_words_initial_upper;
228  cluster_count = gap_stats.cluster (lower, upper,
230  cluster_stats);
231  while (cluster_count < 2 && ceil (lower) < floor (upper)) {
232  //shrink gap
233  upper = (upper * 3 + lower) / 4;
234  lower = (lower * 3 + upper) / 4;
235  cluster_count = gap_stats.cluster (lower, upper,
237  cluster_stats);
238  }
239  if (cluster_count < 2) {
240  row->min_space = 0; //no evidence
241  row->max_nonspace = 0;
242  return 0;
243  }
244  for (gap_index = 0; gap_index < cluster_count; gap_index++)
245  gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
246  //get medians
247  if (cluster_count > 2) {
248  if (testing_on && textord_show_initial_words) {
249  tprintf ("Row at %g has 3 sizes of gap:%g,%g,%g\n",
250  row->intercept (),
251  cluster_stats[1].ile (0.5),
252  cluster_stats[2].ile (0.5), cluster_stats[3].ile (0.5));
253  }
254  lower = gaps[0];
255  if (gaps[1] > lower) {
256  upper = gaps[1]; //prefer most frequent
257  if (upper < block->xheight * textord_words_min_minspace
258  && gaps[2] > gaps[1]) {
259  upper = gaps[2];
260  }
261  }
262  else if (gaps[2] > lower
263  && gaps[2] >= block->xheight * textord_words_min_minspace)
264  upper = gaps[2];
265  else if (lower >= block->xheight * textord_words_min_minspace) {
266  upper = lower; //not nice
267  lower = gaps[1];
268  if (testing_on && textord_show_initial_words) {
269  tprintf ("Had to switch most common from lower to upper!!\n");
270  gap_stats.print();
271  }
272  }
273  else {
274  row->min_space = 0; //no evidence
275  row->max_nonspace = 0;
276  return 0;
277  }
278  }
279  else {
280  if (gaps[1] < gaps[0]) {
281  if (testing_on && textord_show_initial_words) {
282  tprintf ("Had to switch most common from lower to upper!!\n");
283  gap_stats.print();
284  }
285  lower = gaps[1];
286  upper = gaps[0];
287  }
288  else {
289  upper = gaps[1];
290  lower = gaps[0];
291  }
292  }
293  if (upper < block->xheight * textord_words_min_minspace) {
294  row->min_space = 0; //no evidence
295  row->max_nonspace = 0;
296  return 0;
297  }
298  if (upper * 3 < block->min_space * 2 + block->max_nonspace
299  || lower * 3 > block->min_space * 2 + block->max_nonspace) {
300  if (testing_on && textord_show_initial_words) {
301  tprintf ("Disagreement between block and row at %g!!\n",
302  row->intercept ());
303  tprintf ("Lower=%g, upper=%g, Stats:\n", lower, upper);
304  gap_stats.print();
305  }
306  }
307  row->min_space =
308  static_cast<int32_t>(ceil (upper - (upper - lower) * textord_words_definite_spread));
309  row->max_nonspace =
310  static_cast<int32_t>(floor (lower + (upper - lower) * textord_words_definite_spread));
311  row->space_threshold = (row->max_nonspace + row->min_space) / 2;
312  row->space_size = upper;
313  row->kern_size = lower;
314  if (testing_on && textord_show_initial_words) {
315  if (testing_row) {
316  tprintf ("GAP STATS\n");
317  gap_stats.print();
318  tprintf ("SPACE stats\n");
319  cluster_stats[2].print_summary();
320  tprintf ("NONSPACE stats\n");
321  cluster_stats[1].print_summary();
322  }
323  tprintf ("Row at %g has minspace=%d(%g), max_non=%d(%g)\n",
324  row->intercept (), row->min_space, upper,
325  row->max_nonspace, lower);
326  }
327  return cluster_stats[2].get_total ();
328 }
329 
330 
337 int32_t row_words2( //compute space size
338  TO_BLOCK* block, //block it came from
339  TO_ROW* row, //row to operate on
340  int32_t maxwidth, //max expected space size
341  FCOORD rotation, //for drawing
342  bool testing_on //for debug
343 ) {
344  bool prev_valid; //if decent size
345  bool this_valid; //current blob big enough
346  int32_t prev_x; //end of prev blob
347  int32_t min_width; //min interesting width
348  int32_t valid_count; //good gaps
349  int32_t total_count; //total gaps
350  int32_t cluster_count; //no of clusters
351  int32_t prev_count; //previous cluster_count
352  int32_t gap_index; //which cluster
353  int32_t smooth_factor; //for smoothing stats
354  BLOBNBOX *blob; //current blob
355  float lower, upper; //clustering parameters
356  ICOORD testpt;
357  TBOX blob_box; //bounding box
358  //iterator
359  BLOBNBOX_IT blob_it = row->blob_list ();
360  STATS gap_stats (0, maxwidth);
361  //gap sizes
362  float gaps[BLOCK_STATS_CLUSTERS];
363  STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1];
364  //clusters
365 
367  smooth_factor =
368  static_cast<int32_t>(block->xheight * textord_wordstats_smooth_factor + 1.5);
369  // if (testing_on)
370  // tprintf("Row smooth factor=%d\n",smooth_factor);
371  prev_valid = false;
372  prev_x = -INT16_MAX;
373  const bool testing_row = false;
374  //min blob size
375  min_width = static_cast<int32_t>(block->pr_space);
376  total_count = 0;
377  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
378  blob = blob_it.data ();
379  if (!blob->joined_to_prev ()) {
380  blob_box = blob->bounding_box ();
381  this_valid = blob_box.width () >= min_width;
382  if (this_valid && prev_valid
383  && blob_box.left () - prev_x < maxwidth) {
384  gap_stats.add (blob_box.left () - prev_x, 1);
385  }
386  total_count++; //count possibles
387  prev_x = blob_box.right ();
388  prev_valid = this_valid;
389  }
390  }
391  valid_count = gap_stats.get_total ();
392  if (valid_count < total_count * textord_words_minlarge) {
393  gap_stats.clear ();
394  prev_x = -INT16_MAX;
395  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
396  blob_it.forward ()) {
397  blob = blob_it.data ();
398  if (!blob->joined_to_prev ()) {
399  blob_box = blob->bounding_box ();
400  if (blob_box.left () - prev_x < maxwidth) {
401  gap_stats.add (blob_box.left () - prev_x, 1);
402  }
403  prev_x = blob_box.right ();
404  }
405  }
406  }
407  if (gap_stats.get_total () == 0) {
408  row->min_space = 0; //no evidence
409  row->max_nonspace = 0;
410  return 0;
411  }
412 
413  cluster_count = 0;
414  lower = block->xheight * words_initial_lower;
415  upper = block->xheight * words_initial_upper;
416  gap_stats.smooth (smooth_factor);
417  do {
418  prev_count = cluster_count;
419  cluster_count = gap_stats.cluster (lower, upper,
421  BLOCK_STATS_CLUSTERS, cluster_stats);
422  }
423  while (cluster_count > prev_count && cluster_count < BLOCK_STATS_CLUSTERS);
424  if (cluster_count < 1) {
425  row->min_space = 0;
426  row->max_nonspace = 0;
427  return 0;
428  }
429  for (gap_index = 0; gap_index < cluster_count; gap_index++)
430  gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
431  //get medians
432  if (testing_on) {
433  tprintf ("cluster_count=%d:", cluster_count);
434  for (gap_index = 0; gap_index < cluster_count; gap_index++)
435  tprintf (" %g(%d)", gaps[gap_index],
436  cluster_stats[gap_index + 1].get_total ());
437  tprintf ("\n");
438  }
439 
440  //Try to find proportional non-space and space for row.
441  for (gap_index = 0; gap_index < cluster_count
442  && gaps[gap_index] > block->max_nonspace; gap_index++);
443  if (gap_index < cluster_count)
444  lower = gaps[gap_index]; //most frequent below
445  else {
446  if (testing_on)
447  tprintf ("No cluster below block threshold!, using default=%g\n",
448  block->pr_nonsp);
449  lower = block->pr_nonsp;
450  }
451  for (gap_index = 0; gap_index < cluster_count
452  && gaps[gap_index] <= block->max_nonspace; gap_index++);
453  if (gap_index < cluster_count)
454  upper = gaps[gap_index]; //most frequent above
455  else {
456  if (testing_on)
457  tprintf ("No cluster above block threshold!, using default=%g\n",
458  block->pr_space);
459  upper = block->pr_space;
460  }
461  row->min_space =
462  static_cast<int32_t>(ceil (upper - (upper - lower) * textord_words_definite_spread));
463  row->max_nonspace =
464  static_cast<int32_t>(floor (lower + (upper - lower) * textord_words_definite_spread));
465  row->space_threshold = (row->max_nonspace + row->min_space) / 2;
466  row->space_size = upper;
467  row->kern_size = lower;
468  if (testing_on) {
469  if (testing_row) {
470  tprintf ("GAP STATS\n");
471  gap_stats.print();
472  tprintf ("SPACE stats\n");
473  cluster_stats[2].print_summary();
474  tprintf ("NONSPACE stats\n");
475  cluster_stats[1].print_summary();
476  }
477  tprintf ("Row at %g has minspace=%d(%g), max_non=%d(%g)\n",
478  row->intercept (), row->min_space, upper,
479  row->max_nonspace, lower);
480  }
481  return 1;
482 }
483 
484 
491 void make_real_words(
492  tesseract::Textord *textord,
493  TO_BLOCK *block, //block to do
494  FCOORD rotation //for drawing
495  ) {
496  TO_ROW *row; //current row
497  TO_ROW_IT row_it = block->get_rows ();
498  ROW *real_row = nullptr; //output row
499  ROW_IT real_row_it = block->block->row_list ();
500 
501  if (row_it.empty ())
502  return; //empty block
503  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
504  row = row_it.data ();
505  if (row->blob_list ()->empty () && !row->rep_words.empty ()) {
506  real_row = make_rep_words (row, block);
507  } else if (!row->blob_list()->empty()) {
508  // In a fixed pitch document, some lines may be detected as fixed pitch
509  // while others don't, and will go through different path.
510  // For non-space delimited language like CJK, fixed pitch chop always
511  // leave the entire line as one word. We can force consistent chopping
512  // with force_make_prop_words flag.
513  POLY_BLOCK* pb = block->block->pdblk.poly_block();
514  if (textord_chopper_test) {
515  real_row = textord->make_blob_words (row, rotation);
516  } else if (textord_force_make_prop_words ||
517  (pb != nullptr && !pb->IsText()) ||
518  row->pitch_decision == PITCH_DEF_PROP ||
520  real_row = textord->make_prop_words (row, rotation);
521  } else if (row->pitch_decision == PITCH_DEF_FIXED ||
523  real_row = fixed_pitch_words (row, rotation);
524  } else {
525  ASSERT_HOST(false);
526  }
527  }
528  if (real_row != nullptr) {
529  //put row in block
530  real_row_it.add_after_then_move (real_row);
531  }
532  }
533  block->block->set_stats (block->fixed_pitch == 0, static_cast<int16_t>(block->kern_size),
534  static_cast<int16_t>(block->space_size),
535  static_cast<int16_t>(block->fixed_pitch));
536  block->block->check_pitch ();
537 }
538 
539 
547 ROW *make_rep_words( //make a row
548  TO_ROW *row, //row to convert
549  TO_BLOCK *block //block it lives in
550  ) {
551  ROW *real_row; //output row
552  TBOX word_box; //bounding box
553  //iterator
554  WERD_IT word_it = &row->rep_words;
555 
556  if (word_it.empty ())
557  return nullptr;
558  word_box = word_it.data ()->bounding_box ();
559  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ())
560  word_box += word_it.data ()->bounding_box ();
561  row->xheight = block->xheight;
562  real_row = new ROW(row,
563  static_cast<int16_t>(block->kern_size), static_cast<int16_t>(block->space_size));
564  word_it.set_to_list (real_row->word_list ());
565  //put words in row
566  word_it.add_list_after (&row->rep_words);
567  real_row->recalc_bounding_box ();
568  return real_row;
569 }
570 
571 
579 WERD *make_real_word(BLOBNBOX_IT *box_it, //iterator
580  int32_t blobcount, //no of blobs to use
581  bool bol, //start of line
582  uint8_t blanks //no of blanks
583  ) {
584  C_OUTLINE_IT cout_it;
585  C_BLOB_LIST cblobs;
586  C_BLOB_IT cblob_it = &cblobs;
587  WERD *word; // new word
588  BLOBNBOX *bblob; // current blob
589  int32_t blobindex; // in row
590 
591  for (blobindex = 0; blobindex < blobcount; blobindex++) {
592  bblob = box_it->extract();
593  if (bblob->joined_to_prev()) {
594  if (bblob->cblob() != nullptr) {
595  cout_it.set_to_list(cblob_it.data()->out_list());
596  cout_it.move_to_last();
597  cout_it.add_list_after(bblob->cblob()->out_list());
598  delete bblob->cblob();
599  }
600  }
601  else {
602  if (bblob->cblob() != nullptr)
603  cblob_it.add_after_then_move(bblob->cblob());
604  }
605  delete bblob;
606  box_it->forward(); // next one
607  }
608 
609  if (blanks < 1)
610  blanks = 1;
611 
612  word = new WERD(&cblobs, blanks, nullptr);
613 
614  if (bol)
615  word->set_flag(W_BOL, true);
616  if (box_it->at_first())
617  word->set_flag(W_EOL, true); // at end of line
618 
619  return word;
620 }
TO_ROW::min_space
int32_t min_space
Definition: blobbox.h:662
STATS::get_total
int32_t get_total() const
Definition: statistc.h:83
TO_BLOCK::max_nonspace
int32_t max_nonspace
Definition: blobbox.h:792
make_single_word
void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST *real_rows)
Definition: wordseg.cpp:51
TO_ROW::rep_words
WERD_LIST rep_words
Definition: blobbox.h:667
TO_ROW::space_size
float space_size
Definition: blobbox.h:666
BLOCK::check_pitch
void check_pitch()
check proportional
Definition: ocrblock.cpp:163
C_BLOB::out_list
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:69
TO_ROW::pr_nonsp
float pr_nonsp
Definition: blobbox.h:654
TO_BLOCK::kern_size
float kern_size
Definition: blobbox.h:789
POLY_BLOCK::IsText
bool IsText() const
Definition: polyblk.h:62
tesseract::Textord::to_spacing
void to_spacing(ICOORD page_tr, TO_BLOCK_LIST *blocks)
Definition: tospace.cpp:43
TO_ROW::pr_space
float pr_space
Definition: blobbox.h:653
W_DONT_CHOP
fixed pitch chopped
Definition: werd.h:51
STATS::print_summary
void print_summary() const
Definition: statistc.cpp:534
ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:87
plot_word_decisions
void plot_word_decisions(ScrollView *win, int16_t pitch, TO_ROW *row)
Definition: drawtord.cpp:239
BLOCK::row_list
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:115
PITCH_DEF_PROP
Definition: blobbox.h:48
blobbox.h
textord_words_initial_upper
double textord_words_initial_upper
Definition: tovars.cpp:52
ICOORD
integer coordinate
Definition: points.h:30
textord_test_landscape
bool textord_test_landscape
Definition: makerow.cpp:48
ROW::recalc_bounding_box
void recalc_bounding_box()
Definition: ocrrow.cpp:96
TBOX::contains
bool contains(const FCOORD pt) const
Definition: rect.h:330
TO_BLOCK
Definition: blobbox.h:691
PITCH_CORR_PROP
Definition: blobbox.h:51
row_words2
int32_t row_words2(TO_BLOCK *block, TO_ROW *row, int32_t maxwidth, FCOORD rotation, bool testing_on)
Definition: wordseg.cpp:336
PITCH_DEF_FIXED
Definition: blobbox.h:46
TO_ROW::pitch_decision
PITCH_TYPE pitch_decision
Definition: blobbox.h:649
textord_words_minlarge
double textord_words_minlarge
Definition: tovars.cpp:54
FCOORD
Definition: points.h:187
BLOBNBOX
Definition: blobbox.h:142
tesseract::Textord::make_blob_words
ROW * make_blob_words(TO_ROW *row, FCOORD rotation)
Definition: tospace.cpp:1177
BLOCK_STATS_CLUSTERS
#define BLOCK_STATS_CLUSTERS
Definition: wordseg.cpp:41
textord_show_initial_words
bool textord_show_initial_words
Definition: tovars.cpp:22
textord_wordstats_smooth_factor
double textord_wordstats_smooth_factor
Definition: tovars.cpp:36
statistc.h
tovars.h
topitch.h
set_row_spaces
void set_row_spaces(TO_BLOCK *block, FCOORD rotation, bool testing_on)
Definition: wordseg.cpp:125
BLOCK::pdblk
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:189
makerow.h
make_words
void make_words(tesseract::Textord *textord, ICOORD page_tr, float gradient, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
Definition: wordseg.cpp:95
PDBLK::poly_block
POLY_BLOCK * poly_block() const
Definition: pdblock.h:54
PITCH_CORR_FIXED
Definition: blobbox.h:50
cjkpitch.h
TO_BLOCK::block
BLOCK * block
Definition: blobbox.h:776
textord_words_initial_lower
double textord_words_initial_lower
Definition: tovars.cpp:50
WERD::set_flag
void set_flag(WERD_FLAGS mask, bool value)
Definition: werd.h:117
TO_BLOCK::fixed_pitch
float fixed_pitch
Definition: blobbox.h:788
words_initial_upper
double words_initial_upper
Definition: tovars.cpp:68
TO_BLOCK::xheight
float xheight
Definition: blobbox.h:787
W_EOL
end of line
Definition: werd.h:47
BLOBNBOX::joined_to_prev
bool joined_to_prev() const
Definition: blobbox.h:255
textord_spacesize_ratioprop
double textord_spacesize_ratioprop
Definition: tovars.cpp:77
textord_test_y
int textord_test_y
Definition: makerow.cpp:61
TBOX::width
int16_t width() const
Definition: rect.h:114
BOOL_VAR
#define BOOL_VAR(name, val, comment)
Definition: params.h:303
textord.h
row_words
int32_t row_words(TO_BLOCK *block, TO_ROW *row, int32_t maxwidth, FCOORD rotation, bool testing_on)
Definition: wordseg.cpp:169
tesseract::Textord::use_cjk_fp_model
bool use_cjk_fp_model() const
Definition: textord.h:92
TO_ROW::fixed_pitch
float fixed_pitch
Definition: blobbox.h:650
TO_ROW::xheight
float xheight
Definition: blobbox.h:656
make_real_word
WERD * make_real_word(BLOBNBOX_IT *box_it, int32_t blobcount, bool bol, uint8_t blanks)
Definition: wordseg.cpp:578
compute_fixed_pitch_cjk
void compute_fixed_pitch_cjk(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
Definition: cjkpitch.cpp:1040
textord_chopper_test
bool textord_chopper_test
Definition: wordseg.cpp:39
TO_BLOCK::space_size
float space_size
Definition: blobbox.h:790
STATS
Definition: statistc.h:30
BLOBNBOX::bounding_box
const TBOX & bounding_box() const
Definition: blobbox.h:229
TO_BLOCK::pr_space
float pr_space
Definition: blobbox.h:795
drawtord.h
textord_words_definite_spread
double textord_words_definite_spread
Definition: tovars.cpp:73
tesseract::Textord::make_prop_words
ROW * make_prop_words(TO_ROW *row, FCOORD rotation)
Definition: tospace.cpp:885
TO_BLOCK::pr_nonsp
float pr_nonsp
Definition: blobbox.h:796
make_rep_words
ROW * make_rep_words(TO_ROW *row, TO_BLOCK *block)
Definition: wordseg.cpp:546
TO_BLOCK::min_space
int32_t min_space
Definition: blobbox.h:791
TO_BLOCK::get_rows
TO_ROW_LIST * get_rows()
Definition: blobbox.h:703
STATS::ile
double ile(double frac) const
Definition: statistc.cpp:156
TO_ROW::space_threshold
int32_t space_threshold
Definition: blobbox.h:664
fixed_pitch_words
ROW * fixed_pitch_words(TO_ROW *row, FCOORD rotation)
Definition: fpchop.cpp:42
words_initial_lower
double words_initial_lower
Definition: tovars.cpp:67
TO_ROW::max_nonspace
int32_t max_nonspace
Definition: blobbox.h:663
TO_ROW::intercept
float intercept() const
Definition: blobbox.h:588
wordseg.h
WERD
Definition: werd.h:55
TBOX::left
int16_t left() const
Definition: rect.h:71
tesseract::Textord
Definition: textord.h:68
ROW
Definition: ocrrow.h:35
textord_words_min_minspace
double textord_words_min_minspace
Definition: tovars.cpp:46
TBOX::right
int16_t right() const
Definition: rect.h:78
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
POLY_BLOCK
Definition: polyblk.h:26
TO_ROW
Definition: blobbox.h:543
compute_fixed_pitch
void compute_fixed_pitch(ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient, FCOORD rotation, bool testing_on)
Definition: topitch.cpp:78
TO_ROW::kern_size
float kern_size
Definition: blobbox.h:665
BLOBNBOX::cblob
C_BLOB * cblob() const
Definition: blobbox.h:267
textord_test_x
int textord_test_x
Definition: makerow.cpp:60
ROW::word_list
WERD_LIST * word_list()
Definition: ocrrow.h:54
pitsync1.h
to_win
ScrollView * to_win
Definition: drawtord.cpp:34
textord_force_make_prop_words
bool textord_force_make_prop_words
Definition: wordseg.cpp:37
fpchop.h
W_BOL
start of line
Definition: werd.h:46
BLOCK::set_stats
void set_stats(bool prop, int16_t kern, int16_t space, int16_t ch_pitch)
Definition: ocrblock.h:57
textord_fp_chopping
bool textord_fp_chopping
Definition: wordseg.cpp:35
TO_ROW::blob_list
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:599
TBOX
Definition: rect.h:33
make_real_words
void make_real_words(tesseract::Textord *textord, TO_BLOCK *block, FCOORD rotation)
Definition: wordseg.cpp:490