tesseract  4.0.0-1-g2a2b
wordseg.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: wordseg.cpp (Formerly wspace.c)
3  * Description: Code to segment the blobs into words.
4  * Author: Ray Smith
5  * Created: Fri Oct 16 11:32:28 BST 1992
6  *
7  * (C) Copyright 1992, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #include "blobbox.h"
21 #include "statistc.h"
22 #include "drawtord.h"
23 #include "makerow.h"
24 #include "pitsync1.h"
25 #include "tovars.h"
26 #include "topitch.h"
27 #include "cjkpitch.h"
28 #include "textord.h"
29 #include "fpchop.h"
30 #include "wordseg.h"
31 
32 // Include automatically generated configuration file if running autoconf.
33 #ifdef HAVE_CONFIG_H
34 #include "config_auto.h"
35 #endif
36 
37 #define EXTERN
38 
39 EXTERN BOOL_VAR(textord_fp_chopping, TRUE, "Do fixed pitch chopping");
41  "Force proportional word segmentation on all rows");
43  "Chopper is being tested.");
44 
45 #define FIXED_WIDTH_MULTIPLE 5
46 #define BLOCK_STATS_CLUSTERS 10
47 
48 
56 void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST* real_rows) {
57  TO_ROW_IT to_row_it(rows);
58  ROW_IT row_it(real_rows);
59  for (to_row_it.mark_cycle_pt(); !to_row_it.cycled_list();
60  to_row_it.forward()) {
61  TO_ROW* row = to_row_it.data();
62  // The blobs have to come out of the BLOBNBOX into the C_BLOB_LIST ready
63  // to create the word.
64  C_BLOB_LIST cblobs;
65  C_BLOB_IT cblob_it(&cblobs);
66  BLOBNBOX_IT box_it(row->blob_list());
67  for (;!box_it.empty(); box_it.forward()) {
68  BLOBNBOX* bblob= box_it.extract();
69  if (bblob->joined_to_prev() || (one_blob && !cblob_it.empty())) {
70  if (bblob->cblob() != nullptr) {
71  C_OUTLINE_IT cout_it(cblob_it.data()->out_list());
72  cout_it.move_to_last();
73  cout_it.add_list_after(bblob->cblob()->out_list());
74  delete bblob->cblob();
75  }
76  } else {
77  if (bblob->cblob() != nullptr)
78  cblob_it.add_after_then_move(bblob->cblob());
79  }
80  delete bblob;
81  }
82  // Convert the TO_ROW to a ROW.
83  ROW* real_row = new ROW(row, static_cast<int16_t>(row->kern_size),
84  static_cast<int16_t>(row->space_size));
85  WERD_IT word_it(real_row->word_list());
86  WERD* word = new WERD(&cblobs, 0, nullptr);
87  word->set_flag(W_BOL, TRUE);
88  word->set_flag(W_EOL, TRUE);
89  word->set_flag(W_DONT_CHOP, one_blob);
90  word_it.add_after_then_move(word);
91  row_it.add_after_then_move(real_row);
92  }
93 }
94 
101  ICOORD page_tr, // top right
102  float gradient, // page skew
103  BLOCK_LIST *blocks, // block list
104  TO_BLOCK_LIST *port_blocks) { // output list
105  TO_BLOCK_IT block_it; // iterator
106  TO_BLOCK *block; // current block
107 
108  if (textord->use_cjk_fp_model()) {
109  compute_fixed_pitch_cjk(page_tr, port_blocks);
110  } else {
111  compute_fixed_pitch(page_tr, port_blocks, gradient, FCOORD(0.0f, -1.0f),
113  }
114  textord->to_spacing(page_tr, port_blocks);
115  block_it.set_to_list(port_blocks);
116  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
117  block = block_it.data();
118  make_real_words(textord, block, FCOORD(1.0f, 0.0f));
119  }
120 }
121 
122 
130 void set_row_spaces( //find space sizes
131  TO_BLOCK* block, //block to do
132  FCOORD rotation, //for drawing
133  bool testing_on //correct orientation
134 ) {
135  TO_ROW *row; //current row
136  TO_ROW_IT row_it = block->get_rows ();
137 
138  if (row_it.empty ())
139  return; //empty block
140  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
141  row = row_it.data ();
142  if (row->fixed_pitch == 0) {
143  row->min_space =
144  (int32_t) ceil (row->pr_space -
145  (row->pr_space -
147  row->max_nonspace =
148  (int32_t) floor (row->pr_nonsp +
149  (row->pr_space -
151  if (testing_on && textord_show_initial_words) {
152  tprintf ("Assigning defaults %d non, %d space to row at %g\n",
153  row->max_nonspace, row->min_space, row->intercept ());
154  }
155  row->space_threshold = (row->max_nonspace + row->min_space) / 2;
156  row->space_size = row->pr_space;
157  row->kern_size = row->pr_nonsp;
158  }
159 #ifndef GRAPHICS_DISABLED
160  if (textord_show_initial_words && testing_on) {
161  plot_word_decisions (to_win, (int16_t) row->fixed_pitch, row);
162  }
163 #endif
164  }
165 }
166 
167 
174 int32_t row_words( //compute space size
175  TO_BLOCK* block, //block it came from
176  TO_ROW* row, //row to operate on
177  int32_t maxwidth, //max expected space size
178  FCOORD rotation, //for drawing
179  bool testing_on //for debug
180 ) {
181  bool testing_row; //contains testpt
182  bool prev_valid; //if decent size
183  int32_t prev_x; //end of prev blob
184  int32_t cluster_count; //no of clusters
185  int32_t gap_index; //which cluster
186  int32_t smooth_factor; //for smoothing stats
187  BLOBNBOX *blob; //current blob
188  float lower, upper; //clustering parameters
189  float gaps[3]; //gap clusers
190  ICOORD testpt;
191  TBOX blob_box; //bounding box
192  //iterator
193  BLOBNBOX_IT blob_it = row->blob_list ();
194  STATS gap_stats (0, maxwidth);
195  STATS cluster_stats[4]; //clusters
196 
198  smooth_factor =
199  (int32_t) (block->xheight * textord_wordstats_smooth_factor + 1.5);
200  // if (testing_on)
201  // tprintf("Row smooth factor=%d\n",smooth_factor);
202  prev_valid = false;
203  prev_x = -INT32_MAX;
204  testing_row = false;
205  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
206  blob = blob_it.data ();
207  blob_box = blob->bounding_box ();
208  if (blob_box.contains (testpt))
209  testing_row = true;
210  gap_stats.add (blob_box.width (), 1);
211  }
212  gap_stats.clear ();
213  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
214  blob = blob_it.data ();
215  if (!blob->joined_to_prev ()) {
216  blob_box = blob->bounding_box ();
217  if (prev_valid && blob_box.left () - prev_x < maxwidth) {
218  gap_stats.add (blob_box.left () - prev_x, 1);
219  }
220  prev_valid = TRUE;
221  prev_x = blob_box.right ();
222  }
223  }
224  if (gap_stats.get_total () == 0) {
225  row->min_space = 0; //no evidence
226  row->max_nonspace = 0;
227  return 0;
228  }
229  gap_stats.smooth (smooth_factor);
230  lower = row->xheight * textord_words_initial_lower;
231  upper = row->xheight * textord_words_initial_upper;
232  cluster_count = gap_stats.cluster (lower, upper,
234  cluster_stats);
235  while (cluster_count < 2 && ceil (lower) < floor (upper)) {
236  //shrink gap
237  upper = (upper * 3 + lower) / 4;
238  lower = (lower * 3 + upper) / 4;
239  cluster_count = gap_stats.cluster (lower, upper,
241  cluster_stats);
242  }
243  if (cluster_count < 2) {
244  row->min_space = 0; //no evidence
245  row->max_nonspace = 0;
246  return 0;
247  }
248  for (gap_index = 0; gap_index < cluster_count; gap_index++)
249  gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
250  //get medians
251  if (cluster_count > 2) {
252  if (testing_on && textord_show_initial_words) {
253  tprintf ("Row at %g has 3 sizes of gap:%g,%g,%g\n",
254  row->intercept (),
255  cluster_stats[1].ile (0.5),
256  cluster_stats[2].ile (0.5), cluster_stats[3].ile (0.5));
257  }
258  lower = gaps[0];
259  if (gaps[1] > lower) {
260  upper = gaps[1]; //prefer most frequent
261  if (upper < block->xheight * textord_words_min_minspace
262  && gaps[2] > gaps[1]) {
263  upper = gaps[2];
264  }
265  }
266  else if (gaps[2] > lower
267  && gaps[2] >= block->xheight * textord_words_min_minspace)
268  upper = gaps[2];
269  else if (lower >= block->xheight * textord_words_min_minspace) {
270  upper = lower; //not nice
271  lower = gaps[1];
272  if (testing_on && textord_show_initial_words) {
273  tprintf ("Had to switch most common from lower to upper!!\n");
274  gap_stats.print();
275  }
276  }
277  else {
278  row->min_space = 0; //no evidence
279  row->max_nonspace = 0;
280  return 0;
281  }
282  }
283  else {
284  if (gaps[1] < gaps[0]) {
285  if (testing_on && textord_show_initial_words) {
286  tprintf ("Had to switch most common from lower to upper!!\n");
287  gap_stats.print();
288  }
289  lower = gaps[1];
290  upper = gaps[0];
291  }
292  else {
293  upper = gaps[1];
294  lower = gaps[0];
295  }
296  }
297  if (upper < block->xheight * textord_words_min_minspace) {
298  row->min_space = 0; //no evidence
299  row->max_nonspace = 0;
300  return 0;
301  }
302  if (upper * 3 < block->min_space * 2 + block->max_nonspace
303  || lower * 3 > block->min_space * 2 + block->max_nonspace) {
304  if (testing_on && textord_show_initial_words) {
305  tprintf ("Disagreement between block and row at %g!!\n",
306  row->intercept ());
307  tprintf ("Lower=%g, upper=%g, Stats:\n", lower, upper);
308  gap_stats.print();
309  }
310  }
311  row->min_space =
312  (int32_t) ceil (upper - (upper - lower) * textord_words_definite_spread);
313  row->max_nonspace =
314  (int32_t) floor (lower + (upper - lower) * textord_words_definite_spread);
315  row->space_threshold = (row->max_nonspace + row->min_space) / 2;
316  row->space_size = upper;
317  row->kern_size = lower;
318  if (testing_on && textord_show_initial_words) {
319  if (testing_row) {
320  tprintf ("GAP STATS\n");
321  gap_stats.print();
322  tprintf ("SPACE stats\n");
323  cluster_stats[2].print_summary();
324  tprintf ("NONSPACE stats\n");
325  cluster_stats[1].print_summary();
326  }
327  tprintf ("Row at %g has minspace=%d(%g), max_non=%d(%g)\n",
328  row->intercept (), row->min_space, upper,
329  row->max_nonspace, lower);
330  }
331  return cluster_stats[2].get_total ();
332 }
333 
334 
341 int32_t row_words2( //compute space size
342  TO_BLOCK* block, //block it came from
343  TO_ROW* row, //row to operate on
344  int32_t maxwidth, //max expected space size
345  FCOORD rotation, //for drawing
346  bool testing_on //for debug
347 ) {
348  bool prev_valid; //if decent size
349  bool this_valid; //current blob big enough
350  int32_t prev_x; //end of prev blob
351  int32_t min_width; //min interesting width
352  int32_t valid_count; //good gaps
353  int32_t total_count; //total gaps
354  int32_t cluster_count; //no of clusters
355  int32_t prev_count; //previous cluster_count
356  int32_t gap_index; //which cluster
357  int32_t smooth_factor; //for smoothing stats
358  BLOBNBOX *blob; //current blob
359  float lower, upper; //clustering parameters
360  ICOORD testpt;
361  TBOX blob_box; //bounding box
362  //iterator
363  BLOBNBOX_IT blob_it = row->blob_list ();
364  STATS gap_stats (0, maxwidth);
365  //gap sizes
366  float gaps[BLOCK_STATS_CLUSTERS];
367  STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1];
368  //clusters
369 
371  smooth_factor =
372  (int32_t) (block->xheight * textord_wordstats_smooth_factor + 1.5);
373  // if (testing_on)
374  // tprintf("Row smooth factor=%d\n",smooth_factor);
375  prev_valid = false;
376  prev_x = -INT16_MAX;
377  const bool testing_row = false;
378  //min blob size
379  min_width = (int32_t) block->pr_space;
380  total_count = 0;
381  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
382  blob = blob_it.data ();
383  if (!blob->joined_to_prev ()) {
384  blob_box = blob->bounding_box ();
385  this_valid = blob_box.width () >= min_width;
386  if (this_valid && prev_valid
387  && blob_box.left () - prev_x < maxwidth) {
388  gap_stats.add (blob_box.left () - prev_x, 1);
389  }
390  total_count++; //count possibles
391  prev_x = blob_box.right ();
392  prev_valid = this_valid;
393  }
394  }
395  valid_count = gap_stats.get_total ();
396  if (valid_count < total_count * textord_words_minlarge) {
397  gap_stats.clear ();
398  prev_x = -INT16_MAX;
399  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
400  blob_it.forward ()) {
401  blob = blob_it.data ();
402  if (!blob->joined_to_prev ()) {
403  blob_box = blob->bounding_box ();
404  if (blob_box.left () - prev_x < maxwidth) {
405  gap_stats.add (blob_box.left () - prev_x, 1);
406  }
407  prev_x = blob_box.right ();
408  }
409  }
410  }
411  if (gap_stats.get_total () == 0) {
412  row->min_space = 0; //no evidence
413  row->max_nonspace = 0;
414  return 0;
415  }
416 
417  cluster_count = 0;
418  lower = block->xheight * words_initial_lower;
419  upper = block->xheight * words_initial_upper;
420  gap_stats.smooth (smooth_factor);
421  do {
422  prev_count = cluster_count;
423  cluster_count = gap_stats.cluster (lower, upper,
425  BLOCK_STATS_CLUSTERS, cluster_stats);
426  }
427  while (cluster_count > prev_count && cluster_count < BLOCK_STATS_CLUSTERS);
428  if (cluster_count < 1) {
429  row->min_space = 0;
430  row->max_nonspace = 0;
431  return 0;
432  }
433  for (gap_index = 0; gap_index < cluster_count; gap_index++)
434  gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
435  //get medians
436  if (testing_on) {
437  tprintf ("cluster_count=%d:", cluster_count);
438  for (gap_index = 0; gap_index < cluster_count; gap_index++)
439  tprintf (" %g(%d)", gaps[gap_index],
440  cluster_stats[gap_index + 1].get_total ());
441  tprintf ("\n");
442  }
443 
444  //Try to find proportional non-space and space for row.
445  for (gap_index = 0; gap_index < cluster_count
446  && gaps[gap_index] > block->max_nonspace; gap_index++);
447  if (gap_index < cluster_count)
448  lower = gaps[gap_index]; //most frequent below
449  else {
450  if (testing_on)
451  tprintf ("No cluster below block threshold!, using default=%g\n",
452  block->pr_nonsp);
453  lower = block->pr_nonsp;
454  }
455  for (gap_index = 0; gap_index < cluster_count
456  && gaps[gap_index] <= block->max_nonspace; gap_index++);
457  if (gap_index < cluster_count)
458  upper = gaps[gap_index]; //most frequent above
459  else {
460  if (testing_on)
461  tprintf ("No cluster above block threshold!, using default=%g\n",
462  block->pr_space);
463  upper = block->pr_space;
464  }
465  row->min_space =
466  (int32_t) ceil (upper - (upper - lower) * textord_words_definite_spread);
467  row->max_nonspace =
468  (int32_t) floor (lower + (upper - lower) * textord_words_definite_spread);
469  row->space_threshold = (row->max_nonspace + row->min_space) / 2;
470  row->space_size = upper;
471  row->kern_size = lower;
472  if (testing_on) {
473  if (testing_row) {
474  tprintf ("GAP STATS\n");
475  gap_stats.print();
476  tprintf ("SPACE stats\n");
477  cluster_stats[2].print_summary();
478  tprintf ("NONSPACE stats\n");
479  cluster_stats[1].print_summary();
480  }
481  tprintf ("Row at %g has minspace=%d(%g), max_non=%d(%g)\n",
482  row->intercept (), row->min_space, upper,
483  row->max_nonspace, lower);
484  }
485  return 1;
486 }
487 
488 
496  tesseract::Textord *textord,
497  TO_BLOCK *block, //block to do
498  FCOORD rotation //for drawing
499  ) {
500  TO_ROW *row; //current row
501  TO_ROW_IT row_it = block->get_rows ();
502  ROW *real_row = nullptr; //output row
503  ROW_IT real_row_it = block->block->row_list ();
504 
505  if (row_it.empty ())
506  return; //empty block
507  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
508  row = row_it.data ();
509  if (row->blob_list ()->empty () && !row->rep_words.empty ()) {
510  real_row = make_rep_words (row, block);
511  } else if (!row->blob_list()->empty()) {
512  // In a fixed pitch document, some lines may be detected as fixed pitch
513  // while others don't, and will go through different path.
514  // For non-space delimited language like CJK, fixed pitch chop always
515  // leave the entire line as one word. We can force consistent chopping
516  // with force_make_prop_words flag.
517  POLY_BLOCK* pb = block->block->pdblk.poly_block();
518  if (textord_chopper_test) {
519  real_row = textord->make_blob_words (row, rotation);
520  } else if (textord_force_make_prop_words ||
521  (pb != nullptr && !pb->IsText()) ||
522  row->pitch_decision == PITCH_DEF_PROP ||
524  real_row = textord->make_prop_words (row, rotation);
525  } else if (row->pitch_decision == PITCH_DEF_FIXED ||
527  real_row = fixed_pitch_words (row, rotation);
528  } else {
530  }
531  }
532  if (real_row != nullptr) {
533  //put row in block
534  real_row_it.add_after_then_move (real_row);
535  }
536  }
537  block->block->set_stats (block->fixed_pitch == 0, (int16_t) block->kern_size,
538  (int16_t) block->space_size,
539  (int16_t) block->fixed_pitch);
540  block->block->check_pitch ();
541 }
542 
543 
551 ROW *make_rep_words( //make a row
552  TO_ROW *row, //row to convert
553  TO_BLOCK *block //block it lives in
554  ) {
555  ROW *real_row; //output row
556  TBOX word_box; //bounding box
557  //iterator
558  WERD_IT word_it = &row->rep_words;
559 
560  if (word_it.empty ())
561  return nullptr;
562  word_box = word_it.data ()->bounding_box ();
563  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ())
564  word_box += word_it.data ()->bounding_box ();
565  row->xheight = block->xheight;
566  real_row = new ROW(row,
567  (int16_t) block->kern_size, (int16_t) block->space_size);
568  word_it.set_to_list (real_row->word_list ());
569  //put words in row
570  word_it.add_list_after (&row->rep_words);
571  real_row->recalc_bounding_box ();
572  return real_row;
573 }
574 
575 
583 WERD *make_real_word(BLOBNBOX_IT *box_it, //iterator
584  int32_t blobcount, //no of blobs to use
585  bool bol, //start of line
586  uint8_t blanks //no of blanks
587  ) {
588  C_OUTLINE_IT cout_it;
589  C_BLOB_LIST cblobs;
590  C_BLOB_IT cblob_it = &cblobs;
591  WERD *word; // new word
592  BLOBNBOX *bblob; // current blob
593  int32_t blobindex; // in row
594 
595  for (blobindex = 0; blobindex < blobcount; blobindex++) {
596  bblob = box_it->extract();
597  if (bblob->joined_to_prev()) {
598  if (bblob->cblob() != nullptr) {
599  cout_it.set_to_list(cblob_it.data()->out_list());
600  cout_it.move_to_last();
601  cout_it.add_list_after(bblob->cblob()->out_list());
602  delete bblob->cblob();
603  }
604  }
605  else {
606  if (bblob->cblob() != nullptr)
607  cblob_it.add_after_then_move(bblob->cblob());
608  }
609  delete bblob;
610  box_it->forward(); // next one
611  }
612 
613  if (blanks < 1)
614  blanks = 1;
615 
616  word = new WERD(&cblobs, blanks, nullptr);
617 
618  if (bol)
619  word->set_flag(W_BOL, true);
620  if (box_it->at_first())
621  word->set_flag(W_EOL, true); // at end of line
622 
623  return word;
624 }
float intercept() const
Definition: blobbox.h:601
WERD * make_real_word(BLOBNBOX_IT *box_it, int32_t blobcount, bool bol, uint8_t blanks)
Definition: wordseg.cpp:583
bool use_cjk_fp_model() const
Definition: textord.h:92
void clear()
Definition: statistc.cpp:82
EXTERN double textord_words_min_minspace
Definition: tovars.cpp:49
#define TRUE
Definition: capi.h:51
EXTERN double textord_words_initial_upper
Definition: tovars.cpp:55
ROW * make_rep_words(TO_ROW *row, TO_BLOCK *block)
Definition: wordseg.cpp:551
#define BOOL_VAR(name, val, comment)
Definition: params.h:279
float fixed_pitch
Definition: blobbox.h:802
int32_t row_words2(TO_BLOCK *block, TO_ROW *row, int32_t maxwidth, FCOORD rotation, bool testing_on)
Definition: wordseg.cpp:341
float fixed_pitch
Definition: blobbox.h:664
WERD_LIST rep_words
Definition: blobbox.h:681
void set_row_spaces(TO_BLOCK *block, FCOORD rotation, bool testing_on)
Definition: wordseg.cpp:130
float kern_size
Definition: blobbox.h:803
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:118
Definition: rect.h:34
WERD_LIST * word_list()
Definition: ocrrow.h:55
void plot_word_decisions(ScrollView *win, int16_t pitch, TO_ROW *row)
Definition: drawtord.cpp:249
Definition: werd.h:35
EXTERN bool textord_fp_chopping
Definition: wordseg.cpp:39
bool textord_test_landscape
Definition: makerow.cpp:49
#define BLOCK_STATS_CLUSTERS
Definition: wordseg.cpp:46
int32_t row_words(TO_BLOCK *block, TO_ROW *row, int32_t maxwidth, FCOORD rotation, bool testing_on)
Definition: wordseg.cpp:174
float space_size
Definition: blobbox.h:680
void set_flag(WERD_FLAGS mask, bool value)
Definition: werd.h:127
Definition: statistc.h:33
TO_ROW_LIST * get_rows()
Definition: blobbox.h:717
float pr_nonsp
Definition: blobbox.h:668
int32_t max_nonspace
Definition: blobbox.h:806
EXTERN double textord_spacesize_ratioprop
Definition: tovars.cpp:80
#define EXTERN
Definition: wordseg.cpp:37
float space_size
Definition: blobbox.h:804
EXTERN double textord_words_initial_lower
Definition: tovars.cpp:53
int16_t width() const
Definition: rect.h:115
float xheight
Definition: blobbox.h:670
int16_t left() const
Definition: rect.h:72
float pr_space
Definition: blobbox.h:667
void set_stats(BOOL8 prop, int16_t kern, int16_t space, int16_t ch_pitch)
Definition: ocrblock.h:60
EXTERN double words_initial_upper
Definition: tovars.cpp:71
void make_words(tesseract::Textord *textord, ICOORD page_tr, float gradient, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
Definition: wordseg.cpp:100
void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST *real_rows)
Definition: wordseg.cpp:56
integer coordinate
Definition: points.h:32
void print_summary() const
Definition: statistc.cpp:559
#define FALSE
Definition: capi.h:52
double ile(double frac) const
Definition: statistc.cpp:173
float kern_size
Definition: blobbox.h:679
bool joined_to_prev() const
Definition: blobbox.h:257
void make_real_words(tesseract::Textord *textord, TO_BLOCK *block, FCOORD rotation)
Definition: wordseg.cpp:495
void smooth(int32_t factor)
Definition: statistc.cpp:288
void compute_fixed_pitch(ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient, FCOORD rotation, bool testing_on)
Definition: topitch.cpp:84
EXTERN double textord_words_definite_spread
Definition: tovars.cpp:76
float xheight
Definition: blobbox.h:801
int32_t min_space
Definition: blobbox.h:805
POLY_BLOCK * poly_block() const
Definition: pdblock.h:56
Definition: werd.h:59
unsigned char BOOL8
Definition: host.h:34
Definition: ocrrow.h:36
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
Definition: werd.h:34
int32_t space_threshold
Definition: blobbox.h:678
bool IsText() const
Definition: polyblk.h:49
int textord_test_y
Definition: makerow.cpp:62
ROW * make_blob_words(TO_ROW *row, FCOORD rotation)
Definition: tospace.cpp:1185
float pr_nonsp
Definition: blobbox.h:810
EXTERN double textord_wordstats_smooth_factor
Definition: tovars.cpp:39
void add(int32_t value, int32_t count)
Definition: statistc.cpp:100
EXTERN ScrollView * to_win
Definition: drawtord.cpp:37
BLOCK * block
Definition: blobbox.h:790
EXTERN bool textord_show_initial_words
Definition: tovars.cpp:25
void recalc_bounding_box()
Definition: ocrrow.cpp:101
void compute_fixed_pitch_cjk(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
Definition: cjkpitch.cpp:1060
int32_t cluster(float lower, float upper, float multiple, int32_t max_clusters, STATS *clusters)
Definition: statistc.cpp:319
int textord_test_x
Definition: makerow.cpp:61
float pr_space
Definition: blobbox.h:809
ROW * make_prop_words(TO_ROW *row, FCOORD rotation)
Definition: tospace.cpp:892
bool contains(const FCOORD pt) const
Definition: rect.h:333
EXTERN bool textord_force_make_prop_words
Definition: wordseg.cpp:41
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:70
Definition: points.h:189
const TBOX & bounding_box() const
Definition: blobbox.h:231
int32_t min_space
Definition: blobbox.h:676
void print() const
Definition: statistc.cpp:533
int16_t right() const
Definition: rect.h:79
void check_pitch()
check proportional
Definition: ocrblock.cpp:168
ROW * fixed_pitch_words(TO_ROW *row, FCOORD rotation)
Definition: fpchop.cpp:47
int32_t max_nonspace
Definition: blobbox.h:677
EXTERN double words_initial_lower
Definition: tovars.cpp:70
EXTERN bool textord_chopper_test
Definition: wordseg.cpp:43
EXTERN double textord_words_minlarge
Definition: tovars.cpp:57
PDBLK pdblk
Definition: ocrblock.h:192
C_BLOB * cblob() const
Definition: blobbox.h:269
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:612
int32_t get_total() const
Definition: statistc.h:86
#define ASSERT_HOST(x)
Definition: errcode.h:84
void to_spacing(ICOORD page_tr, TO_BLOCK_LIST *blocks)
Definition: tospace.cpp:44
PITCH_TYPE pitch_decision
Definition: blobbox.h:663