tesseract  4.0.0-1-g2a2b
wordseg.h File Reference
#include "params.h"
#include "blobbox.h"
#include "textord.h"

Go to the source code of this file.

Namespaces

 tesseract
 

Functions

void make_single_word (bool one_blob, TO_ROW_LIST *rows, ROW_LIST *real_rows)
 
void make_words (tesseract::Textord *textord, ICOORD page_tr, float gradient, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
 
void set_row_spaces (TO_BLOCK *block, FCOORD rotation, bool testing_on)
 
int32_t row_words (TO_BLOCK *block, TO_ROW *row, int32_t maxwidth, FCOORD rotation, bool testing_on)
 
int32_t row_words2 (TO_BLOCK *block, TO_ROW *row, int32_t maxwidth, FCOORD rotation, bool testing_on)
 
void make_real_words (tesseract::Textord *textord, TO_BLOCK *block, FCOORD rotation)
 
ROW * make_rep_words (TO_ROW *row, TO_BLOCK *block)
 
WERD * make_real_word (BLOBNBOX_IT *box_it, int32_t blobcount, bool bol, uint8_t blanks)
 

Variables

bool textord_fp_chopping = TRUE
 
bool textord_force_make_prop_words = FALSE
 
bool textord_chopper_test = FALSE
 

Function Documentation

◆ make_real_word()

WERD* make_real_word ( BLOBNBOX_IT *  box_it,
int32_t  blobcount,
bool  bol,
uint8_t  blanks 
)

Definition at line 583 of file wordseg.cpp.

587  {
588  C_OUTLINE_IT cout_it;
589  C_BLOB_LIST cblobs;
590  C_BLOB_IT cblob_it = &cblobs;
591  WERD *word; // new word
592  BLOBNBOX *bblob; // current blob
593  int32_t blobindex; // in row
594 
595  for (blobindex = 0; blobindex < blobcount; blobindex++) {
596  bblob = box_it->extract();
597  if (bblob->joined_to_prev()) {
598  if (bblob->cblob() != nullptr) {
599  cout_it.set_to_list(cblob_it.data()->out_list());
600  cout_it.move_to_last();
601  cout_it.add_list_after(bblob->cblob()->out_list());
602  delete bblob->cblob();
603  }
604  }
605  else {
606  if (bblob->cblob() != nullptr)
607  cblob_it.add_after_then_move(bblob->cblob());
608  }
609  delete bblob;
610  box_it->forward(); // next one
611  }
612 
613  if (blanks < 1)
614  blanks = 1;
615 
616  word = new WERD(&cblobs, blanks, nullptr);
617 
618  if (bol)
619  word->set_flag(W_BOL, true);
620  if (box_it->at_first())
621  word->set_flag(W_EOL, true); // at end of line
622 
623  return word;
624 }
Definition: werd.h:35
void set_flag(WERD_FLAGS mask, bool value)
Definition: werd.h:127
bool joined_to_prev() const
Definition: blobbox.h:257
Definition: werd.h:59
Definition: werd.h:34
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:70
C_BLOB * cblob() const
Definition: blobbox.h:269

◆ make_real_words()

void make_real_words ( tesseract::Textord *  textord,
TO_BLOCK *  block,
FCOORD  rotation 
)

Definition at line 495 of file wordseg.cpp.

499  {
500  TO_ROW *row; //current row
501  TO_ROW_IT row_it = block->get_rows ();
502  ROW *real_row = nullptr; //output row
503  ROW_IT real_row_it = block->block->row_list ();
504 
505  if (row_it.empty ())
506  return; //empty block
507  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
508  row = row_it.data ();
509  if (row->blob_list ()->empty () && !row->rep_words.empty ()) {
510  real_row = make_rep_words (row, block);
511  } else if (!row->blob_list()->empty()) {
512  // In a fixed pitch document, some lines may be detected as fixed pitch
513  // while others don't, and will go through different path.
514  // For non-space delimited language like CJK, fixed pitch chop always
515  // leave the entire line as one word. We can force consistent chopping
516  // with force_make_prop_words flag.
517  POLY_BLOCK* pb = block->block->pdblk.poly_block();
518  if (textord_chopper_test) {
519  real_row = textord->make_blob_words (row, rotation);
520  } else if (textord_force_make_prop_words ||
521  (pb != nullptr && !pb->IsText()) ||
522  row->pitch_decision == PITCH_DEF_PROP ||
524  real_row = textord->make_prop_words (row, rotation);
525  } else if (row->pitch_decision == PITCH_DEF_FIXED ||
527  real_row = fixed_pitch_words (row, rotation);
528  } else {
530  }
531  }
532  if (real_row != nullptr) {
533  //put row in block
534  real_row_it.add_after_then_move (real_row);
535  }
536  }
537  block->block->set_stats (block->fixed_pitch == 0, (int16_t) block->kern_size,
538  (int16_t) block->space_size,
539  (int16_t) block->fixed_pitch);
540  block->block->check_pitch ();
541 }
ROW * make_rep_words(TO_ROW *row, TO_BLOCK *block)
Definition: wordseg.cpp:551
float fixed_pitch
Definition: blobbox.h:802
WERD_LIST rep_words
Definition: blobbox.h:681
float kern_size
Definition: blobbox.h:803
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:118
TO_ROW_LIST * get_rows()
Definition: blobbox.h:717
float space_size
Definition: blobbox.h:804
void set_stats(BOOL8 prop, int16_t kern, int16_t space, int16_t ch_pitch)
Definition: ocrblock.h:60
#define FALSE
Definition: capi.h:52
POLY_BLOCK * poly_block() const
Definition: pdblock.h:56
Definition: ocrrow.h:36
bool IsText() const
Definition: polyblk.h:49
ROW * make_blob_words(TO_ROW *row, FCOORD rotation)
Definition: tospace.cpp:1185
BLOCK * block
Definition: blobbox.h:790
ROW * make_prop_words(TO_ROW *row, FCOORD rotation)
Definition: tospace.cpp:892
EXTERN bool textord_force_make_prop_words
Definition: wordseg.cpp:41
void check_pitch()
check proportional
Definition: ocrblock.cpp:168
ROW * fixed_pitch_words(TO_ROW *row, FCOORD rotation)
Definition: fpchop.cpp:47
EXTERN bool textord_chopper_test
Definition: wordseg.cpp:43
PDBLK pdblk
Definition: ocrblock.h:192
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:612
#define ASSERT_HOST(x)
Definition: errcode.h:84
PITCH_TYPE pitch_decision
Definition: blobbox.h:663

◆ make_rep_words()

ROW* make_rep_words ( TO_ROW *  row,
TO_BLOCK *  block 
)

Definition at line 551 of file wordseg.cpp.

554  {
555  ROW *real_row; //output row
556  TBOX word_box; //bounding box
557  //iterator
558  WERD_IT word_it = &row->rep_words;
559 
560  if (word_it.empty ())
561  return nullptr;
562  word_box = word_it.data ()->bounding_box ();
563  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ())
564  word_box += word_it.data ()->bounding_box ();
565  row->xheight = block->xheight;
566  real_row = new ROW(row,
567  (int16_t) block->kern_size, (int16_t) block->space_size);
568  word_it.set_to_list (real_row->word_list ());
569  //put words in row
570  word_it.add_list_after (&row->rep_words);
571  real_row->recalc_bounding_box ();
572  return real_row;
573 }
WERD_LIST rep_words
Definition: blobbox.h:681
float kern_size
Definition: blobbox.h:803
Definition: rect.h:34
WERD_LIST * word_list()
Definition: ocrrow.h:55
float space_size
Definition: blobbox.h:804
float xheight
Definition: blobbox.h:670
float xheight
Definition: blobbox.h:801
Definition: ocrrow.h:36
void recalc_bounding_box()
Definition: ocrrow.cpp:101

◆ make_single_word()

void make_single_word ( bool  one_blob,
TO_ROW_LIST *  rows,
ROW_LIST *  real_rows 
)

Definition at line 56 of file wordseg.cpp.

56  {
57  TO_ROW_IT to_row_it(rows);
58  ROW_IT row_it(real_rows);
59  for (to_row_it.mark_cycle_pt(); !to_row_it.cycled_list();
60  to_row_it.forward()) {
61  TO_ROW* row = to_row_it.data();
62  // The blobs have to come out of the BLOBNBOX into the C_BLOB_LIST ready
63  // to create the word.
64  C_BLOB_LIST cblobs;
65  C_BLOB_IT cblob_it(&cblobs);
66  BLOBNBOX_IT box_it(row->blob_list());
67  for (;!box_it.empty(); box_it.forward()) {
68  BLOBNBOX* bblob= box_it.extract();
69  if (bblob->joined_to_prev() || (one_blob && !cblob_it.empty())) {
70  if (bblob->cblob() != nullptr) {
71  C_OUTLINE_IT cout_it(cblob_it.data()->out_list());
72  cout_it.move_to_last();
73  cout_it.add_list_after(bblob->cblob()->out_list());
74  delete bblob->cblob();
75  }
76  } else {
77  if (bblob->cblob() != nullptr)
78  cblob_it.add_after_then_move(bblob->cblob());
79  }
80  delete bblob;
81  }
82  // Convert the TO_ROW to a ROW.
83  ROW* real_row = new ROW(row, static_cast<int16_t>(row->kern_size),
84  static_cast<int16_t>(row->space_size));
85  WERD_IT word_it(real_row->word_list());
86  WERD* word = new WERD(&cblobs, 0, nullptr);
87  word->set_flag(W_BOL, TRUE);
88  word->set_flag(W_EOL, TRUE);
89  word->set_flag(W_DONT_CHOP, one_blob);
90  word_it.add_after_then_move(word);
91  row_it.add_after_then_move(real_row);
92  }
93 }
#define TRUE
Definition: capi.h:51
WERD_LIST * word_list()
Definition: ocrrow.h:55
Definition: werd.h:35
float space_size
Definition: blobbox.h:680
float kern_size
Definition: blobbox.h:679
bool joined_to_prev() const
Definition: blobbox.h:257
Definition: werd.h:59
Definition: ocrrow.h:36
Definition: werd.h:34
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:70
C_BLOB * cblob() const
Definition: blobbox.h:269
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:612

◆ make_words()

void make_words ( tesseract::Textord *  textord,
ICOORD  page_tr,
float  gradient,
BLOCK_LIST *  blocks,
TO_BLOCK_LIST *  port_blocks 
)

make_words

Arrange the blobs into words.

Definition at line 100 of file wordseg.cpp.

104  { // output list
105  TO_BLOCK_IT block_it; // iterator
106  TO_BLOCK *block; // current block
107 
108  if (textord->use_cjk_fp_model()) {
109  compute_fixed_pitch_cjk(page_tr, port_blocks);
110  } else {
111  compute_fixed_pitch(page_tr, port_blocks, gradient, FCOORD(0.0f, -1.0f),
113  }
114  textord->to_spacing(page_tr, port_blocks);
115  block_it.set_to_list(port_blocks);
116  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
117  block = block_it.data();
118  make_real_words(textord, block, FCOORD(1.0f, 0.0f));
119  }
120 }
bool use_cjk_fp_model() const
Definition: textord.h:92
bool textord_test_landscape
Definition: makerow.cpp:49
void make_real_words(tesseract::Textord *textord, TO_BLOCK *block, FCOORD rotation)
Definition: wordseg.cpp:495
void compute_fixed_pitch(ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient, FCOORD rotation, bool testing_on)
Definition: topitch.cpp:84
unsigned char BOOL8
Definition: host.h:34
void compute_fixed_pitch_cjk(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
Definition: cjkpitch.cpp:1060
Definition: points.h:189
void to_spacing(ICOORD page_tr, TO_BLOCK_LIST *blocks)
Definition: tospace.cpp:44

◆ row_words()

int32_t row_words ( TO_BLOCK *  block,
TO_ROW *  row,
int32_t  maxwidth,
FCOORD  rotation,
bool  testing_on 
)

Definition at line 174 of file wordseg.cpp.

180  {
181  bool testing_row; //contains testpt
182  bool prev_valid; //if decent size
183  int32_t prev_x; //end of prev blob
184  int32_t cluster_count; //no of clusters
185  int32_t gap_index; //which cluster
186  int32_t smooth_factor; //for smoothing stats
187  BLOBNBOX *blob; //current blob
188  float lower, upper; //clustering parameters
189  float gaps[3]; //gap clusers
190  ICOORD testpt;
191  TBOX blob_box; //bounding box
192  //iterator
193  BLOBNBOX_IT blob_it = row->blob_list ();
194  STATS gap_stats (0, maxwidth);
195  STATS cluster_stats[4]; //clusters
196 
198  smooth_factor =
199  (int32_t) (block->xheight * textord_wordstats_smooth_factor + 1.5);
200  // if (testing_on)
201  // tprintf("Row smooth factor=%d\n",smooth_factor);
202  prev_valid = false;
203  prev_x = -INT32_MAX;
204  testing_row = false;
205  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
206  blob = blob_it.data ();
207  blob_box = blob->bounding_box ();
208  if (blob_box.contains (testpt))
209  testing_row = true;
210  gap_stats.add (blob_box.width (), 1);
211  }
212  gap_stats.clear ();
213  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
214  blob = blob_it.data ();
215  if (!blob->joined_to_prev ()) {
216  blob_box = blob->bounding_box ();
217  if (prev_valid && blob_box.left () - prev_x < maxwidth) {
218  gap_stats.add (blob_box.left () - prev_x, 1);
219  }
220  prev_valid = TRUE;
221  prev_x = blob_box.right ();
222  }
223  }
224  if (gap_stats.get_total () == 0) {
225  row->min_space = 0; //no evidence
226  row->max_nonspace = 0;
227  return 0;
228  }
229  gap_stats.smooth (smooth_factor);
230  lower = row->xheight * textord_words_initial_lower;
231  upper = row->xheight * textord_words_initial_upper;
232  cluster_count = gap_stats.cluster (lower, upper,
234  cluster_stats);
235  while (cluster_count < 2 && ceil (lower) < floor (upper)) {
236  //shrink gap
237  upper = (upper * 3 + lower) / 4;
238  lower = (lower * 3 + upper) / 4;
239  cluster_count = gap_stats.cluster (lower, upper,
241  cluster_stats);
242  }
243  if (cluster_count < 2) {
244  row->min_space = 0; //no evidence
245  row->max_nonspace = 0;
246  return 0;
247  }
248  for (gap_index = 0; gap_index < cluster_count; gap_index++)
249  gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
250  //get medians
251  if (cluster_count > 2) {
252  if (testing_on && textord_show_initial_words) {
253  tprintf ("Row at %g has 3 sizes of gap:%g,%g,%g\n",
254  row->intercept (),
255  cluster_stats[1].ile (0.5),
256  cluster_stats[2].ile (0.5), cluster_stats[3].ile (0.5));
257  }
258  lower = gaps[0];
259  if (gaps[1] > lower) {
260  upper = gaps[1]; //prefer most frequent
261  if (upper < block->xheight * textord_words_min_minspace
262  && gaps[2] > gaps[1]) {
263  upper = gaps[2];
264  }
265  }
266  else if (gaps[2] > lower
267  && gaps[2] >= block->xheight * textord_words_min_minspace)
268  upper = gaps[2];
269  else if (lower >= block->xheight * textord_words_min_minspace) {
270  upper = lower; //not nice
271  lower = gaps[1];
272  if (testing_on && textord_show_initial_words) {
273  tprintf ("Had to switch most common from lower to upper!!\n");
274  gap_stats.print();
275  }
276  }
277  else {
278  row->min_space = 0; //no evidence
279  row->max_nonspace = 0;
280  return 0;
281  }
282  }
283  else {
284  if (gaps[1] < gaps[0]) {
285  if (testing_on && textord_show_initial_words) {
286  tprintf ("Had to switch most common from lower to upper!!\n");
287  gap_stats.print();
288  }
289  lower = gaps[1];
290  upper = gaps[0];
291  }
292  else {
293  upper = gaps[1];
294  lower = gaps[0];
295  }
296  }
297  if (upper < block->xheight * textord_words_min_minspace) {
298  row->min_space = 0; //no evidence
299  row->max_nonspace = 0;
300  return 0;
301  }
302  if (upper * 3 < block->min_space * 2 + block->max_nonspace
303  || lower * 3 > block->min_space * 2 + block->max_nonspace) {
304  if (testing_on && textord_show_initial_words) {
305  tprintf ("Disagreement between block and row at %g!!\n",
306  row->intercept ());
307  tprintf ("Lower=%g, upper=%g, Stats:\n", lower, upper);
308  gap_stats.print();
309  }
310  }
311  row->min_space =
312  (int32_t) ceil (upper - (upper - lower) * textord_words_definite_spread);
313  row->max_nonspace =
314  (int32_t) floor (lower + (upper - lower) * textord_words_definite_spread);
315  row->space_threshold = (row->max_nonspace + row->min_space) / 2;
316  row->space_size = upper;
317  row->kern_size = lower;
318  if (testing_on && textord_show_initial_words) {
319  if (testing_row) {
320  tprintf ("GAP STATS\n");
321  gap_stats.print();
322  tprintf ("SPACE stats\n");
323  cluster_stats[2].print_summary();
324  tprintf ("NONSPACE stats\n");
325  cluster_stats[1].print_summary();
326  }
327  tprintf ("Row at %g has minspace=%d(%g), max_non=%d(%g)\n",
328  row->intercept (), row->min_space, upper,
329  row->max_nonspace, lower);
330  }
331  return cluster_stats[2].get_total ();
332 }
float intercept() const
Definition: blobbox.h:601
EXTERN double textord_words_min_minspace
Definition: tovars.cpp:49
#define TRUE
Definition: capi.h:51
EXTERN double textord_words_initial_upper
Definition: tovars.cpp:55
Definition: rect.h:34
float space_size
Definition: blobbox.h:680
Definition: statistc.h:33
int32_t max_nonspace
Definition: blobbox.h:806
EXTERN double textord_spacesize_ratioprop
Definition: tovars.cpp:80
EXTERN double textord_words_initial_lower
Definition: tovars.cpp:53
int16_t width() const
Definition: rect.h:115
float xheight
Definition: blobbox.h:670
int16_t left() const
Definition: rect.h:72
integer coordinate
Definition: points.h:32
void print_summary() const
Definition: statistc.cpp:559
double ile(double frac) const
Definition: statistc.cpp:173
float kern_size
Definition: blobbox.h:679
bool joined_to_prev() const
Definition: blobbox.h:257
EXTERN double textord_words_definite_spread
Definition: tovars.cpp:76
float xheight
Definition: blobbox.h:801
int32_t min_space
Definition: blobbox.h:805
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
int32_t space_threshold
Definition: blobbox.h:678
int textord_test_y
Definition: makerow.cpp:62
EXTERN double textord_wordstats_smooth_factor
Definition: tovars.cpp:39
EXTERN bool textord_show_initial_words
Definition: tovars.cpp:25
int textord_test_x
Definition: makerow.cpp:61
bool contains(const FCOORD pt) const
Definition: rect.h:333
const TBOX & bounding_box() const
Definition: blobbox.h:231
int32_t min_space
Definition: blobbox.h:676
int16_t right() const
Definition: rect.h:79
int32_t max_nonspace
Definition: blobbox.h:677
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:612
int32_t get_total() const
Definition: statistc.h:86

◆ row_words2()

int32_t row_words2 ( TO_BLOCK *  block,
TO_ROW *  row,
int32_t  maxwidth,
FCOORD  rotation,
bool  testing_on 
)

Definition at line 341 of file wordseg.cpp.

347  {
348  bool prev_valid; //if decent size
349  bool this_valid; //current blob big enough
350  int32_t prev_x; //end of prev blob
351  int32_t min_width; //min interesting width
352  int32_t valid_count; //good gaps
353  int32_t total_count; //total gaps
354  int32_t cluster_count; //no of clusters
355  int32_t prev_count; //previous cluster_count
356  int32_t gap_index; //which cluster
357  int32_t smooth_factor; //for smoothing stats
358  BLOBNBOX *blob; //current blob
359  float lower, upper; //clustering parameters
360  ICOORD testpt;
361  TBOX blob_box; //bounding box
362  //iterator
363  BLOBNBOX_IT blob_it = row->blob_list ();
364  STATS gap_stats (0, maxwidth);
365  //gap sizes
366  float gaps[BLOCK_STATS_CLUSTERS];
367  STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1];
368  //clusters
369 
371  smooth_factor =
372  (int32_t) (block->xheight * textord_wordstats_smooth_factor + 1.5);
373  // if (testing_on)
374  // tprintf("Row smooth factor=%d\n",smooth_factor);
375  prev_valid = false;
376  prev_x = -INT16_MAX;
377  const bool testing_row = false;
378  //min blob size
379  min_width = (int32_t) block->pr_space;
380  total_count = 0;
381  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
382  blob = blob_it.data ();
383  if (!blob->joined_to_prev ()) {
384  blob_box = blob->bounding_box ();
385  this_valid = blob_box.width () >= min_width;
386  if (this_valid && prev_valid
387  && blob_box.left () - prev_x < maxwidth) {
388  gap_stats.add (blob_box.left () - prev_x, 1);
389  }
390  total_count++; //count possibles
391  prev_x = blob_box.right ();
392  prev_valid = this_valid;
393  }
394  }
395  valid_count = gap_stats.get_total ();
396  if (valid_count < total_count * textord_words_minlarge) {
397  gap_stats.clear ();
398  prev_x = -INT16_MAX;
399  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
400  blob_it.forward ()) {
401  blob = blob_it.data ();
402  if (!blob->joined_to_prev ()) {
403  blob_box = blob->bounding_box ();
404  if (blob_box.left () - prev_x < maxwidth) {
405  gap_stats.add (blob_box.left () - prev_x, 1);
406  }
407  prev_x = blob_box.right ();
408  }
409  }
410  }
411  if (gap_stats.get_total () == 0) {
412  row->min_space = 0; //no evidence
413  row->max_nonspace = 0;
414  return 0;
415  }
416 
417  cluster_count = 0;
418  lower = block->xheight * words_initial_lower;
419  upper = block->xheight * words_initial_upper;
420  gap_stats.smooth (smooth_factor);
421  do {
422  prev_count = cluster_count;
423  cluster_count = gap_stats.cluster (lower, upper,
425  BLOCK_STATS_CLUSTERS, cluster_stats);
426  }
427  while (cluster_count > prev_count && cluster_count < BLOCK_STATS_CLUSTERS);
428  if (cluster_count < 1) {
429  row->min_space = 0;
430  row->max_nonspace = 0;
431  return 0;
432  }
433  for (gap_index = 0; gap_index < cluster_count; gap_index++)
434  gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
435  //get medians
436  if (testing_on) {
437  tprintf ("cluster_count=%d:", cluster_count);
438  for (gap_index = 0; gap_index < cluster_count; gap_index++)
439  tprintf (" %g(%d)", gaps[gap_index],
440  cluster_stats[gap_index + 1].get_total ());
441  tprintf ("\n");
442  }
443 
444  //Try to find proportional non-space and space for row.
445  for (gap_index = 0; gap_index < cluster_count
446  && gaps[gap_index] > block->max_nonspace; gap_index++);
447  if (gap_index < cluster_count)
448  lower = gaps[gap_index]; //most frequent below
449  else {
450  if (testing_on)
451  tprintf ("No cluster below block threshold!, using default=%g\n",
452  block->pr_nonsp);
453  lower = block->pr_nonsp;
454  }
455  for (gap_index = 0; gap_index < cluster_count
456  && gaps[gap_index] <= block->max_nonspace; gap_index++);
457  if (gap_index < cluster_count)
458  upper = gaps[gap_index]; //most frequent above
459  else {
460  if (testing_on)
461  tprintf ("No cluster above block threshold!, using default=%g\n",
462  block->pr_space);
463  upper = block->pr_space;
464  }
465  row->min_space =
466  (int32_t) ceil (upper - (upper - lower) * textord_words_definite_spread);
467  row->max_nonspace =
468  (int32_t) floor (lower + (upper - lower) * textord_words_definite_spread);
469  row->space_threshold = (row->max_nonspace + row->min_space) / 2;
470  row->space_size = upper;
471  row->kern_size = lower;
472  if (testing_on) {
473  if (testing_row) {
474  tprintf ("GAP STATS\n");
475  gap_stats.print();
476  tprintf ("SPACE stats\n");
477  cluster_stats[2].print_summary();
478  tprintf ("NONSPACE stats\n");
479  cluster_stats[1].print_summary();
480  }
481  tprintf ("Row at %g has minspace=%d(%g), max_non=%d(%g)\n",
482  row->intercept (), row->min_space, upper,
483  row->max_nonspace, lower);
484  }
485  return 1;
486 }
float intercept() const
Definition: blobbox.h:601
Definition: rect.h:34
#define BLOCK_STATS_CLUSTERS
Definition: wordseg.cpp:46
float space_size
Definition: blobbox.h:680
Definition: statistc.h:33
int32_t max_nonspace
Definition: blobbox.h:806
EXTERN double textord_spacesize_ratioprop
Definition: tovars.cpp:80
int16_t width() const
Definition: rect.h:115
int16_t left() const
Definition: rect.h:72
EXTERN double words_initial_upper
Definition: tovars.cpp:71
integer coordinate
Definition: points.h:32
void print_summary() const
Definition: statistc.cpp:559
float kern_size
Definition: blobbox.h:679
bool joined_to_prev() const
Definition: blobbox.h:257
EXTERN double textord_words_definite_spread
Definition: tovars.cpp:76
float xheight
Definition: blobbox.h:801
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
int32_t space_threshold
Definition: blobbox.h:678
int textord_test_y
Definition: makerow.cpp:62
float pr_nonsp
Definition: blobbox.h:810
EXTERN double textord_wordstats_smooth_factor
Definition: tovars.cpp:39
int textord_test_x
Definition: makerow.cpp:61
float pr_space
Definition: blobbox.h:809
const TBOX & bounding_box() const
Definition: blobbox.h:231
int32_t min_space
Definition: blobbox.h:676
int16_t right() const
Definition: rect.h:79
int32_t max_nonspace
Definition: blobbox.h:677
EXTERN double words_initial_lower
Definition: tovars.cpp:70
EXTERN double textord_words_minlarge
Definition: tovars.cpp:57
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:612

◆ set_row_spaces()

void set_row_spaces ( TO_BLOCK *  block,
FCOORD  rotation,
bool  testing_on 
)

Definition at line 130 of file wordseg.cpp.

134  {
135  TO_ROW *row; //current row
136  TO_ROW_IT row_it = block->get_rows ();
137 
138  if (row_it.empty ())
139  return; //empty block
140  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
141  row = row_it.data ();
142  if (row->fixed_pitch == 0) {
143  row->min_space =
144  (int32_t) ceil (row->pr_space -
145  (row->pr_space -
147  row->max_nonspace =
148  (int32_t) floor (row->pr_nonsp +
149  (row->pr_space -
151  if (testing_on && textord_show_initial_words) {
152  tprintf ("Assigning defaults %d non, %d space to row at %g\n",
153  row->max_nonspace, row->min_space, row->intercept ());
154  }
155  row->space_threshold = (row->max_nonspace + row->min_space) / 2;
156  row->space_size = row->pr_space;
157  row->kern_size = row->pr_nonsp;
158  }
159 #ifndef GRAPHICS_DISABLED
160  if (textord_show_initial_words && testing_on) {
161  plot_word_decisions (to_win, (int16_t) row->fixed_pitch, row);
162  }
163 #endif
164  }
165 }
float intercept() const
Definition: blobbox.h:601
float fixed_pitch
Definition: blobbox.h:664
void plot_word_decisions(ScrollView *win, int16_t pitch, TO_ROW *row)
Definition: drawtord.cpp:249
float space_size
Definition: blobbox.h:680
TO_ROW_LIST * get_rows()
Definition: blobbox.h:717
float pr_nonsp
Definition: blobbox.h:668
float pr_space
Definition: blobbox.h:667
float kern_size
Definition: blobbox.h:679
EXTERN double textord_words_definite_spread
Definition: tovars.cpp:76
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
int32_t space_threshold
Definition: blobbox.h:678
EXTERN ScrollView * to_win
Definition: drawtord.cpp:37
EXTERN bool textord_show_initial_words
Definition: tovars.cpp:25
int32_t min_space
Definition: blobbox.h:676
int32_t max_nonspace
Definition: blobbox.h:677

Variable Documentation

◆ textord_chopper_test

bool textord_chopper_test = FALSE

"Chopper is being tested."

Definition at line 43 of file wordseg.cpp.

◆ textord_force_make_prop_words

bool textord_force_make_prop_words = FALSE

"Force proportional word segmentation on all rows"

Definition at line 41 of file wordseg.cpp.

◆ textord_fp_chopping

bool textord_fp_chopping = TRUE

"Do fixed pitch chopping"

Definition at line 39 of file wordseg.cpp.